From bbdd4d80bde2c9d584f057586eb8af2f4307dea3 Mon Sep 17 00:00:00 2001 From: eabdullin Date: Tue, 30 Apr 2024 11:41:18 +0000 Subject: [PATCH 1/4] import CS qemu-kvm-8.2.0-11.el9_3 --- .gitignore | 2 +- .qemu-kvm.metadata | 2 +- SOURCES/0004-Initial-redhat-build.patch | 52 +- ...0005-Enable-disable-devices-for-RHEL.patch | 478 +- ...Machine-type-related-general-changes.patch | 143 +- SOURCES/0007-Add-aarch64-machine-types.patch | 368 +- SOURCES/0008-Add-ppc64-machine-types.patch | 76 +- SOURCES/0009-Add-s390x-machine-types.patch | 58 +- SOURCES/0010-Add-x86_64-machine-types.patch | 233 +- SOURCES/0011-Enable-make-check.patch | 129 +- ...mber-of-devices-that-can-be-assigned.patch | 32 +- ...Add-support-statement-to-help-output.patch | 18 +- ...documentation-instead-of-qemu-system.patch | 8 +- ...on-warning-when-opening-v2-images-rw.patch | 10 +- ....4.0-qemu-kvm-machine-type-for-aarch.patch | 44 + ...-add-usb-support-to-guest-get-fsinfo.patch | 53 - ...0017-Add-RHEL-9.2.0-compat-structure.patch | 110 - ...c-Update-x86-machine-type-compatibil.patch | 76 - .../0019-Disable-unwanted-new-devices.patch | 83 - SOURCES/README.tests | 4 +- ...vm-Compile-IOMMUFD-object-on-aarch64.patch | 37 + SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch | 37 + SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch | 37 + .../kvm-Implement-SMBIOS-type-9-v2.6.patch | 155 + ...ent-base-of-SMBIOS-type-9-descriptor.patch | 218 + ...har-socket-Fix-TLS-io-channels-sendi.patch | 60 + ...se-a-child-source-for-qio-input-sour.patch | 216 + ...-allow-repeating-hot-unplug-requests.patch | 84 - ...text_acquire-aio_context_release-a-n.patch | 60 + ...ontext_acquire-aio_context_release-A.patch | 102 + ...uivalence-between-AIO_WAIT_WHILE-and.patch | 81 + ...le-reentrancy-detection-for-apic-msi.patch | 55 - ...ional-reentrancy-guard-to-the-BH-API.patch | 231 - ...-use-after-free-on-re-entrancy-guard.patch | 70 - ...iommufd-Introduce-the-iommufd-object.patch | 476 ++ ...-Remove-check-on-number-of-backend-u.patch | 47 + 
.../kvm-backends-iommufd-Remove-mutex.patch | 112 + ...-disable-reentrancy-detection-for-io.patch | 57 - ...se-padded-I-O-vecs-exceeding-IOV_MAX.patch | 354 -- ...-no_coroutine_fns-in-qmp_block_resiz.patch | 56 - ...ix-pad_request-s-request-restriction.patch | 73 - ...end-Allow-concurrent-context-changes.patch | 104 + ...o_unref-for-calls-in-coroutine-conte.patch | 386 -- ...o-do-not-use-open-flags-in-qemu_open.patch | 74 - ...-blkio-enable-the-completion-eventfd.patch | 54 - ...-back-on-using-path-when-fd-setting-.patch | 67 - ...ck-blkio-fix-module_block.py-parsing.patch | 205 - ...-blkio_connect-in-the-drivers-functi.patch | 151 - ...y-blkio_connect-if-it-fails-using-fd.patch | 85 - ...blkio_set_int-fd-to-check-fd-support.patch | 49 - ...qemu_open-to-support-fd-passing-for-.patch | 108 - ...t-assert_bdrv_graph_readable-by-defa.patch | 121 - ...wrapper-use-qemu_get_current_aio_con.patch | 69 + ...-set-up-Linux-AIO-and-io_uring-in-th.patch | 217 + .../kvm-block-remove-AioContext-locking.patch | 4438 +++++++++++++++++ SOURCES/kvm-block-remove-bdrv_co_lock.patch | 97 + ...outdated-AioContext-locking-comments.patch | 411 ++ ...ket-Fix-TLS-io-channels-sending-too-.patch | 105 + ...iority-of-the-HUP-GSource-in-socket-.patch | 78 + ...ch-add-qemu_bh_new-aio_bh_new-checks.patch | 55 - ...utine-cap-per-thread-local-pool-size.patch | 412 ++ ...kvm-coroutine-reserve-5-000-mappings.patch | 61 + ...-don-t-lock-AioContext-in-dma_blk_cb.patch | 75 + ...d-VFIO-iommufd-backend-documentation.patch | 228 + ...e-AioContext-lock-from-IOThread-docs.patch | 98 + ...m-graph-lock-Disable-locking-for-now.patch | 153 - ...graph-lock-remove-AioContext-locking.patch | 1190 +++++ ...et_min_alignment-to-express-32-GiB-a.patch | 94 + ...i-blobs-as-resizable-on-RHEL-pc-mach.patch | 40 - ...rning-on-acpi-table-size-to-pc-machi.patch | 101 - ...m-Activate-IOMMUFD-for-virt-machines.patch | 42 + ...idate-cluster-and-NUMA-node-boundary.patch | 60 - ...mu-Handle-big-endian-hosts-correctly.patch | 166 
- ...properties-to-disable-high-memory-re.patch | 88 + SOURCES/kvm-hw-arm-virt-Fix-compats.patch | 132 + ...date-cluster-and-NUMA-node-boundary-.patch | 41 - ...ecate-virt-rhel9.-0-2-.0-machine-typ.patch | 41 + ...86-Activate-IOMMUFD-for-q35-machines.patch | 41 + ...-smbios_set_defaults-to-machine_done.patch | 186 + ...CI_ERR_UNCOR_MASK-reg-for-machine-ty.patch | 44 - ...CI_ERR_UNCOR_MASK-register-for-machi.patch | 118 - .../kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch | 116 + ...qemu_bh_new-calls-with-qemu_bh_new_g.patch | 470 -- ...5a-Fix-reentrancy-issues-in-the-LSI-.patch | 141 - ...teration-over-global-VFIODevice-list.patch | 73 + ...i-quirks-Sanitize-capability-pointer.patch | 76 - ...ks-Support-alternate-offset-for-GPUD.patch | 110 - ...Fix-potential-OOB-access-in-virtio_i.patch | 62 - ...how-the-EBX-register-of-CPUID-0x8000.patch | 52 - ...checks-and-information-related-to-re.patch | 77 - ....h-fix-qemu_rect_init-mis-assignment.patch | 54 + ...-for-reset-AioContext-switches-with-.patch | 133 + ...ommit-with-iothreads-and-ongoing-I-O.patch | 144 - ...sizing-image-attached-to-an-iothread.patch | 132 - ...rnative-CPU-type-that-is-not-depreca.patch | 44 - ...ts-add-filter_qmp_generated_node_ids.patch | 49 + ...ds-stream-Use-the-right-TimeoutError.patch | 49 + .../kvm-iotests-iov-padding-New-test.patch | 186 - ...1-to-Python-for-reliable-QMP-testing.patch | 592 +++ ...outdated-AioContext-locking-comments.patch | 105 + ...-Activate-IOMMUFD-for-s390x-machines.patch | 42 + ...pu-stats-fd-to-avoid-vcpu-interrupti.patch | 160 - ...loongarch_ipi_iocsr-re-entrnacy-safe.patch | 53 - ...le-reentrancy-detection-for-MMIO-reg.patch | 70 - ...le-reentrancy-detection-for-script-R.patch | 58 - ...reintroduce-memory-region-size-check.patch | 112 + ...-memory-prevent-dma-reentracy-issues.patch | 150 - ...checks-prior-to-unsetting-engaged_in.patch | 67 - ...ration-Add-switchover-ack-capability.patch | 162 - ...postcopy_ram_supported_by_host-to-re.patch | 308 -- 
...t-disk-reactivation-in-more-failure-.patch | 111 - ...kvm-migration-Create-migrate_cap_set.patch | 93 - ...tion-Create-migrate_checkpoint_delay.patch | 84 - ...-migrate_cpu_throttle_increment-func.patch | 75 - ...-migrate_cpu_throttle_initial-to-opt.patch | 75 - ...-migrate_cpu_throttle_tailslow-funct.patch | 78 - ...reate-migrate_max_bandwidth-function.patch | 232 - ...tion-Create-migrate_max_cpu_throttle.patch | 88 - ...Create-migrate_rdma_pin_all-function.patch | 95 - ...e-migrate_throttle_trigger_threshold.patch | 75 - SOURCES/kvm-migration-Create-options.c.patch | 524 -- ...ion-Enable-switchover-ack-capability.patch | 56 - ...-block-device-inactivation-failures-.patch | 116 - ...ation-Implement-switchover-ack-logic.patch | 339 -- ...ll-functions-check-have-the-same-for.patch | 431 -- ...gration-Make-dirty_sync_count-atomic.patch | 105 - ...e-dirty_sync_missed_zero_copy-atomic.patch | 92 - ...migration-Make-downtime_bytes-atomic.patch | 68 - ...-migration-Make-multifd_bytes-atomic.patch | 99 - ...ration-Make-postcopy_requests-atomic.patch | 69 - ...-migration-Make-precopy_bytes-atomic.patch | 68 - ...ram_counters-and-ram_atomic_counters.patch | 270 - ...on-Minor-control-flow-simplification.patch | 52 - ...-migrate_announce_params-to-option.c.patch | 90 - ...on-Move-migrate_cap_set-to-options.c.patch | 110 - ...Move-migrate_caps_check-to-options.c.patch | 458 -- ...ve-migrate_colo_enabled-to-options.c.patch | 136 - ...n-Move-migrate_postcopy-to-options.c.patch | 98 - ...-Move-migrate_use_block-to-options.c.patch | 134 - ...igrate_use_block_incremental-to-opti.patch | 121 - ...migrate_use_compression-to-options.c.patch | 183 - ...Move-migrate_use_events-to-options.c.patch | 120 - ...ove-migrate_use_multifd-to-options.c.patch | 247 - ...Move-migrate_use_return-to-options.c.patch | 138 - ...on-Move-migrate_use_tls-to-options.c.patch | 134 - ...Move-migrate_use_xbzrle-to-options.c.patch | 156 - ...igrate_use_zero_copy_send-to-options.patch | 167 - 
...ve-migration_properties-to-options.c.patch | 409 -- ...ove-parameters-functions-to-option.c.patch | 317 -- ...mp_migrate_set_capabilities-to-optio.patch | 100 - ...mp_migrate_set_parameters-to-options.patch | 943 ---- ...mp_query_migrate_capabilities-to-opt.patch | 100 - ...igrate_caps_check-the-old-and-new-ca.patch | 226 - ...ation-Rename-duplicate-to-zero_pages.patch | 109 - ...ration-Rename-normal-to-normal_pages.patch | 109 - ...Update-atomic-stats-out-of-the-mutex.patch | 52 - ...n-Use-migrate_max_postcopy_bandwidth.patch | 40 - ...ark-mixed-functions-that-can-suspend.patch | 153 - ...igration_global_dump-to-migration-hm.patch | 121 - ...copy-Detect-file-system-on-dest-host.patch | 117 - ...-extra-whitespace-character-for-code.patch | 44 - ...-enabled_capabilities-to-capabilitie.patch | 329 -- ...all-job_pause_point-under-graph-lock.patch | 90 + ...-coroutine-commands-in-qemu_aio_cont.patch | 1630 ++++++ ...roperty-multifd-flush-after-each-sec.patch | 127 - ...ifd-Fix-the-number-of-channels-ready.patch | 58 - ...flush-once-each-full-round-of-memory.patch | 166 - ...Protect-multifd_send_sync_main-calls.patch | 78 - ...rained_poll-to-wake-coroutine-in-rig.patch | 159 - ...rver-Fix-race-in-draining-the-export.patch | 95 + ...id-per-NBDRequest-nbd_client_get-put.patch | 53 + ...duce-NBDClient-lock-to-protect-field.patch | 373 ++ ...traverse-NBDExport-clients-from-main.patch | 176 + ...fd-type-checking-to-its-own-function.patch | 78 - ...t-prepare-to-cleanup-net_init_socket.patch | 60 - ...vm-net-socket-remove-net_init_socket.patch | 102 - ...uster-and-NUMA-node-boundary-if-requ.patch | 145 - ...-Don-t-use-__bss_start-with-the-larl.patch | 78 - ...-s390-ccw-Fix-indentation-in-start.S.patch | 218 - ...-Makefile-Use-z-noexecstack-to-silen.patch | 50 - ...-Provide-space-for-initial-stack-fra.patch | 59 - ...S-entry-point-type-to-auto-by-defaul.patch | 115 + ...manufacturer-product-version-to-matc.patch | 44 + ...tplug-detect-state-register-to-cmask.patch | 87 - 
...tcopy-ram-do-not-use-qatomic_mb_read.patch | 42 - ...eature-for-BlockdevOptionsVirtioBlkV.patch | 79 - ...ange-the-reduced-phys-bits-value-fro.patch | 50 - ...adVirtQueueMappingList-property-type.patch | 167 + ...es-alias-all-object-class-properties.patch | 85 + ...Update-the-reduced-phys-bits-documen.patch | 60 - ...-increase-NOFILE-soft-limit-on-POSIX.patch | 135 + ...sable-reentrancy-detection-for-iomem.patch | 54 - ...-the-device-request-notifier-interfa.patch | 220 - ...i-avoid-double-enable-disable-of-aif.patch | 106 + ...drive-ISM-reset-from-subsystem-reset.patch | 137 + ...-pci-refresh-fh-before-disabling-aif.patch | 71 + ...rious-warning-with-asynchronous-tear.patch | 129 - SOURCES/kvm-scsi-Await-request-purging.patch | 124 + ...-callbacks-run-in-the-correct-AioCon.patch | 88 + ...si-cleanup-scsi_clear_unit_attention.patch | 81 - ...attention-only-for-REPORT-LUNS-comma.patch | 110 - ...n-t-lock-AioContext-in-I-O-code-path.patch | 245 + ...-attention-when-creating-the-request.patch | 132 - ...-SCSIDevice-requests-from-one-thread.patch | 307 ++ .../kvm-scsi-remove-AioContext-locking.patch | 280 ++ ...ove-outdated-AioContext-lock-comment.patch | 41 + ...-add-smbios_add_usr_blob_size-helper.patch | 62 + ...-avoid-mangling-user-provided-tables.patch | 309 ++ ...legacy-mode-code-only-for-pc-machine.patch | 517 ++ ...mbios_get_tables-from-legacy-handlin.patch | 65 + ...ios_type4_count-before-building-tabl.patch | 38 + ...heck-type4-structures-in-legacy-mode.patch | 133 + ...-when-building-type-4-table-is-not-p.patch | 72 + ...bios-entry-point-type-with-auto-valu.patch | 48 + ...ios-get-rid-of-global-smbios_ep_type.patch | 281 ++ ...bios-get-rid-of-smbios_legacy-global.patch | 198 + ...get-rid-of-smbios_smp_sockets-global.patch | 134 + ...vm-smbios-handle-errors-consistently.patch | 217 + ...f-entry-point-is-auto-try-to-build-v.patch | 131 + ...pose-structures-bitmaps-used-by-both.patch | 330 ++ ...ve-client_migrate_info-command-to-ui.patch | 248 - 
...t-visitor-Fix-pseudo-struct-handling.patch | 190 + ...tput-visitor-show-structs-as-omitted.patch | 90 + ...EPYC-Genoa-model-to-support-Zen-4-pr.patch | 203 - ...VNMI-and-automatic-IBRS-feature-bits.patch | 105 - ...a-couple-of-feature-bits-in-8000_000.patch | 94 - ...feature-bits-for-CPUID_Fn80000021_EA.patch | 126 - ...missing-feature-bits-in-EPYC-Milan-m.patch | 152 - ...new-EPYC-CPU-versions-with-updated-c.patch | 192 - ...386-add-support-for-FB_CLEAR-feature.patch | 71 - ...86-add-support-for-FLUSH_L1D-feature.patch | 70 - ...w-versioned-CPUs-to-specify-new-cach.patch | 116 - ...-pv-Provide-some-more-useful-informa.patch | 205 + ...sts-remove-aio_context_acquire-tests.patch | 125 + ...test-replication-timeout-to-60-secon.patch | 46 + ...our-channel-order-for-PNG-screenshot.patch | 88 - ...d-add-asserts-for-update-and-request.patch | 81 + ...k-type-as-not-available-when-there-i.patch | 107 + ...own-wire-up-query-command-line-optio.patch | 180 - SOURCES/kvm-util-char_dev-Add-open_cdev.patch | 175 + .../kvm-util-iov-Make-qiov_slice-public.patch | 97 - ...-iov-Remove-qemu_iovec_init_extended.patch | 156 - .../kvm-util-mmap-alloc-qemu_fd_getfs.patch | 95 - ...il-vfio-helpers-Use-g_file_read_link.patch | 82 - ...k-migration-if-device-has-cvq-and-x-.patch | 61 - ...pa-export-vhost_vdpa_set_vring_ready.patch | 105 - ...olation-check-to-net_init_vhost_vdpa.patch | 286 -- ...t_vdpa_set_vring_ready-to-the-caller.patch | 134 - ...dpa-remove-net-cvq-migration-blocker.patch | 51 - ...t_vdpa_net_load-to-vhost_vdpa_net_cv.patch | 49 - ...o-in-vhost_vdpa_get_vring_group-erro.patch | 67 - ...irst-queue-SVQ-state-for-CVQ-default.patch | 46 - ...inter-dereference-bug-in-vfio_bars_f.patch | 72 - ...mplement-a-common-device-info-helper.patch | 196 - ...-helper-function-to-initialize-VFIOD.patch | 154 + ...ase-object-for-VFIOContainer-and-tar.patch | 129 + ...ntainerBase-poiner-parameter-const-i.patch | 276 + ...e-selection-of-a-given-iommu-backend.patch | 75 + 
...o-cdev-pre-openable-by-passing-a-fil.patch | 87 + ...ODevice-initializations-in-vfio_ap_i.patch | 81 + ...he-selection-of-a-given-iommu-backen.patch | 79 + ...io-cdev-pre-openable-by-passing-a-fi.patch | 93 + ...IODevice-initializations-in-vfio_ccw.patch | 85 + ...oduce-vfio_container_init-destroy-he.patch | 98 + ...n-Move-giommu_list-in-base-container.patch | 221 + ...on-return-early-if-space-isn-t-empty.patch | 55 + ...-Convert-functions-to-base-container.patch | 257 + ...ainer-Implement-attach-detach_device.patch | 97 + ...nitialize-VFIOIOMMUOps-under-vfio_in.patch | 65 + ...ntoduce-a-new-VFIOIOMMUClass-setup-h.patch | 55 + ...-Introduce-a-VFIOIOMMU-QOM-interface.patch | 143 + ...ntroduce-a-VFIOIOMMU-legacy-QOM-inte.patch | 168 + ...ainer-Introduce-a-empty-VFIOIOMMUOps.patch | 71 + ...ntroduce-vfio_legacy_setup-for-furth.patch | 118 + ...ove-dirty_pgsizes-and-max_dirty_bitm.patch | 102 + ...r-Move-iova_ranges-to-base-container.patch | 168 + ...iner-Move-listener-to-base-container.patch | 522 ++ ...ove-per-container-device-list-in-bas.patch | 230 + ...ove-pgsizes-and-dma_max_mappings-to-.patch | 242 + ...r-Move-space-field-to-base-container.patch | 265 + ...ner-Move-vrdl_list-to-base-container.patch | 255 + ...ename-vfio_init_container-to-vfio_se.patch | 66 + ...eplace-basename-with-g_path_get_base.patch | 59 + ...witch-to-IOMMU-BE-set_dirty_page_tra.patch | 235 + ...ontainer-Switch-to-dma_map-unmap-API.patch | 303 ++ ...-support-for-iova_ranges-and-pgsizes.patch | 115 + ...ble-pci-hot-reset-through-iommufd-cd.patch | 215 + ...ommufd-Implement-the-iommufd-backend.patch | 561 +++ ...roduce-a-VFIOIOMMU-iommufd-QOM-inter.patch | 155 + ...lax-assert-check-for-iommufd-backend.patch | 71 + ...-iommufd-Remove-CONFIG_IOMMUFD-usage.patch | 55 + ...ove-the-use-of-stat-to-check-file-ex.patch | 56 + ...-Add-VFIO-migration-pre-copy-support.patch | 438 -- ...dd-helper-function-to-set-state-or-r.patch | 115 + ...dd-support-for-switchover-ack-capabi.patch | 192 - 
...hange-vIOMMU-blocker-from-global-to-.patch | 171 - ...ree-resources-when-vfio_migration_re.patch | 145 - ...Make-VFIO-migration-non-experimental.patch | 283 -- ...efactor-vfio_save_block-to-return-sa.patch | 102 - ...n-Remove-print-of-Migration-disabled.patch | 56 - ...ion-Reset-bytes_transferred-properly.patch | 165 - ...eturn-bool-type-for-vfio_migration_r.patch | 125 - ...kip-log_sync-during-migration-SETUP-.patch | 68 - ...tore-VFIO-migration-flags-in-VFIOMig.patch | 70 - ...he-selection-of-a-given-iommu-backen.patch | 81 + ...io_prepare_kvm_msi_virq_batch-in-MSI.patch | 67 - ...fio-pci-Clear-MSI-X-IRQ-index-always.patch | 69 + ...able-INTx-in-vfio_realize-error-path.patch | 54 - ...-out-a-helper-vfio_pci_get_pci_hot_r.patch | 139 + ...o-pci-Fix-a-segfault-in-vfio_realize.patch | 67 - ...-vfio-pci-Fix-a-use-after-free-issue.patch | 56 - ...aked-timer-in-vfio_realize-error-pat.patch | 55 - ...oduce-a-vfio-pci-hot-reset-interface.patch | 466 ++ ...io-cdev-pre-openable-by-passing-a-fi.patch | 237 + ...IODevice-initializations-in-vfio_ins.patch | 70 + ...-pci-Static-Resizable-BAR-capability.patch | 141 - ...vm-vfio-pci-add-support-for-VF-token.patch | 104 - ...low-the-selection-of-a-given-iommu-b.patch | 77 + ...ke-vfio-cdev-pre-openable-by-passing.patch | 108 + ...ve-VFIODevice-initializations-in-vfi.patch | 64 + ...d-VFIOIOMMUOps-with-a-release-handle.patch | 129 + ...duce-a-sPAPR-VFIOIOMMU-QOM-interface.patch | 150 + ...duce-spapr-backend-and-target-interf.patch | 91 + ...ve-hostwin_list-into-spapr-container.patch | 188 + ...prereg_listener-into-spapr-container.patch | 120 + ...compile-sPAPR-IOMMU-support-when-nee.patch | 46 + ...h-to-spapr-IOMMU-BE-add-del_section_.patch | 184 + ...host_dev_enable_notifiers-error-case.patch | 138 - ...t-cleanup-the-vdpa-vhost-net-structu.patch | 67 - ...a-mute-unaligned-memory-error-report.patch | 86 - ...-Re-enable-notifications-after-drain.patch | 139 + ...otential-nullpointer-read-access-in-.patch | 47 + 
...-ioeventfd_attach-in-start_ioeventfd.patch | 75 + ...lk-add-iothread-vq-mapping-parameter.patch | 464 ++ ...-virtio-blk-add-lock-to-protect-s-rq.patch | 177 + ...-always-set-ioeventfd-during-startup.patch | 63 + ...-using-ioeventfd-state-in-irqfd-cond.patch | 72 + ...-lock-AioContext-in-the-completion-c.patch | 167 + ...-lock-AioContext-in-the-submission-c.patch | 67 + ...ove-dataplane-code-into-virtio-blk.c.patch | 1009 ++++ ...e-dataplane-create-destroy-functions.patch | 117 + ...io-blk-rename-dataplane-to-ioeventfd.patch | 307 ++ ...-restart-s-rq-reqs-in-vq-AioContexts.patch | 106 + ...ate-failure-to-set-BlockBackend-AioC.patch | 72 + ...lock-migration-of-VMs-with-blob-true.patch | 87 + ...-64kB-host-page-size-VFIO-device-ass.patch | 151 - ...ork-the-traces-in-virtio_iommu_set_p.patch | 83 - ...ndardize-granule-extraction-and-form.patch | 88 - ...-mem-default-enable-dynamic-memslots.patch | 70 + ...ctly-report-maximum-tx_queue_size-va.patch | 92 - ...ttach-event-vq-notifier-with-no_poll.patch | 78 + ...t-lock-AioContext-around-virtio_queu.patch | 58 + ...ace-AioContext-lock-with-tmf_bh_lock.patch | 173 + ...ate-backends-before-migration-object.patch | 58 - ...6-rhel-9.2.0-machine-type-compat-fix.patch | 48 + SOURCES/qemu-ga.sysconfig | 12 +- SOURCES/qemu-guest-agent.service | 2 +- SPECS/qemu-kvm.spec | 1109 ++-- 358 files changed, 32550 insertions(+), 24472 deletions(-) create mode 100644 SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch delete mode 100644 SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch delete mode 100644 SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch delete mode 100644 SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch delete mode 100644 SOURCES/0019-Disable-unwanted-new-devices.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch create mode 100644 SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch 
create mode 100644 SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch create mode 100644 SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch create mode 100644 SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch create mode 100644 SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch delete mode 100644 SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch create mode 100644 SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch create mode 100644 SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch create mode 100644 SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch delete mode 100644 SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch delete mode 100644 SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch delete mode 100644 SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch create mode 100644 SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch create mode 100644 SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch create mode 100644 SOURCES/kvm-backends-iommufd-Remove-mutex.patch delete mode 100644 SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch delete mode 100644 SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch delete mode 100644 SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch delete mode 100644 SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch create mode 100644 SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch delete mode 100644 SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch delete mode 100644 SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch delete mode 100644 SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch delete mode 100644 SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch delete mode 100644 
SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch delete mode 100644 SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch delete mode 100644 SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch delete mode 100644 SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch delete mode 100644 SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch delete mode 100644 SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch create mode 100644 SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch create mode 100644 SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch create mode 100644 SOURCES/kvm-block-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-block-remove-bdrv_co_lock.patch create mode 100644 SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch create mode 100644 SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch create mode 100644 SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch delete mode 100644 SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch create mode 100644 SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch create mode 100644 SOURCES/kvm-coroutine-reserve-5-000-mappings.patch create mode 100644 SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch create mode 100644 SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch create mode 100644 SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch delete mode 100644 SOURCES/kvm-graph-lock-Disable-locking-for-now.patch create mode 100644 SOURCES/kvm-graph-lock-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch delete mode 100644 SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch delete mode 100644 SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch create 
mode 100644 SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch delete mode 100644 SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch delete mode 100644 SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch create mode 100644 SOURCES/kvm-hw-arm-virt-Fix-compats.patch delete mode 100644 SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch create mode 100644 SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch create mode 100644 SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch create mode 100644 SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch delete mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch delete mode 100644 SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch create mode 100644 SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch delete mode 100644 SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch delete mode 100644 SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch create mode 100644 SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch delete mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch delete mode 100644 SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch delete mode 100644 SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch delete mode 100644 SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch delete mode 100644 SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch create mode 100644 SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch create mode 100644 SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch delete mode 100644 SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch delete mode 100644 
SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch delete mode 100644 SOURCES/kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch create mode 100644 SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch create mode 100644 SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch delete mode 100644 SOURCES/kvm-iotests-iov-padding-New-test.patch create mode 100644 SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch create mode 100644 SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch create mode 100644 SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch delete mode 100644 SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch delete mode 100644 SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch delete mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch delete mode 100644 SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch create mode 100644 SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch delete mode 100644 SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch delete mode 100644 SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch delete mode 100644 SOURCES/kvm-migration-Add-switchover-ack-capability.patch delete mode 100644 SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch delete mode 100644 SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cap_set.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch delete mode 100644 
SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch delete mode 100644 SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch delete mode 100644 SOURCES/kvm-migration-Create-options.c.patch delete mode 100644 SOURCES/kvm-migration-Enable-switchover-ack-capability.patch delete mode 100644 SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch delete mode 100644 SOURCES/kvm-migration-Implement-switchover-ack-logic.patch delete mode 100644 SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch delete mode 100644 SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch delete mode 100644 SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch delete mode 100644 SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch delete mode 100644 SOURCES/kvm-migration-Minor-control-flow-simplification.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch delete mode 100644 
SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch delete mode 100644 SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch delete mode 100644 SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch delete mode 100644 SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch delete mode 100644 SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch delete mode 100644 SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch delete mode 100644 SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch delete mode 100644 SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch delete mode 100644 SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch delete mode 100644 SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch delete mode 100644 SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch delete mode 100644 SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch delete mode 100644 SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch delete mode 100644 SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch create mode 100644 SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch create mode 100644 
SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch delete mode 100644 SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch delete mode 100644 SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch delete mode 100644 SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch delete mode 100644 SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch delete mode 100644 SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch create mode 100644 SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch create mode 100644 SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch create mode 100644 SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch create mode 100644 SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch delete mode 100644 SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch delete mode 100644 SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch delete mode 100644 SOURCES/kvm-net-socket-remove-net_init_socket.patch delete mode 100644 SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch delete mode 100644 SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch create mode 100644 SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch create mode 100644 SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch delete mode 100644 SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch delete mode 100644 SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch delete mode 100644 SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch delete mode 100644 
SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch create mode 100644 SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch create mode 100644 SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch delete mode 100644 SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch create mode 100644 SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch delete mode 100644 SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch delete mode 100644 SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch create mode 100644 SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch create mode 100644 SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch create mode 100644 SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch delete mode 100644 SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch create mode 100644 SOURCES/kvm-scsi-Await-request-purging.patch create mode 100644 SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch delete mode 100644 SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch delete mode 100644 SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch create mode 100644 SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch delete mode 100644 SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch create mode 100644 SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch create mode 100644 SOURCES/kvm-scsi-remove-AioContext-locking.patch create mode 100644 SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch create mode 100644 SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch create mode 100644 SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch create mode 100644 SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch create mode 100644 SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch 
create mode 100644 SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch create mode 100644 SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch create mode 100644 SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch create mode 100644 SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch create mode 100644 SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch create mode 100644 SOURCES/kvm-smbios-handle-errors-consistently.patch create mode 100644 SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch create mode 100644 SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch delete mode 100644 SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch create mode 100644 SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch create mode 100644 SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch delete mode 100644 SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch delete mode 100644 SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch delete mode 100644 SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch delete mode 100644 SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch delete mode 100644 SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch delete mode 100644 SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch delete mode 100644 SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch delete mode 100644 SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch delete mode 100644 SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch create mode 100644 SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch create mode 100644 
SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch create mode 100644 SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch delete mode 100644 SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch create mode 100644 SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch create mode 100644 SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch delete mode 100644 SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch create mode 100644 SOURCES/kvm-util-char_dev-Add-open_cdev.patch delete mode 100644 SOURCES/kvm-util-iov-Make-qiov_slice-public.patch delete mode 100644 SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch delete mode 100644 SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch delete mode 100644 SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch delete mode 100644 SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch delete mode 100644 SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch delete mode 100644 SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch delete mode 100644 SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch delete mode 100644 SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch delete mode 100644 SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch delete mode 100644 SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch delete mode 100644 SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch delete mode 100644 SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch delete mode 100644 SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch create mode 100644 SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch create mode 100644 SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch create mode 100644 SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch create mode 100644 
SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch create mode 100644 SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch create mode 100644 SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch create mode 100644 SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch create mode 100644 SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch create mode 100644 SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch create mode 100644 SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch create mode 100644 SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch create mode 100644 SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch create mode 100644 SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch create mode 100644 SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch create mode 100644 SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch create mode 100644 SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch create mode 100644 SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch create mode 100644 SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch create mode 100644 SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch create mode 100644 
SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch create mode 100644 SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch create mode 100644 SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch create mode 100644 SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch create mode 100644 SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch create mode 100644 SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch delete mode 100644 SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch create mode 100644 SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch delete mode 100644 SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch delete mode 100644 SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch delete mode 100644 SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch delete mode 100644 SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch delete mode 100644 SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch delete mode 100644 SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch delete mode 100644 SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch delete mode 100644 SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch 
delete mode 100644 SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch delete mode 100644 SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch create mode 100644 SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch delete mode 100644 SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch create mode 100644 SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch delete mode 100644 SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch create mode 100644 SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch delete mode 100644 SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch delete mode 100644 SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch delete mode 100644 SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch create mode 100644 SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch create mode 100644 SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch create mode 100644 SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch delete mode 100644 SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch delete mode 100644 SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch create mode 100644 SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch create mode 100644 SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch create mode 100644 SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch create mode 100644 SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch create mode 100644 SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch create mode 100644 SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch create mode 100644 SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch create mode 100644 SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch create mode 
100644 SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch create mode 100644 SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch delete mode 100644 SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch delete mode 100644 SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch delete mode 100644 SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch create mode 100644 SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch create mode 100644 SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch create mode 100644 SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch create mode 100644 SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch create mode 100644 SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch create mode 100644 SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch create mode 100644 SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch create mode 100644 SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch create mode 100644 SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch create mode 100644 SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch create mode 100644 SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch create mode 100644 SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch create mode 100644 SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch create mode 100644 SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch create mode 100644 SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch delete mode 100644 SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch create mode 100644 
SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch delete mode 100644 SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch create mode 100644 SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch create mode 100644 SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch create mode 100644 SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch delete mode 100644 SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch create mode 100644 SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch diff --git a/.gitignore b/.gitignore index 7dc73be..ded41b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/qemu-8.0.0.tar.xz +SOURCES/qemu-8.2.0.tar.xz diff --git a/.qemu-kvm.metadata b/.qemu-kvm.metadata index a158c44..4a22f24 100644 --- a/.qemu-kvm.metadata +++ b/.qemu-kvm.metadata @@ -1 +1 @@ -17d54a85aa5d7f5dcfc619aa34049f9a91ceed0d SOURCES/qemu-8.0.0.tar.xz +1615e59b1bd68324e0819245fe003e33c14a52f9 SOURCES/qemu-8.2.0.tar.xz diff --git a/SOURCES/0004-Initial-redhat-build.patch b/SOURCES/0004-Initial-redhat-build.patch index 612633e..a63b5c3 100644 --- a/SOURCES/0004-Initial-redhat-build.patch +++ b/SOURCES/0004-Initial-redhat-build.patch @@ -1,4 +1,4 @@ -From 84039bfc860878f3c3421de4a1836ac5d6300ed7 Mon Sep 17 00:00:00 2001 +From faae70a870156f86a5cf55ca967b15d7612941ff Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 26 May 2021 10:56:02 +0200 Subject: Initial redhat build @@ -13,7 +13,7 @@ several issues are fixed in QEMU tree: We disable make check due to issues with some of the tests. 
-This rebase is based on qemu-kvm-7.2.0-14.el9 +This rebase is based on qemu-kvm-8.1.0-5.el9 Signed-off-by: Miroslav Rezanina -- @@ -50,32 +50,39 @@ Rebase changes (7.0.0): - Change permissions on installing tests/Makefile.include - Remove ssh block driver -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - --disable-vnc-png renamed to --disable-png (upstream) - removed --disable-vhost-vsock and --disable-vhost-scsi - capstone submodule removed - Temporary include capstone build -Rebase changes (7.2.0 rc0): +Rebase changes (7.2.0): - Switch --enable-slirp=system to --enable-slirp - -Rebaes changes (7.2.0 rc2): - Added new configure options (blkio and sndio, both disabled) Rebase changes (7.2.0): - Fix SRPM name generation to work on Fedora 37 - Switch back to system meson -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - use enable-dtrace-backands instead of enable-dtrace-backend - Removed qemu virtiofsd bits - -Rebase changes (8.0.0-rc2): - test/check-block.sh removed (upstream) - -Rebase changes (8.0.0-rc3): - Add new --disable-* options for configure +Rebase changes (8.1.0): +- qmp-spec.txt installed by make +- Removed --meson configure option +- Add --disable-pypi +- Removed --with-git and -with-gitsubmodules +- Renamed --disable-pypi to --disable-downloads +- Minor updates in README.tests + +Rebase changes (8.2.0): +- Removed --disable-hax (upstream) +- Added --disable-plugins configure option +- Fixing frh.py strings + Merged patches (6.0.0): - 605758c902 Limit build on Power to qemu-img and qemu-ga only @@ -168,24 +175,35 @@ Merged patches (7.0.0): - d46d2710b2 spec: Obsolete old usb redir subpackage - 6f52a50b68 spec: Obsolete ssh driver -Merged patches (7.2.0 rc4): +Merged patches (7.2.0): - 8c6834feb6 Remove opengl display device subpackages (C9S MR 124) - 0ecc97f29e spec: Add requires for packages with additional virtio-gpu variants (C9S MR 124) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 7754f6ba78 Minor packaging fixes - 
401af56187 spec: Disable VDUSE +Merged patches (8.1.0): +- 0c2306676f Enable Linux io_uring +- b7fa6426d5 Enable libblkio block drivers +- 19f6d7a6f4 Fix virtio-blk-vhost-vdpa typo in spec file +- f356cae88f spec: Build DBUS display +- 77b763efd5 Provide elf2dmp binary in qemu-tools + +Merged patches (8.2.0): +- cd9efa221d Enable qemu-kvm-device-usb-redirec for aarch64 + Signed-off-by: Miroslav Rezanina --- .distro/Makefile | 100 + - .distro/Makefile.common | 41 + + .distro/Makefile.common | 42 + .distro/README.tests | 39 + .distro/modules-load.conf | 4 + .distro/qemu-guest-agent.service | 1 - - .distro/qemu-kvm.spec.template | 4528 +++++++++++++++++++++++ + .distro/qemu-kvm.spec.template | 4909 +++++++++++++++++++++++ .distro/rpminspect.yaml | 6 +- .distro/scripts/extract_build_cmd.py | 12 + + .distro/scripts/frh.py | 4 +- .distro/scripts/process-patches.sh | 4 + .gitignore | 1 + README.systemtap | 43 + @@ -193,7 +211,7 @@ Signed-off-by: Miroslav Rezanina scripts/systemtap/conf.d/qemu_kvm.conf | 4 + scripts/systemtap/script.d/qemu_kvm.stp | 1 + ui/vnc-auth-sasl.c | 2 +- - 15 files changed, 4784 insertions(+), 4 deletions(-) + 16 files changed, 5168 insertions(+), 6 deletions(-) create mode 100644 .distro/Makefile create mode 100644 .distro/Makefile.common create mode 100644 .distro/README.tests @@ -296,5 +314,5 @@ index 47fdae5b21..2a950caa2a 100644 if (saslErr != SASL_OK) { error_setg(errp, "Failed to initialize SASL auth: %s", -- -2.39.1 +2.39.3 diff --git a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch index 14dd3f9..97c53b4 100644 --- a/SOURCES/0005-Enable-disable-devices-for-RHEL.patch +++ b/SOURCES/0005-Enable-disable-devices-for-RHEL.patch @@ -1,4 +1,4 @@ -From 63829772dbc2075fc014a9d52e3968735d228018 Mon Sep 17 00:00:00 2001 +From 048067b4618ba1fa7c8c517185d4cd3a675eba72 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 7 Dec 2022 03:05:48 -0500 Subject: Enable/disable devices for RHEL @@ 
-22,21 +22,31 @@ Rebase notes (7.0.0): - Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG - Removed upstream devices -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Added CONFIG_VHOST_VSOCK and CONFIG_VHOST_USER_VSOCK configs - Added CONFIG_CXL and CONFIG_CXL_MEM_DEVICE for aarch64 and x86_64 - -Rebase notes (7.1.0 rc3): - Added CONFIG_VHOST_USER_FS option (all archs) -Rebase notes (7.2.0 rc20): +Rebase notes (7.2.0): - Removed disabling a15mpcore.c as no longer needed -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Rename CONFIG_ACPI_X86_ICH to CONFIG_ACPI_ICH9 - Inlude qemu/error-report.h in hw/display/cirrus_vga.c - Change virtiofsd dependency version +Rebase notes (8.1.0): +- Added CONFIG_PCIE_PCI_BRIDGE for x86_64 +- Disabling tcg cpus for aarch64 +- Disable CONFIG_ARM_V7M and remove related hack +- Moved aarch64 tcg cpu disabling from arm machine type commit + +Rebase notes (8.2.0): +- Disabled new a710 arm64 tcg cpu +- No longer needed hack for removal of i2c-echo +- Disable new neoverse-v2 +- Removed CONFIG_OPENGL from x86_64 config file + Merged patches (6.1.0): - c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak - 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI @@ -53,32 +63,47 @@ Merged patches (7.0.0): - fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 - c9e68ea451 Enable SGX -- RH Only -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/ich9.c chunk) - 8f663466c6 configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM - 1bf372717a Enable virtio-iommu-pci on aarch64 - ae3f269458 Enable virtio-iommu-pci on x86_64 + +Merged patches (8.1.0): +- 8173d2eaba Disable unwanted new devices + +Merged patches (8.2.0): +- b29f66431f Enable igb on x86_64 --- .distro/qemu-kvm.spec.template | 18 +-- .../aarch64-softmmu/aarch64-rh-devices.mak | 41 +++++++ .../ppc64-softmmu/ppc64-rh-devices.mak | 37 ++++++ configs/devices/rh-virtio.mak | 10 ++ 
.../s390x-softmmu/s390x-rh-devices.mak | 18 +++ - .../x86_64-softmmu/x86_64-rh-devices.mak | 109 ++++++++++++++++++ - hw/arm/meson.build | 2 +- + .../x86_64-softmmu/x86_64-rh-devices.mak | 110 ++++++++++++++++++ + hw/arm/virt.c | 2 + hw/block/fdc.c | 10 ++ hw/cpu/meson.build | 3 +- - hw/display/cirrus_vga.c | 7 +- + hw/cxl/meson.build | 3 +- + hw/display/cirrus_vga.c | 4 + hw/ide/piix.c | 5 +- + hw/ide/qdev.c | 9 ++ hw/input/pckbd.c | 2 + hw/net/e1000.c | 2 + hw/ppc/spapr_cpu_core.c | 2 + hw/usb/meson.build | 2 +- - target/arm/cpu_tcg.c | 10 ++ + hw/virtio/meson.build | 5 +- + target/arm/arm-qmp-cmds.c | 2 + + target/arm/cpu.c | 4 + + target/arm/cpu.h | 3 + + target/arm/cpu64.c | 12 +- + target/arm/tcg/cpu32.c | 2 + + target/arm/tcg/cpu64.c | 8 ++ target/ppc/cpu-models.c | 9 ++ target/s390x/cpu_models_sysemu.c | 3 + target/s390x/kvm/kvm.c | 8 ++ - 19 files changed, 285 insertions(+), 13 deletions(-) + tests/qtest/arm-cpu-features.c | 4 + + 28 files changed, 323 insertions(+), 15 deletions(-) create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak create mode 100644 configs/devices/rh-virtio.mak @@ -87,7 +112,7 @@ Merged patches (7.1.0 rc0): diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak new file mode 100644 -index 0000000000..720ec0cb57 +index 0000000000..aec1831199 --- /dev/null +++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak @@ -0,0 +1,41 @@ @@ -97,12 +122,12 @@ index 0000000000..720ec0cb57 +CONFIG_ARM_GICV3_TCG=y +CONFIG_ARM_GIC=y +CONFIG_ARM_SMMUV3=y -+CONFIG_ARM_V7M=y +CONFIG_ARM_VIRT=y +CONFIG_CXL=y +CONFIG_CXL_MEM_DEVICE=y +CONFIG_EDID=y +CONFIG_PCIE_PORT=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PCI_DEVICES=y +CONFIG_PCI_TESTDEV=y +CONFIG_PFLASH_CFI01=y @@ -217,10 +242,10 @@ index 0000000000..69a799adbd +CONFIG_VHOST_USER_FS=y diff --git 
a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak new file mode 100644 -index 0000000000..668b2d0e18 +index 0000000000..ce5be73633 --- /dev/null +++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak -@@ -0,0 +1,109 @@ +@@ -0,0 +1,110 @@ +include ../rh-virtio.mak + +CONFIG_ACPI=y @@ -259,6 +284,7 @@ index 0000000000..668b2d0e18 +CONFIG_IDE_PCI=y +CONFIG_IDE_PIIX=y +CONFIG_IDE_QDEV=y ++CONFIG_IGB_PCI_EXPRESS=y +CONFIG_IOAPIC=y +CONFIG_IOH3420=y +CONFIG_ISA_BUS=y @@ -268,7 +294,6 @@ index 0000000000..668b2d0e18 +CONFIG_MC146818RTC=y +CONFIG_MEM_DEVICE=y +CONFIG_NVDIMM=y -+CONFIG_OPENGL=y +CONFIG_PAM=y +CONFIG_PC=y +CONFIG_PCI=y @@ -282,6 +307,7 @@ index 0000000000..668b2d0e18 +CONFIG_PCSPK=y +CONFIG_PC_ACPI=y +CONFIG_PC_PCI=y ++CONFIG_PCIE_PCI_BRIDGE=y +CONFIG_PFLASH_CFI01=y +CONFIG_PVPANIC_ISA=y +CONFIG_PXB=y @@ -330,19 +356,26 @@ index 0000000000..668b2d0e18 +CONFIG_VHOST_VSOCK=y +CONFIG_VHOST_USER_VSOCK=y +CONFIG_VHOST_USER_FS=y -diff --git a/hw/arm/meson.build b/hw/arm/meson.build -index b545ba0e4f..a41a16cba7 100644 ---- a/hw/arm/meson.build -+++ b/hw/arm/meson.build -@@ -29,7 +29,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) - arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) - arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index be2856c018..af9ea4dd1c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -205,6 +205,7 @@ static const int a15irqmap[] = { + }; --arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) -+#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) - arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) - arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) - arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise 
Linux */ + #ifdef CONFIG_TCG + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), +@@ -219,6 +220,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("neoverse-n2"), + #endif + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), diff --git a/hw/block/fdc.c b/hw/block/fdc.c index d7cc4d3ec1..12d0a60905 100644 --- a/hw/block/fdc.c @@ -372,18 +405,32 @@ index d7cc4d3ec1..12d0a60905 100644 error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); return; diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build -index e37490074f..4431e3731c 100644 +index 6d319947ca..91962fd863 100644 --- a/hw/cpu/meson.build +++ b/hw/cpu/meson.build @@ -1,4 +1,5 @@ --softmmu_ss.add(files('core.c', 'cluster.c')) -+#softmmu_ss.add(files('core.c', 'cluster.c')) -+softmmu_ss.add(files('core.c')) +-system_ss.add(files('core.c', 'cluster.c')) ++#system_ss.add(files('core.c', 'cluster.c')) ++system_ss.add(files('core.c')) - softmmu_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) - softmmu_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + system_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + system_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) +diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build +index ea0aebf6e3..6878f06974 100644 +--- a/hw/cxl/meson.build ++++ b/hw/cxl/meson.build +@@ -6,7 +6,8 @@ system_ss.add(when: 'CONFIG_CXL', + 'cxl-host.c', + 'cxl-cdat.c', + 'cxl-events.c', +- 'switch-mailbox-cci.c', ++# Disabled for 8.2.0 rebase for RHEL 9.4.0 ++# 'switch-mailbox-cci.c', + ), + if_false: files( + 'cxl-host-stubs.c', diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c -index b80f98b6c4..cbde6a8f15 100644 +index b80f98b6c4..0370cf8a64 100644 --- a/hw/display/cirrus_vga.c +++ b/hw/display/cirrus_vga.c @@ -36,6 +36,7 @@ @@ -394,31 +441,21 @@ index 
b80f98b6c4..cbde6a8f15 100644 #include "sysemu/reset.h" #include "qapi/error.h" #include "trace.h" -@@ -47,6 +48,7 @@ - #include "qom/object.h" - #include "ui/console.h" - -+ - /* - * TODO: - * - destination write mask support not complete (bits 5..7) -@@ -2946,7 +2948,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) +@@ -2946,6 +2947,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); int16_t device_id = pc->device_id; -- /* -+ warn_report("'cirrus-vga' is deprecated, " -+ "please use a different VGA card instead"); ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); + -+ /* + /* * Follow real hardware, cirrus card emulated has 4 MB video memory. * Also accept 8 MB/16 MB for backward compatibility. - */ diff --git a/hw/ide/piix.c b/hw/ide/piix.c -index 41d60921e3..a4af45b4e8 100644 +index 4e5e12935f..03ca06bb17 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c -@@ -193,7 +193,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) +@@ -190,7 +190,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); @@ -428,7 +465,7 @@ index 41d60921e3..a4af45b4e8 100644 } static const TypeInfo piix3_ide_info = { -@@ -216,6 +217,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) +@@ -214,6 +215,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_STORAGE_IDE; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->hotpluggable = false; @@ -437,6 +474,52 @@ index 41d60921e3..a4af45b4e8 100644 } static const TypeInfo piix4_ide_info = { +diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c +index 1b3b4da01d..454bfa5783 100644 +--- a/hw/ide/qdev.c ++++ b/hw/ide/qdev.c +@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) + 
ide_dev_initfn(dev, IDE_CD, errp); + } + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static void ide_cf_realize(IDEDevice *dev, Error **errp) + { + ide_dev_initfn(dev, IDE_CFATA, errp); + } ++#endif + + #define DEFINE_IDE_DEV_PROPERTIES() \ + DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ +@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { + .class_init = ide_cd_class_init, + }; + ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + static Property ide_cf_properties[] = { + DEFINE_IDE_DEV_PROPERTIES(), + DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), +@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { + .instance_size = sizeof(IDEDrive), + .class_init = ide_cf_class_init, + }; ++#endif + + static void ide_device_class_init(ObjectClass *klass, void *data) + { +@@ -396,7 +402,10 @@ static void ide_register_types(void) + type_register_static(&ide_bus_info); + type_register_static(&ide_hd_info); + type_register_static(&ide_cd_info); ++/* Disabled for Red Hat Enterprise Linux */ ++#if 0 + type_register_static(&ide_cf_info); ++#endif + type_register_static(&ide_device_type_info); + } + diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c index b92b63bedc..3b6235dde6 100644 --- a/hw/input/pckbd.c @@ -451,10 +534,10 @@ index b92b63bedc..3b6235dde6 100644 static const TypeInfo i8042_info = { diff --git a/hw/net/e1000.c b/hw/net/e1000.c -index 23d660619f..b75c9aa799 100644 +index 8ffe1077f1..b3dfeeca4f 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c -@@ -1805,6 +1805,7 @@ static const E1000Info e1000_devices[] = { +@@ -1746,6 +1746,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -462,7 +545,7 @@ index 23d660619f..b75c9aa799 100644 { .name = "e1000-82544gc", .device_id = E1000_DEV_ID_82544GC_COPPER, -@@ -1817,6 +1818,7 @@ static const E1000Info e1000_devices[] = { +@@ -1758,6 +1759,7 @@ static const E1000Info e1000_devices[] = { .revision = 0x03, .phy_id2 = 
E1000_PHY_ID2_8254xx_DEFAULT, }, @@ -471,10 +554,10 @@ index 23d660619f..b75c9aa799 100644 static void e1000_register_types(void) diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index 8a4861f45a..fcb5dfe792 100644 +index 91fae56573..33e0c8724c 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c -@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { +@@ -386,10 +386,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { .instance_size = sizeof(SpaprCpuCore), .class_size = sizeof(SpaprCpuCoreClass), }, @@ -488,10 +571,10 @@ index 8a4861f45a..fcb5dfe792 100644 DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), diff --git a/hw/usb/meson.build b/hw/usb/meson.build -index 599dc24f0d..905a994c3a 100644 +index e94149ebde..4a8adbf3dc 100644 --- a/hw/usb/meson.build +++ b/hw/usb/meson.build -@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade +@@ -52,7 +52,7 @@ system_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reader if cacard.found() usbsmartcard_ss = ss.source_set() usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', @@ -500,86 +583,206 @@ index 599dc24f0d..905a994c3a 100644 hw_usb_modules += {'smartcard': usbsmartcard_ss} endif -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index df0c45e523..c154a4dcf2 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -155,6 +155,7 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index c0055a7832..12e1d6c67e 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -17,8 +17,9 @@ if have_vhost + if have_vhost_user + # fixme - this really should be generic + specific_virtio_ss.add(files('vhost-user.c')) +- system_virtio_ss.add(files('vhost-user-device.c')) +- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) ++# Disabled for 8.2.0 
rebase for RHEL 9.4.0 ++# system_virtio_ss.add(files('vhost-user-device.c')) ++# system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c')) + endif + if have_vhost_vdpa + system_virtio_ss.add(files('vhost-vdpa.c')) +diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c +index b53d5efe13..64989a02d1 100644 +--- a/target/arm/arm-qmp-cmds.c ++++ b/target/arm/arm-qmp-cmds.c +@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index efb22a87f9..a32521ada9 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2524,6 +2524,10 @@ static void cpu_register_class_init(ObjectClass *oc, void *data) + + acc->info = data; + cc->gdb_core_xml_file = "arm-core.xml"; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0d28..7e0f0dfea7 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -34,6 +34,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +@@ -1120,6 +1122,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void 
(*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + /** +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 1e9c6c85ae..10be900803 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -648,6 +648,7 @@ static void aarch64_a57_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -704,6 +705,7 @@ static void aarch64_a53_initfn(Object *obj) + cpu->gic_pribits = 5; + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } ++#endif + + static void aarch64_host_initfn(Object *obj) + { +@@ -742,8 +744,11 @@ static void aarch64_max_initfn(Object *obj) + } + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +@@ -815,8 +820,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c +index d9e0e2a4dd..c5c639a6ea 100644 +--- a/target/arm/tcg/cpu32.c ++++ b/target/arm/tcg/cpu32.c +@@ -98,6 +98,7 @@ void aa32_max_features(ARMCPU *cpu) /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + #if !defined(CONFIG_USER_ONLY) static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) { -@@ -508,6 +509,7 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } -+#endif /* disabled for RHEL */ +@@ -1189,3 +1190,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -532,6 +534,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c +index fcda99e158..bd5a993ff8 100644 +--- a/target/arm/tcg/cpu64.c ++++ b/target/arm/tcg/cpu64.c +@@ -29,6 +29,7 @@ + #include "cpu-features.h" + #include "cpregs.h" +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) + static uint64_t make_ccsidr64(unsigned assoc, unsigned linesize, + unsigned cachesize) { - ARMCPU *cpu = ARM_CPU(obj); -@@ -580,6 +583,7 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ +@@ -134,6 +135,7 @@ static void aarch64_a35_initfn(Object *obj) + /* These values are the same with A53/A57/A72. 
*/ + define_cortex_a72_a57_a53_cp_reginfo(cpu); } -+#endif /* disabled for RHEL */ ++#endif - static void cortex_a15_initfn(Object *obj) - { -@@ -628,6 +632,7 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } + static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +@@ -223,6 +225,7 @@ static void cpu_max_get_l0gptsz(Object *obj, Visitor *v, const char *name, + static Property arm_cpu_lpa2_property = + DEFINE_PROP_BOOL("lpa2", ARMCPU, prop_lpa2, true); +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) + static void aarch64_a55_initfn(Object *obj) { ARMCPU *cpu = ARM_CPU(obj); -@@ -1110,6 +1115,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; +@@ -1065,6 +1068,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) + aarch64_add_pauth_properties(obj); + aarch64_add_sve_properties(obj); } -+#endif /* disabled for RHEL */ ++#endif - #ifndef TARGET_AARCH64 /* -@@ -1177,6 +1183,7 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ + * -cpu max: a CPU with as many features enabled as our emulation supports. 
+@@ -1259,6 +1263,7 @@ void aarch64_max_tcg_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } - static const ARMCPUInfo arm_tcg_cpus[] = { +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1192,7 +1199,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { .name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, -+#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, + static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, + { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, +@@ -1270,14 +1275,17 @@ static const ARMCPUInfo aarch64_cpus[] = { + { .name = "neoverse-v1", .initfn = aarch64_neoverse_v1_initfn }, + { .name = "neoverse-n2", .initfn = aarch64_neoverse_n2_initfn }, + }; ++#endif + + static void aarch64_cpu_register_types(void) + { +#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1224,6 +1233,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, -+#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif + size_t i; + + for (i = 0; i < ARRAY_SIZE(aarch64_cpus); ++i) { + aarch64_cpu_register(&aarch64_cpus[i]); + } ++#endif + } + + type_init(aarch64_cpu_register_types) diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index 912b037c63..cd3ff700ac 100644 +index 7dbb47de64..69fddb05bc 100644 --- a/target/ppc/cpu-models.c 
+++ b/target/ppc/cpu-models.c @@ -66,6 +66,7 @@ @@ -609,7 +812,7 @@ index 912b037c63..cd3ff700ac 100644 POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, "POWER7 v2.3") POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, -@@ -896,12 +900,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -898,12 +902,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "7447a", "7447a_v1.2" }, { "7457a", "7457a_v1.2" }, { "apollo7pm", "7457a_v1.0" }, @@ -625,7 +828,7 @@ index 912b037c63..cd3ff700ac 100644 { "power7", "power7_v2.3" }, { "power7+", "power7+_v2.1" }, { "power8e", "power8e_v2.1" }, -@@ -911,12 +918,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { +@@ -913,12 +920,14 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { { "power10", "power10_v2.0" }, #endif @@ -655,10 +858,10 @@ index 63981bf36b..87a4480c05 100644 /* detect missing features if any to properly report them */ diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 3ac7ec9acf..97da1a6424 100644 +index 33ab3551f4..912e493951 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c -@@ -2529,6 +2529,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +@@ -2567,6 +2567,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) error_setg(errp, "KVM doesn't support CPU models"); return; } @@ -673,6 +876,37 @@ index 3ac7ec9acf..97da1a6424 100644 prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); /* configure cpu features indicated via STFL(e) */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index a8a4c668ad..2458cc527c 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -451,8 +451,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + 
assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. */ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -469,6 +471,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -481,6 +484,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0006-Machine-type-related-general-changes.patch b/SOURCES/0006-Machine-type-related-general-changes.patch index 5dd591f..4a4c6fb 100644 --- a/SOURCES/0006-Machine-type-related-general-changes.patch +++ b/SOURCES/0006-Machine-type-related-general-changes.patch @@ -1,4 +1,4 @@ -From c13f8e21b32aa06b08847e88080f2fdea5084a9b Mon Sep 17 00:00:00 2001 +From d9ff466c980d219ebf230ea24becce294c196f1f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 11 Jan 2019 09:54:45 +0100 Subject: Machine type related general changes @@ -19,10 +19,13 @@ Rebase notes (7.0.0): - Remove downstream changes leftovers in hw/rtc/mc146818rtc.c - Remove unnecessary change in hw/usb/hcd-uhci.c -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - Moved adding rhel_old_machine_deprecation variable from s390x to general machine types commit - Moved adding hw_compat_rhel_8_6 struct from x86_64 to general machine types commit +Rebase notes (8.1.0): +- Do not modify unused vga-isa.c + Merged patches (6.1.0): - f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 - 
1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 @@ -40,39 +43,45 @@ Merged patches (7.0.0): - ef5afcc86d Fix virtio-net-pci* "vectors" compat - 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/acpi/piix4.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/core/machine.c and include/hw/boards.h chunk) -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 21ed34787b Addd 7.2 compat bits for RHEL 9.1 machine type - e5c8d5d603 virtio-rng-pci: fix migration compat for vectors - 5a5fa77059 virtio-rng-pci: fix transitional migration compat for vectors + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (general part) +- 1165e24c6b hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) --- hw/acpi/piix4.c | 2 +- hw/arm/virt.c | 2 +- - hw/core/machine.c | 229 +++++++++++++++++++++++++++++++++++ - hw/display/vga-isa.c | 2 +- + hw/core/machine.c | 267 +++++++++++++++++++++++++++++++++++ hw/i386/pc_piix.c | 2 + hw/i386/pc_q35.c | 2 + hw/net/rtl8139.c | 4 +- - hw/smbios/smbios.c | 46 ++++++- + hw/smbios/smbios.c | 46 +++++- hw/timer/i8254_common.c | 2 +- - hw/usb/hcd-xhci-pci.c | 59 ++++++--- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- hw/usb/hcd-xhci-pci.h | 1 + - include/hw/boards.h | 31 +++++ + include/hw/boards.h | 40 ++++++ include/hw/firmware/smbios.h | 5 +- include/hw/i386/pc.h | 3 + - 14 files changed, 367 insertions(+), 23 deletions(-) + 13 files changed, 413 insertions(+), 22 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c -index 63d2113b86..a24b9aac92 100644 +index dd523d2e4c..5050c0ba97 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c -@@ -247,7 +247,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) +@@ -245,7 +245,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) static const VMStateDescription vmstate_acpi = { .name = "piix4_pm", .version_id = 3, @@ -82,10 +91,10 @@ index 63d2113b86..a24b9aac92 100644 .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index ac626b3bef..4a6e89c7bc 100644 +index af9ea4dd1c..62f0f7d4d6 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -1629,7 +1629,7 @@ static void virt_build_smbios(VirtMachineState *vms) +@@ -1638,7 +1638,7 @@ static void virt_build_smbios(VirtMachineState *vms) smbios_set_defaults("QEMU", product, vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, @@ -95,10 +104,10 @@ index ac626b3bef..4a6e89c7bc 100644 /* build the array of physical mem area from base_memmap */ mem_array.address = vms->memmap[VIRT_MEM].base; diff --git a/hw/core/machine.c b/hw/core/machine.c -index cd13b8b0a3..5aa567fad3 100644 +index 0c17398141..446601ee30 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c -@@ -46,6 +46,235 @@ GlobalProperty hw_compat_7_2[] = { +@@ -57,6 +57,273 @@ GlobalProperty hw_compat_7_2[] = { }; const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); @@ -108,6 +117,44 @@ index cd13b8b0a3..5aa567fad3 100644 +const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_9_4[] = { ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "host_uso", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso4", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_0 */ ++ { TYPE_VIRTIO_NET, "guest_uso6", "off"}, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "ramfb", "x-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "igb", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); ++ ++GlobalProperty hw_compat_rhel_9_3[] = { ++ /* hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { "migration", "multifd-flush-after-each-section", "on"}, ++ /* hw_compat_rhel_9_3 from hw_compat_8_0 */ ++ { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" }, ++}; ++const size_t hw_compat_rhel_9_3_len = G_N_ELEMENTS(hw_compat_rhel_9_3); ++ ++GlobalProperty hw_compat_rhel_9_2[] = { ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "e1000e", "migrate-timadj", "off" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ 
{ "virtio-mem", "x-early-migration", "false" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, ++}; ++const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); ++ +/* + * Mostly the same as hw_compat_7_0 + */ @@ -334,25 +381,12 @@ index cd13b8b0a3..5aa567fad3 100644 GlobalProperty hw_compat_7_1[] = { { "virtio-device", "queue_reset", "false" }, { "virtio-rng-pci", "vectors", "0" }, -diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c -index 2a5437d803..0db2c2b2a1 100644 ---- a/hw/display/vga-isa.c -+++ b/hw/display/vga-isa.c -@@ -89,7 +89,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) - } - - static Property vga_isa_properties[] = { -- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), -+ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), - DEFINE_PROP_END_OF_LIST(), - }; - diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 30eedd62a3..14a794081e 100644 +index eace854335..2a9f465619 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -201,6 +201,8 @@ static void pc_init1(MachineState *machine, - smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +@@ -238,6 +238,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", mc->desc, mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, @@ -361,11 +395,11 @@ index 30eedd62a3..14a794081e 100644 } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 797ba347fd..dc0ba5f9e7 100644 +index 4f3e5412f6..912cb0c0dc 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -202,6 +202,8 @@ static void pc_q35_init(MachineState *machine) - smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +@@ -206,6 +206,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", mc->desc, mc->name, 
pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, @@ -374,10 +408,10 @@ index 797ba347fd..dc0ba5f9e7 100644 } diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c -index 5a5aaf868d..3d473d5869 100644 +index 4af8c66266..7dc12907ab 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c -@@ -3178,7 +3178,7 @@ static int rtl8139_pre_save(void *opaque) +@@ -3169,7 +3169,7 @@ static int rtl8139_pre_save(void *opaque) static const VMStateDescription vmstate_rtl8139 = { .name = "rtl8139", @@ -386,7 +420,7 @@ index 5a5aaf868d..3d473d5869 100644 .minimum_version_id = 3, .post_load = rtl8139_post_load, .pre_save = rtl8139_pre_save, -@@ -3259,7 +3259,9 @@ static const VMStateDescription vmstate_rtl8139 = { +@@ -3250,7 +3250,9 @@ static const VMStateDescription vmstate_rtl8139 = { VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), @@ -397,7 +431,7 @@ index 5a5aaf868d..3d473d5869 100644 VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c -index d2007e70fb..319eae9e9d 100644 +index 2a90601ac5..7bde23e59d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -58,6 +58,9 @@ static bool smbios_legacy = true; @@ -419,7 +453,7 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); SMBIOS_TABLE_SET_STR(2, product_str, type2.product); -@@ -980,7 +983,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) +@@ -985,7 +988,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) void smbios_set_defaults(const char *manufacturer, const char *product, const char *version, bool legacy_mode, @@ -431,7 +465,7 @@ index d2007e70fb..319eae9e9d 100644 { smbios_have_defaults = true; smbios_legacy = legacy_mode; -@@ -1001,11 +1007,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, +@@ -1006,11 +1012,45 @@ void 
smbios_set_defaults(const char *manufacturer, const char *product, g_free(smbios_entries); } @@ -479,10 +513,10 @@ index d2007e70fb..319eae9e9d 100644 SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); SMBIOS_SET_DEFAULT(type3.version, version); diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c -index 050875b497..32935da46c 100644 +index b25da448c8..0331e84398 100644 --- a/hw/timer/i8254_common.c +++ b/hw/timer/i8254_common.c -@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { +@@ -229,7 +229,7 @@ static const VMStateDescription vmstate_pit_common = { .pre_save = pit_dispatch_pre_save, .post_load = pit_dispatch_post_load, .fields = (VMStateField[]) { @@ -603,13 +637,22 @@ index 08f70ce97c..1be7527c1b 100644 #endif diff --git a/include/hw/boards.h b/include/hw/boards.h -index 6fbbfd56c8..c5a965d27f 100644 +index da85f86efb..4a21eddbf9 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -459,4 +459,35 @@ extern const size_t hw_compat_2_2_len; +@@ -503,4 +503,44 @@ extern const size_t hw_compat_2_2_len; extern GlobalProperty hw_compat_2_1[]; extern const size_t hw_compat_2_1_len; ++extern GlobalProperty hw_compat_rhel_9_4[]; ++extern const size_t hw_compat_rhel_9_4_len; ++ ++extern GlobalProperty hw_compat_rhel_9_3[]; ++extern const size_t hw_compat_rhel_9_3_len; ++ ++extern GlobalProperty hw_compat_rhel_9_2[]; ++extern const size_t hw_compat_rhel_9_2_len; ++ +extern GlobalProperty hw_compat_rhel_9_1[]; +extern const size_t hw_compat_rhel_9_1_len; + @@ -659,13 +702,13 @@ index 7f3259a630..d24b3ccd32 100644 void smbios_get_tables(MachineState *ms, const struct smbios_phys_mem_area *mem_array, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 8206d5405a..908a275736 100644 +index a10ceeabbf..037942d233 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -111,6 +111,9 @@ struct PCMachineClass { - bool smbios_defaults; +@@ -113,6 +113,9 @@ struct PCMachineClass { bool smbios_legacy_mode; bool 
smbios_uuid_encoded; + SmbiosEntryPointType default_smbios_ep_type; + /* New fields needed for Windows HardwareID-6 matching */ + const char *smbios_stream_product; + const char *smbios_stream_version; @@ -673,5 +716,5 @@ index 8206d5405a..908a275736 100644 /* RAM / address space compat: */ bool gigabyte_align; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0007-Add-aarch64-machine-types.patch b/SOURCES/0007-Add-aarch64-machine-types.patch index f47bbd0..fde7982 100644 --- a/SOURCES/0007-Add-aarch64-machine-types.patch +++ b/SOURCES/0007-Add-aarch64-machine-types.patch @@ -1,4 +1,4 @@ -From ec6468b65a3af0e2b84575c9f965f61916d0d8ea Mon Sep 17 00:00:00 2001 +From 23f614ab0b79ec1c6f65a7f0d6993bfdfc53fd23 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 12:53:31 +0200 Subject: Add aarch64 machine types @@ -17,18 +17,19 @@ Rebase notes (7.0.0): - Added dtb-kaslr-seed option - Set no_tcg_lpa2 to true -Rebase notes (7.1.0 rc0): +Rebase notes (7.1.0): - replace dtb_kaslr_seed by dtb_randomness - -Rebase notes (7.1.0 rc3): - Updated dtb_randomness comment -Rebase notes (7.2.0 rc0): +Rebase notes (7.2.0): - Disabled cortex-a35 -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - Moved changed code from target/arm/helper.c to target/arm/arm-qmp-cmds.c +Rebase notes (8.1.0): +- Added setting default_nic + Merged patches (6.2.0): - 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type - f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type @@ -46,33 +47,33 @@ Merged patches (7.0.0): - f79b31bdef hw/arm/virt: Remove the dtb-kaslr-seed machine option - b6fca85f4a hw/arm/virt: Fix missing initialization in instance/class_init() -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - ac97dd4f9f RHEL-only: AArch64: Drop unsupported CPU types - e9c0a70664 target/arm: deprecate named CPU models -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged 
patches (8.0.0): - c1a21266d8 redhat: aarch64: add rhel9.2.0 virt machine type - d97cd7c513 redhat: fix virt-rhel9.2.0 compat props + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (arm part) +- c07f666086 hw/arm/virt: Validate cluster and NUMA node boundary for RHEL machines + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. (partial) --- - hw/arm/virt.c | 251 ++++++++++++++++++++++++++++++++- - include/hw/arm/virt.h | 8 ++ - target/arm/arm-qmp-cmds.c | 2 + - target/arm/cpu-qom.h | 1 + - target/arm/cpu.c | 5 + - target/arm/cpu.h | 2 + - target/arm/cpu64.c | 16 ++- - target/arm/cpu_tcg.c | 12 +- - tests/qtest/arm-cpu-features.c | 6 + - 9 files changed, 289 insertions(+), 14 deletions(-) + hw/arm/virt.c | 250 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + 2 files changed, 257 insertions(+), 1 deletion(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 4a6e89c7bc..1ae1654be5 100644 +index 62f0f7d4d6..c541efee5e 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c -@@ -81,6 +81,7 @@ +@@ -82,6 +82,7 @@ #include "hw/char/pl011.h" #include "qemu/guest-random.h" @@ -80,13 +81,12 @@ index 4a6e89c7bc..1ae1654be5 100644 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ void *data) \ -@@ -107,7 +108,48 @@ +@@ -108,7 +109,48 @@ DEFINE_VIRT_MACHINE_LATEST(major, minor, true) #define DEFINE_VIRT_MACHINE(major, minor) \ DEFINE_VIRT_MACHINE_LATEST(major, minor, false) -- +#endif /* disabled for RHEL */ -+ + +#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ @@ -130,28 +130,7 @@ index 4a6e89c7bc..1ae1654be5 100644 /* Number of external interrupt lines to configure the GIC with */ #define NUM_IRQS 256 -@@ -204,16 +246,20 @@ static const int a15irqmap[] = { - }; - - static const char *valid_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - 
ARM_CPU_TYPE_NAME("cortex-a7"), - ARM_CPU_TYPE_NAME("cortex-a15"), - ARM_CPU_TYPE_NAME("cortex-a35"), - ARM_CPU_TYPE_NAME("cortex-a53"), - ARM_CPU_TYPE_NAME("cortex-a55"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("cortex-a57"), -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - ARM_CPU_TYPE_NAME("cortex-a72"), - ARM_CPU_TYPE_NAME("cortex-a76"), - ARM_CPU_TYPE_NAME("a64fx"), - ARM_CPU_TYPE_NAME("neoverse-n1"), -+#endif /* disabled for RHEL */ - ARM_CPU_TYPE_NAME("host"), - ARM_CPU_TYPE_NAME("max"), - }; -@@ -2339,6 +2385,7 @@ static void machvirt_init(MachineState *machine) +@@ -2341,6 +2383,7 @@ static void machvirt_init(MachineState *machine) qemu_add_machine_init_done_notifier(&vms->machine_done); } @@ -159,7 +138,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_secure(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2366,6 +2413,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) +@@ -2368,6 +2411,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) vms->virt = value; } @@ -167,16 +146,15 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_highmem(Object *obj, Error **errp) { -@@ -2380,7 +2428,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) - +@@ -2383,6 +2427,7 @@ static void virt_set_highmem(Object *obj, bool value, Error **errp) vms->highmem = value; } -- + +#if 0 /* Disabled for Red Hat Enterprise Linux */ static bool virt_get_compact_highmem(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2436,7 +2484,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) +@@ -2438,7 +2483,7 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) vms->highmem_mmio = value; } @@ -185,7 +163,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_its(Object *obj, Error **errp) { -@@ -2452,6 +2500,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) +@@ -2454,6 
+2499,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) vms->its = value; } @@ -193,7 +171,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_dtb_randomness(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2465,6 +2514,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) +@@ -2467,6 +2513,7 @@ static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp) vms->dtb_randomness = value; } @@ -201,7 +179,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_oem_id(Object *obj, Error **errp) { -@@ -2548,6 +2598,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) +@@ -2550,6 +2597,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) vms->ras = value; } @@ -209,7 +187,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static bool virt_get_mte(Object *obj, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(obj); -@@ -2561,6 +2612,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) +@@ -2563,6 +2611,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) vms->mte = value; } @@ -217,7 +195,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static char *virt_get_gic_version(Object *obj, Error **errp) { -@@ -2988,6 +3040,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) +@@ -2935,6 +2984,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) return fixed_ipa ? 
0 : requested_pa_size; } @@ -225,7 +203,7 @@ index 4a6e89c7bc..1ae1654be5 100644 static void virt_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); -@@ -3441,3 +3494,195 @@ static void virt_machine_2_6_options(MachineClass *mc) +@@ -3405,3 +3455,201 @@ static void virt_machine_2_6_options(MachineClass *mc) vmc->no_pmu = true; } DEFINE_VIRT_MACHINE(2, 6) @@ -263,7 +241,10 @@ index 4a6e89c7bc..1ae1654be5 100644 + mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; ++ /* platform instead of architectural choice */ ++ mc->cpu_cluster_has_numa_boundary = true; + mc->default_ram_id = "mach-virt.ram"; ++ mc->default_nic = "virtio-net-pci"; + + object_class_property_add(oc, "acpi", "OnOffAuto", + virt_get_acpi, virt_set_acpi, @@ -404,6 +385,9 @@ index 4a6e89c7bc..1ae1654be5 100644 +static void rhel920_virt_options(MachineClass *mc) +{ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); +} +DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) + @@ -422,10 +406,10 @@ index 4a6e89c7bc..1ae1654be5 100644 +} +DEFINE_RHEL_MACHINE(9, 0, 0) diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h -index e1ddbea96b..81c2363a40 100644 +index f69239850e..7b8abe5645 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h -@@ -187,9 +187,17 @@ struct VirtMachineState { +@@ -177,9 +177,17 @@ struct VirtMachineState { #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) @@ -443,270 +427,6 @@ index e1ddbea96b..81c2363a40 100644 void virt_acpi_setup(VirtMachineState *vms); bool virt_is_acpi_enabled(VirtMachineState *vms); -diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c -index c8fa524002..3aa089abf3 100644 ---- a/target/arm/arm-qmp-cmds.c -+++ b/target/arm/arm-qmp-cmds.c -@@ -231,6 +231,7 @@ CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, - static void arm_cpu_add_definition(gpointer data, gpointer user_data) - { - ObjectClass *oc = data; -+ CPUClass *cc = CPU_CLASS(oc); - CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfo *info; - const char *typename; -@@ -240,6 +241,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) - info->name = g_strndup(typename, - strlen(typename) - strlen("-" TYPE_ARM_CPU)); - info->q_typename = g_strdup(typename); -+ info->deprecated = !!cc->deprecation_note; - - QAPI_LIST_PREPEND(*cpu_list, info); - } -diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h -index 514c22ced9..f789173451 100644 ---- a/target/arm/cpu-qom.h -+++ b/target/arm/cpu-qom.h -@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { - const char *name; - void (*initfn)(Object *obj); - void (*class_init)(ObjectClass *oc, void *data); -+ const char *deprecation_note; - } ARMCPUInfo; - - void arm_cpu_register(const ARMCPUInfo *info); -diff --git a/target/arm/cpu.c b/target/arm/cpu.c -index 5182ed0c91..6740a8b940 100644 ---- a/target/arm/cpu.c -+++ b/target/arm/cpu.c -@@ -2290,8 +2290,13 @@ static void arm_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void arm_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu.h b/target/arm/cpu.h -index 
c097cae988..829d4a2328 100644 ---- a/target/arm/cpu.h -+++ b/target/arm/cpu.h -@@ -34,6 +34,8 @@ - #define KVM_HAVE_MCE_INJECTION 1 - #endif - -+#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" -+ - #define EXCP_UDEF 1 /* undefined instruction */ - #define EXCP_SWI 2 /* software interrupt */ - #define EXCP_PREFETCH_ABORT 3 -diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c -index 0fb07cc7b6..47459627fb 100644 ---- a/target/arm/cpu64.c -+++ b/target/arm/cpu64.c -@@ -31,6 +31,7 @@ - #include "hw/qdev-properties.h" - #include "internals.h" - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a35_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -110,6 +111,7 @@ static void aarch64_a35_initfn(Object *obj) - /* These values are the same with A53/A57/A72. */ - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) - { -@@ -730,6 +732,7 @@ static void aarch64_a57_initfn(Object *obj) - define_cortex_a72_a57_a53_cp_reginfo(cpu); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void aarch64_a53_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1164,6 +1167,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) - - define_neoverse_n1_cp_reginfo(cpu); - } -+#endif /* disabled for RHEL */ - - static void aarch64_host_initfn(Object *obj) - { -@@ -1373,14 +1377,19 @@ static void aarch64_max_initfn(Object *obj) - } - - static const ARMCPUInfo aarch64_cpus[] = { -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a35", .initfn = aarch64_a35_initfn }, -- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, -+#endif /* disabled for RHEL */ -+ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, -+ .deprecation_note = RHEL_CPU_DEPRECATION }, -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, - { .name = "cortex-a55", .initfn = aarch64_a55_initfn }, - { .name = 
"cortex-a72", .initfn = aarch64_a72_initfn }, - { .name = "cortex-a76", .initfn = aarch64_a76_initfn }, - { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, - { .name = "neoverse-n1", .initfn = aarch64_neoverse_n1_initfn }, -+#endif /* disabled for RHEL */ - { .name = "max", .initfn = aarch64_max_initfn }, - #if defined(CONFIG_KVM) || defined(CONFIG_HVF) - { .name = "host", .initfn = aarch64_host_initfn }, -@@ -1452,8 +1461,13 @@ static void aarch64_cpu_instance_init(Object *obj) - static void cpu_register_class_init(ObjectClass *oc, void *data) - { - ARMCPUClass *acc = ARM_CPU_CLASS(oc); -+ CPUClass *cc = CPU_CLASS(oc); - - acc->info = data; -+ -+ if (acc->info->deprecation_note) { -+ cc->deprecation_note = acc->info->deprecation_note; -+ } - } - - void aarch64_cpu_register(const ARMCPUInfo *info) -diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c -index c154a4dcf2..f29425b656 100644 ---- a/target/arm/cpu_tcg.c -+++ b/target/arm/cpu_tcg.c -@@ -152,10 +152,10 @@ void define_cortex_a72_a57_a53_cp_reginfo(ARMCPU *cpu) - } - #endif /* !CONFIG_USER_ONLY */ - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - /* CPU models. These are not needed for the AArch64 linux-user build. 
*/ - #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) - static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) - { -@@ -509,7 +509,6 @@ static void cortex_a9_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41093000; - define_arm_cp_regs(cpu, cortexa9_cp_reginfo); - } --#endif /* disabled for RHEL */ - - #ifndef CONFIG_USER_ONLY - static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) -@@ -534,7 +533,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { - .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0 }, - }; - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_a7_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -583,7 +581,6 @@ static void cortex_a7_initfn(Object *obj) - cpu->isar.reset_pmcr_el0 = 0x41072000; - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ - } --#endif /* disabled for RHEL */ - - static void cortex_a15_initfn(Object *obj) - { -@@ -632,7 +629,6 @@ static void cortex_a15_initfn(Object *obj) - define_arm_cp_regs(cpu, cortexa15_cp_reginfo); - } - --#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void cortex_m0_initfn(Object *obj) - { - ARMCPU *cpu = ARM_CPU(obj); -@@ -1115,7 +1111,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) - - cc->gdb_core_xml_file = "arm-m-profile.xml"; - } --#endif /* disabled for RHEL */ - - #ifndef TARGET_AARCH64 - /* -@@ -1183,7 +1178,6 @@ static void arm_max_initfn(Object *obj) - #endif /* !TARGET_AARCH64 */ - - static const ARMCPUInfo arm_tcg_cpus[] = { --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "arm926", .initfn = arm926_initfn }, - { .name = "arm946", .initfn = arm946_initfn }, - { .name = "arm1026", .initfn = arm1026_initfn }, -@@ -1199,9 +1193,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "cortex-a7", .initfn = cortex_a7_initfn }, - { 
.name = "cortex-a8", .initfn = cortex_a8_initfn }, - { .name = "cortex-a9", .initfn = cortex_a9_initfn }, --#endif /* disabled for RHEL */ - { .name = "cortex-a15", .initfn = cortex_a15_initfn }, --#if 0 /* Disabled for Red Hat Enterprise Linux */ - { .name = "cortex-m0", .initfn = cortex_m0_initfn, - .class_init = arm_v7m_class_init }, - { .name = "cortex-m3", .initfn = cortex_m3_initfn, -@@ -1233,7 +1225,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { - { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, - { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, - { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, --#endif /* disabled for RHEL */ - #ifndef TARGET_AARCH64 - { .name = "max", .initfn = arm_max_initfn }, - #endif -@@ -1261,3 +1252,4 @@ static void arm_tcg_cpu_register_types(void) - type_init(arm_tcg_cpu_register_types) - - #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ -+#endif /* disabled for RHEL */ -diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c -index 1cb08138ad..834497dfec 100644 ---- a/tests/qtest/arm-cpu-features.c -+++ b/tests/qtest/arm-cpu-features.c -@@ -441,8 +441,10 @@ static void test_query_cpu_model_expansion(const void *data) - assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); - - /* Test expected feature presence/absence for some cpu types */ -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "cortex-a15", "pmu"); - assert_has_not_feature(qts, "cortex-a15", "aarch64"); -+#endif /* disabled for RHEL */ - - /* Enabling and disabling pmu should always work. 
*/ - assert_has_feature_enabled(qts, "max", "pmu"); -@@ -459,6 +461,7 @@ static void test_query_cpu_model_expansion(const void *data) - assert_has_feature_enabled(qts, "cortex-a57", "pmu"); - assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_has_feature_enabled(qts, "a64fx", "pmu"); - assert_has_feature_enabled(qts, "a64fx", "aarch64"); - /* -@@ -471,6 +474,7 @@ static void test_query_cpu_model_expansion(const void *data) - "{ 'sve384': true }"); - assert_error(qts, "a64fx", "cannot enable sve640", - "{ 'sve640': true }"); -+#endif /* disabled for RHEL */ - - sve_tests_default(qts, "max"); - pauth_tests_default(qts, "max"); -@@ -506,9 +510,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) - QDict *resp; - char *error; - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - assert_error(qts, "cortex-a15", - "We cannot guarantee the CPU type 'cortex-a15' works " - "with KVM on this host", NULL); -+#endif /* disabled for RHEL */ - - assert_has_feature_enabled(qts, "host", "aarch64"); - -- -2.39.1 +2.39.3 diff --git a/SOURCES/0008-Add-ppc64-machine-types.patch b/SOURCES/0008-Add-ppc64-machine-types.patch index ab78cae..a269adb 100644 --- a/SOURCES/0008-Add-ppc64-machine-types.patch +++ b/SOURCES/0008-Add-ppc64-machine-types.patch @@ -1,4 +1,4 @@ -From 401d0ebf1ee959fd944df6b5b4ae9c51c36d1244 Mon Sep 17 00:00:00 2001 +From d03cff85f5f1b69b1a66011ebaa974ece81d31bc Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:27:13 +0200 Subject: Add ppc64 machine types @@ -20,7 +20,7 @@ Merged patches (6.1.0): - af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) - 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - baa6790171 target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL --- hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ @@ -34,10 +34,10 @@ Merged patches (7.1.0 
rc0): 8 files changed, 314 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c -index 4921198b9d..e24b3e22e3 100644 +index df09aa9d6a..ff459e1a46 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c -@@ -1634,6 +1634,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) +@@ -1689,6 +1689,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) pef_kvm_reset(machine->cgs, &error_fatal); spapr_caps_apply(spapr); @@ -47,7 +47,7 @@ index 4921198b9d..e24b3e22e3 100644 first_ppc_cpu = POWERPC_CPU(first_cpu); if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && -@@ -3348,6 +3351,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +@@ -3397,6 +3400,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) spapr->host_serial = g_strdup(value); } @@ -68,7 +68,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_instance_init(Object *obj) { SpaprMachineState *spapr = SPAPR_MACHINE(obj); -@@ -3426,6 +3443,12 @@ static void spapr_instance_init(Object *obj) +@@ -3475,6 +3492,12 @@ static void spapr_instance_init(Object *obj) spapr_get_host_serial, spapr_set_host_serial); object_property_set_description(obj, "host-serial", "Host serial number to advertise in guest device tree"); @@ -81,7 +81,7 @@ index 4921198b9d..e24b3e22e3 100644 } static void spapr_machine_finalizefn(Object *obj) -@@ -4683,6 +4706,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) +@@ -4734,6 +4757,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) vmc->client_architecture_support = spapr_vof_client_architecture_support; vmc->quiesce = spapr_vof_quiesce; vmc->setprop = spapr_vof_setprop; @@ -89,15 +89,15 @@ index 4921198b9d..e24b3e22e3 100644 } static const TypeInfo spapr_machine_info = { -@@ -4734,6 +4758,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) +@@ -4785,6 +4809,7 @@ static void 
spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ /* - * pseries-8.0 + * pseries-8.2 */ -@@ -4894,6 +4919,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) +@@ -4967,6 +4992,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) } DEFINE_SPAPR_MACHINE(4_1, "4.1", false); @@ -105,8 +105,8 @@ index 4921198b9d..e24b3e22e3 100644 /* * pseries-4.0 -@@ -4913,6 +4939,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, - *nv2atsd = 0; +@@ -4982,6 +5008,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + } return true; } + @@ -114,7 +114,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_4_0_class_options(MachineClass *mc) { SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); -@@ -5240,6 +5268,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) +@@ -5306,6 +5334,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); } DEFINE_SPAPR_MACHINE(2_1, "2.1", false); @@ -337,7 +337,7 @@ index 4921198b9d..e24b3e22e3 100644 static void spapr_machine_register_types(void) { diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c -index fcb5dfe792..ab8fb5bf62 100644 +index 33e0c8724c..9d01663f43 100644 --- a/hw/ppc/spapr_cpu_core.c +++ b/hw/ppc/spapr_cpu_core.c @@ -25,6 +25,7 @@ @@ -348,7 +348,7 @@ index fcb5dfe792..ab8fb5bf62 100644 static void spapr_reset_vcpu(PowerPCCPU *cpu) { -@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -261,6 +262,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, { CPUPPCState *env = &cpu->env; CPUState *cs = CPU(cpu); @@ -356,7 +356,7 @@ index fcb5dfe792..ab8fb5bf62 100644 if (!qdev_realize(DEVICE(cpu), NULL, errp)) { return false; -@@ -270,6 +272,17 @@ static bool 
spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, +@@ -277,6 +279,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, /* Set time-base frequency to 512 MHz. vhyp must be set first. */ cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); @@ -375,10 +375,10 @@ index fcb5dfe792..ab8fb5bf62 100644 qdev_unrealize(DEVICE(cpu)); return false; diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h -index 5c8aabd444..04489d5808 100644 +index e91791a1a9..1951d8a2a0 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h -@@ -155,6 +155,7 @@ struct SpaprMachineClass { +@@ -154,6 +154,7 @@ struct SpaprMachineClass { bool pre_5_2_numa_associativity; bool pre_6_2_numa_affinity; @@ -386,7 +386,7 @@ index 5c8aabd444..04489d5808 100644 bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, uint64_t *buid, hwaddr *pio, hwaddr *mmio32, hwaddr *mmio64, -@@ -257,6 +258,9 @@ struct SpaprMachineState { +@@ -256,6 +257,9 @@ struct SpaprMachineState { /* Set by -boot */ char *boot_device; @@ -397,7 +397,7 @@ index 5c8aabd444..04489d5808 100644 char *kvm_type; char *host_model; diff --git a/target/ppc/compat.c b/target/ppc/compat.c -index 7949a24f5a..f207a9ba01 100644 +index ebef2cccec..ff2c00c60e 100644 --- a/target/ppc/compat.c +++ b/target/ppc/compat.c @@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) @@ -422,10 +422,10 @@ index 7949a24f5a..f207a9ba01 100644 const CompatInfo *compat = compat_by_pvr(compat_pvr); const CompatInfo *min = compat_by_pvr(min_compat_pvr); diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c -index cd3ff700ac..1cb49c8087 100644 +index 69fddb05bc..64a05aaef3 100644 --- a/target/ppc/cpu-models.c +++ b/target/ppc/cpu-models.c -@@ -746,6 +746,7 @@ +@@ -748,6 +748,7 @@ /* PowerPC CPU aliases */ PowerPCCPUAlias ppc_cpu_aliases[] = { @@ -434,10 +434,10 @@ index cd3ff700ac..1cb49c8087 100644 { "405cr", "405crc" }, { "405gp", "405gpd" }, diff --git a/target/ppc/cpu.h 
b/target/ppc/cpu.h -index 557d736dab..6646ec1c27 100644 +index f8101ffa29..e799a2bee6 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h -@@ -1482,6 +1482,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) +@@ -1635,6 +1635,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) /* Compatibility modes */ #if defined(TARGET_PPC64) @@ -446,18 +446,18 @@ index 557d736dab..6646ec1c27 100644 uint32_t min_compat_pvr, uint32_t max_compat_pvr); bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c -index 78f6fc50cd..68d06c3f8f 100644 +index 9b1abe2fc4..56f1c46e8e 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c -@@ -88,6 +88,7 @@ static int cap_ppc_nested_kvm_hv; - static int cap_large_decr; +@@ -89,6 +89,7 @@ static int cap_large_decr; static int cap_fwnmi; static int cap_rpt_invalidate; + static int cap_ail_mode_3; +static int cap_ppc_secure_guest; static uint32_t debug_inst_opcode; -@@ -135,6 +136,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) +@@ -141,6 +142,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); kvmppc_get_cpu_characteristics(s); cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); @@ -465,8 +465,8 @@ index 78f6fc50cd..68d06c3f8f 100644 cap_large_decr = kvmppc_get_dec_bits(); cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); /* -@@ -2569,6 +2571,16 @@ int kvmppc_has_cap_rpt_invalidate(void) - return cap_rpt_invalidate; +@@ -2579,6 +2581,16 @@ bool kvmppc_supports_ail_3(void) + return cap_ail_mode_3; } +bool kvmppc_has_cap_secure_guest(void) @@ -482,7 +482,7 @@ index 78f6fc50cd..68d06c3f8f 100644 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) { uint32_t host_pvr = mfpvr(); -@@ -2969,3 +2981,18 @@ bool kvm_arch_cpu_check_are_resettable(void) +@@ -2979,3 +2991,18 @@ bool kvm_arch_cpu_check_are_resettable(void) void kvm_arch_accel_class_init(ObjectClass 
*oc) { } @@ -502,27 +502,27 @@ index 78f6fc50cd..68d06c3f8f 100644 + } +} diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h -index 5fd9753953..b5ebfe2be0 100644 +index 1975fb5ee6..d1017f98be 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h -@@ -43,6 +43,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); +@@ -46,6 +46,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, bool radix, bool gtse, uint64_t proc_tbl); +void kvmppc_svm_allow(Error **errp); - #ifndef CONFIG_USER_ONLY bool kvmppc_spapr_use_multitce(void); int kvmppc_spapr_enable_inkernel_multitce(void); -@@ -77,6 +78,8 @@ int kvmppc_get_cap_large_decr(void); - int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift, +@@ -79,6 +80,8 @@ int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); int kvmppc_has_cap_rpt_invalidate(void); + bool kvmppc_supports_ail_3(void); int kvmppc_enable_hwrng(void); +bool kvmppc_has_cap_secure_guest(void); +int kvmppc_enable_cap_secure_guest(void); int kvmppc_put_books_sregs(PowerPCCPU *cpu); PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); void kvmppc_check_papr_resize_hpt(Error **errp); -@@ -396,6 +399,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) +@@ -427,6 +430,16 @@ static inline bool kvmppc_supports_ail_3(void) return false; } @@ -540,5 +540,5 @@ index 5fd9753953..b5ebfe2be0 100644 { return -1; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0009-Add-s390x-machine-types.patch b/SOURCES/0009-Add-s390x-machine-types.patch index 07dfb57..c3b9936 100644 --- a/SOURCES/0009-Add-s390x-machine-types.patch +++ b/SOURCES/0009-Add-s390x-machine-types.patch @@ -1,4 +1,4 @@ -From 3c7647197729fcd76e219070c6f359bb3667d04d Mon Sep 17 00:00:00 2001 +From 3623043d4a923bf9f541d439c76e7874cf0fa81d Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:47:32 +0200 Subject: Add s390x machine types @@ -8,7 
+8,7 @@ Adding changes to add RHEL machine types for s390x architecture. Signed-off-by: Miroslav Rezanina -- -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Moved adding rhel_old_machine_deprecation variable to general machine types commit Merged patches (6.1.0): @@ -23,52 +23,74 @@ Merged patches (7.0.0): - 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x - dcc64971bf RHEL: mark old machine types as deprecated (partialy) -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (only hw/s390x/s390-virtio-ccw.c chunk) - c8ad21ca31 redhat: Update s390x machine type compatibility for rebase to QEMU 7.0.0 - 5bcf8d874c target/s390x: deprecate CPUs older than z14 -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - 27c188c6a4 redhat: Update s390x machine type compatibility for QEMU 7.2.0 update - a932b8d429 redhat: Add new rhel-9.2.0 s390x machine type - ac88104bad s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (s390x part) + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) --- - hw/s390x/s390-virtio-ccw.c | 143 +++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 159 +++++++++++++++++++++++++++++++ target/s390x/cpu_models.c | 11 +++ target/s390x/cpu_models.h | 2 + target/s390x/cpu_models_sysemu.c | 2 + - 4 files changed, 158 insertions(+) + 4 files changed, 174 insertions(+) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 503f212a31..dcd3b966b0 100644 +index 7262725d2e..984891b82a 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c -@@ -826,6 +826,7 @@ bool css_migration_enabled(void) +@@ -855,6 +855,7 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void ccw_machine_8_0_instance_options(MachineState *machine) + static void ccw_machine_8_2_instance_options(MachineState *machine) { } -@@ -1201,6 +1202,148 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) +@@ -1256,6 +1257,164 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); } DEFINE_CCW_MACHINE(2_4, "2.4", false); +#endif + + ++static void ccw_machine_rhel940_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel940_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); ++ +static void ccw_machine_rhel920_instance_options(MachineState *machine) +{ ++ ccw_machine_rhel940_instance_options(machine); +} + +static void ccw_machine_rhel920_class_options(MachineClass *mc) +{ ++ ccw_machine_rhel940_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ mc->smp_props.drawers_supported = false; /* from ccw_machine_8_1 */ ++ mc->smp_props.books_supported = false; /* from 
ccw_machine_8_1 */ +} -+DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); ++DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", false); + +static void ccw_machine_rhel900_instance_options(MachineState *machine) +{ @@ -204,7 +226,7 @@ index 503f212a31..dcd3b966b0 100644 static void ccw_machine_register_types(void) { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c -index 457b5cb10c..ff6b9463cb 100644 +index a63d990e4e..198b81f2c0 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -46,6 +46,9 @@ @@ -217,7 +239,7 @@ index 457b5cb10c..ff6b9463cb 100644 static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), -@@ -857,22 +860,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) +@@ -856,22 +859,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) { S390CPUClass *xcc = S390_CPU_CLASS(oc); @@ -249,16 +271,16 @@ index 457b5cb10c..ff6b9463cb 100644 static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h -index fb1adc8b21..d76745afa9 100644 +index d7b8912989..1a806a97c4 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h -@@ -38,6 +38,8 @@ struct S390CPUDef { +@@ -38,6 +38,8 @@ typedef struct S390CPUDef { S390FeatBitmap full_feat; /* used to init full_feat from generated data */ S390FeatInit full_init; + /* if deprecated, provides a suggestion */ + const char *deprecation_note; - }; + } S390CPUDef; /* CPU model based on a CPU definition */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c @@ -282,5 +304,5 @@ index 87a4480c05..28c1b0486c 100644 if (cpu_list_data->model) { Object *obj; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0010-Add-x86_64-machine-types.patch 
b/SOURCES/0010-Add-x86_64-machine-types.patch index 9685338..d24bb57 100644 --- a/SOURCES/0010-Add-x86_64-machine-types.patch +++ b/SOURCES/0010-Add-x86_64-machine-types.patch @@ -1,4 +1,4 @@ -From 510291040cb280e1f68b793a84ec0f7d1c88aafa Mon Sep 17 00:00:00 2001 +From b432505cb28bc3b9b0c1849210ac6c63bca3fe37 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 19 Oct 2018 13:10:31 +0200 Subject: Add x86_64 machine types @@ -13,9 +13,12 @@ Rebase notes (6.1.0): Rebase notes (7.0.0): - Reset alias for all machine-types except latest one -Rebase notes (8.0.0-rc1): +Rebase notes (8.0.0): - remove legacy_no_rng_seed usage (removed upstream) +Rebase notes (8.1.0): +- default_nic_model to default_nic + Merged patches (6.1.0): - 59c284ad3b x86: Add x86 rhel8.5 machine types - a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default @@ -35,35 +38,44 @@ Merged patches (7.0.0): - dcc64971bf RHEL: mark old machine types as deprecated (partialy) - 6b396f182b RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 38b89dc245 pc: Move s3/s4 suspend disabling to compat (only hw/i386/pc.c chunk) - 1d6439527a WRB: Introduce RHEL 9.0.0 hw compat structure (x86_64 specific changes) - 35b5c8554f target/i386: deprecate CPUs older than x86_64-v2 ABI -Merged patches (7.2.0 rc0): +Merged patches (7.2.0): - 0be2889fa2 Introduce upstream 7.0 compat changes (only applicable parts) -Merged patches (8.0.0-rc1): +Merged patches (8.0.0): - f33ca8aed4 x86: rhel 9.2.0 machine type + +Merged patches (8.1.0): +- bd5d81d286 Add RHEL 9.2.0 compat structure (x86_64 part) +- c6eaf73add redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU 8.0.0 update +- 6cbf496e5e hw/acpi: Mark acpi blobs as resizable on RHEL pc machines version 7.6 and above + +Merged patches (8.2.0): +- 4ee284aca9 Add machine types compat bits. 
(partial) +- 719e2ac147 Fix x86 machine type compatibility for qemu-kvm 8.1.0 --- - hw/i386/pc.c | 147 +++++++++++++++++++++- - hw/i386/pc_piix.c | 86 ++++++++++++- - hw/i386/pc_q35.c | 252 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc.c | 159 ++++++++++++++++++++- + hw/i386/pc_piix.c | 112 ++++++++++++++- + hw/i386/pc_q35.c | 285 ++++++++++++++++++++++++++++++++++++- include/hw/boards.h | 2 + - include/hw/i386/pc.h | 27 ++++ - target/i386/cpu.c | 21 ++++ + include/hw/i386/pc.h | 33 +++++ + target/i386/cpu.c | 21 +++ target/i386/kvm/kvm-cpu.c | 1 + target/i386/kvm/kvm.c | 4 + tests/qtest/pvpanic-test.c | 5 +- - 9 files changed, 538 insertions(+), 7 deletions(-) + 9 files changed, 615 insertions(+), 7 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 1489abf010..8abb1f872e 100644 +index 29b9964733..a1faa9e92c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c -@@ -407,6 +407,149 @@ GlobalProperty pc_compat_1_4[] = { +@@ -323,6 +323,161 @@ GlobalProperty pc_compat_2_0[] = { }; - const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + const size_t pc_compat_2_0_len = G_N_ELEMENTS(pc_compat_2_0); +/* This macro is for changes to properties that are RHEL specific, + * different to the current upstream and to be applied to the latest @@ -80,13 +92,25 @@ index 1489abf010..8abb1f872e 100644 + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, + { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, -+ /* bz 1508330 */ ++ /* bz 1508330 */ + { "vfio-pci", "x-no-geforce-quirks", "on" }, + /* bz 1941397 */ + { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, +}; +const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_3_compat[] = { ++ /* pc_rhel_9_3_compat from pc_compat_8_0 */ ++ { "virtio-mem", "unplugged-inaccessible", "auto" }, ++}; ++const size_t pc_rhel_9_3_compat_len = G_N_ELEMENTS(pc_rhel_9_3_compat); ++ ++GlobalProperty pc_rhel_9_2_compat[] = { ++ /* 
pc_rhel_9_2_compat from pc_compat_7_2 */ ++ { "ICH9-LPC", "noreboot", "true" }, ++}; ++const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); ++ +GlobalProperty pc_rhel_9_0_compat[] = { + /* pc_rhel_9_0_compat from pc_compat_6_2 */ + { "virtio-mem", "unplugged-inaccessible", "off" }, @@ -177,27 +201,27 @@ index 1489abf010..8abb1f872e 100644 + * machine types irrespective of host. + */ +GlobalProperty pc_rhel_7_6_compat[] = { -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, -+ /* pc_rhel_7_6_compat from pc_compat_2_11 */ 
++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ + { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, +}; +const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); @@ -211,15 +235,15 @@ index 1489abf010..8abb1f872e 100644 GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) { GSIState *s; -@@ -1944,6 +2087,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->pvh_enabled = true; +@@ -1826,6 +1981,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->kvmclock_create_always = true; + pcmc->resizable_acpi_blob = true; assert(!mc->get_hotplug_handler); + mc->async_pf_vmexit_disable = false; mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; mc->cpu_index_to_instance_props = x86_cpu_index_to_props; -@@ -1954,7 +2098,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) +@@ -1836,7 +1992,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) mc->has_hotpluggable_cpus = true; mc->default_boot_order = "cad"; mc->block_default_type = IF_IDE; @@ -230,10 +254,10 @@ index 1489abf010..8abb1f872e 100644 mc->wakeup = pc_machine_wakeup; hc->pre_plug = pc_machine_device_pre_plug_cb; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 14a794081e..3e330fd36f 100644 +index 2a9f465619..44038391fb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c -@@ -54,6 +54,7 @@ +@@ -53,6 +53,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "sysemu/xen.h" @@ -241,18 +265,18 @@ index 14a794081e..3e330fd36f 100644 #ifdef CONFIG_XEN #include #include "hw/xen/xen_pt.h" -@@ -198,8 +199,8 @@ static void pc_init1(MachineState *machine, +@@ -235,8 +236,8 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- smbios_set_defaults("QEMU", mc->desc, - mc->name, 
pcmc->smbios_legacy_mode, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -351,6 +352,7 @@ static void pc_init1(MachineState *machine, +@@ -453,6 +454,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp) * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). */ @@ -260,7 +284,7 @@ index 14a794081e..3e330fd36f 100644 static void pc_compat_2_3_fn(MachineState *machine) { X86MachineState *x86ms = X86_MACHINE(machine); -@@ -899,3 +901,83 @@ static void xenfv_3_1_machine_options(MachineClass *m) +@@ -970,3 +972,109 @@ static void xenfv_3_1_machine_options(MachineClass *m) DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, xenfv_3_1_machine_options); #endif @@ -274,8 +298,9 @@ index 14a794081e..3e330fd36f 100644 + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; -+ pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; ++ pcmc->resizable_acpi_blob = true; ++ m->default_nic = "e1000"; + m->default_display = "std"; + m->no_parallel = 1; + m->numa_mem_supported = true; @@ -296,6 +321,7 @@ index 14a794081e..3e330fd36f 100644 +static void pc_machine_rhel760_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ ObjectClass *oc = OBJECT_CLASS(m); + pc_machine_rhel7_options(m); + m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; + m->async_pf_vmexit_disable = true; @@ -309,7 +335,31 @@ index 14a794081e..3e330fd36f 100644 + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ /* From pc_i440fx_7_0_machine_options() */ + pcmc->enforce_amd_1tb_hole = false; ++ /* From pc_i440fx_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* Introduced in QEMU 8.2 */ ++ pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; ++ ++ 
object_class_property_add_enum(oc, "x-south-bridge", "PCSouthBridgeOption", ++ &PCSouthBridgeOption_lookup, ++ pc_get_south_bridge, ++ pc_set_south_bridge); ++ object_class_property_set_description(oc, "x-south-bridge", ++ "Use a different south bridge than PIIX3"); ++ ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_1, + hw_compat_rhel_9_1_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_0, @@ -345,21 +395,21 @@ index 14a794081e..3e330fd36f 100644 +DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, + pc_machine_rhel760_options); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index dc0ba5f9e7..98601bb76f 100644 +index 912cb0c0dc..6387df97c8 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c -@@ -199,8 +199,8 @@ static void pc_q35_init(MachineState *machine) +@@ -203,8 +203,8 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ -- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- smbios_set_defaults("QEMU", mc->desc, - mc->name, pcmc->smbios_legacy_mode, + smbios_set_defaults("Red Hat", "KVM", + mc->desc, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcmc->smbios_stream_product, pcmc->smbios_stream_version, -@@ -354,6 +354,7 @@ static void pc_q35_init(MachineState *machine) +@@ -363,6 +363,7 @@ static void pc_q35_init(MachineState *machine) DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) @@ -367,7 +417,7 @@ index dc0ba5f9e7..98601bb76f 100644 static void pc_q35_machine_options(MachineClass *m) { 
PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -@@ -663,3 +664,250 @@ static void pc_q35_2_4_machine_options(MachineClass *m) +@@ -699,3 +700,283 @@ static void pc_q35_2_4_machine_options(MachineClass *m) DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, pc_q35_2_4_machine_options); @@ -379,8 +429,8 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pcmc->default_nic_model = "e1000e"; + pcmc->pci_root_uid = 0; ++ m->default_nic = "e1000e"; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; + m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; @@ -396,6 +446,24 @@ index dc0ba5f9e7..98601bb76f 100644 + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); +} + ++static void pc_q35_init_rhel940(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel940_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.4.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, ++ pc_q35_machine_rhel940_options); ++ ++ +static void pc_q35_init_rhel920(MachineState *machine) +{ + pc_q35_init(machine); @@ -404,10 +472,25 @@ index dc0ba5f9e7..98601bb76f 100644 +static void pc_q35_machine_rhel920_options(MachineClass *m) +{ + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); -+ pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel940_options(m); + m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.2.0"; ++ ++ /* From pc_q35_8_0_machine_options() */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4, ++ hw_compat_rhel_9_4_len); ++ compat_props_add(m->compat_props, 
hw_compat_rhel_9_3, ++ hw_compat_rhel_9_3_len); ++ compat_props_add(m->compat_props, pc_rhel_9_3_compat, ++ pc_rhel_9_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_9_2, ++ hw_compat_rhel_9_2_len); ++ compat_props_add(m->compat_props, pc_rhel_9_2_compat, ++ pc_rhel_9_2_compat_len); +} + +DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, @@ -619,10 +702,10 @@ index dc0ba5f9e7..98601bb76f 100644 +DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, + pc_q35_machine_rhel760_options); diff --git a/include/hw/boards.h b/include/hw/boards.h -index c5a965d27f..5e7446ee40 100644 +index 4a21eddbf9..4edfdb0ddb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h -@@ -268,6 +268,8 @@ struct MachineClass { +@@ -277,6 +277,8 @@ struct MachineClass { strList *allowed_dynamic_sysbus_devices; bool auto_enable_numa_with_memhp; bool auto_enable_numa_with_memdev; @@ -632,16 +715,22 @@ index c5a965d27f..5e7446ee40 100644 bool smbus_no_migration_support; bool nvdimm_supported; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 908a275736..4376f64a47 100644 +index 037942d233..37644ede7e 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h -@@ -293,6 +293,33 @@ extern const size_t pc_compat_1_4_len; +@@ -314,6 +314,39 @@ extern const size_t pc_compat_1_4_len; int pc_machine_kvm_type(MachineState *machine, const char *vm_type); +extern GlobalProperty pc_rhel_compat[]; +extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_3_compat[]; ++extern const size_t pc_rhel_9_3_compat_len; ++ ++extern GlobalProperty pc_rhel_9_2_compat[]; ++extern const size_t pc_rhel_9_2_compat_len; ++ +extern GlobalProperty pc_rhel_9_0_compat[]; +extern const size_t pc_rhel_9_0_compat_len; + @@ -670,10 +759,10 @@ index 908a275736..4376f64a47 100644 static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 6576287e5b..0ef2bf1b93 
100644 +index cd16cb893d..93203d9b91 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c -@@ -1834,9 +1834,13 @@ static const CPUCaches epyc_milan_cache_info = { +@@ -2190,9 +2190,13 @@ static const CPUCaches epyc_genoa_cache_info = { * PT in VMX operation */ @@ -687,7 +776,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -1857,6 +1861,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2213,6 +2217,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "phenom", @@ -695,7 +784,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 16, -@@ -1889,6 +1894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2245,6 +2250,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "core2duo", @@ -703,7 +792,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1931,6 +1937,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2287,6 +2293,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm64", @@ -711,7 +800,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -1972,6 +1979,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2328,6 +2335,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "qemu32", @@ -719,7 +808,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 4, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -1986,6 +1994,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2342,6 +2350,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "kvm32", @@ -727,7 +816,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_INTEL, .family = 15, -@@ -2016,6 +2025,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2372,6 +2381,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "coreduo", @@ -735,7 
+824,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2049,6 +2059,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2405,6 +2415,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "486", @@ -743,7 +832,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 4, -@@ -2061,6 +2072,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2417,6 +2428,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium", @@ -751,7 +840,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 1, .vendor = CPUID_VENDOR_INTEL, .family = 5, -@@ -2073,6 +2085,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2429,6 +2441,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium2", @@ -759,7 +848,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2085,6 +2098,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2441,6 +2454,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "pentium3", @@ -767,7 +856,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 3, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2097,6 +2111,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2453,6 +2467,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "athlon", @@ -775,7 +864,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 2, .vendor = CPUID_VENDOR_AMD, .family = 6, -@@ -2112,6 +2127,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2468,6 +2483,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "n270", @@ -783,7 +872,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2137,6 +2153,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2493,6 +2509,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Conroe", @@ 
-791,7 +880,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -2177,6 +2194,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -2533,6 +2550,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Penryn", @@ -799,7 +888,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 10, .vendor = CPUID_VENDOR_INTEL, .family = 6, -@@ -3893,6 +3911,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4394,6 +4412,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G1", @@ -807,7 +896,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3913,6 +3932,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4414,6 +4433,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G2", @@ -815,7 +904,7 @@ index 6576287e5b..0ef2bf1b93 100644 .level = 5, .vendor = CPUID_VENDOR_AMD, .family = 15, -@@ -3935,6 +3955,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { +@@ -4436,6 +4456,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .name = "Opteron_G3", @@ -824,10 +913,10 @@ index 6576287e5b..0ef2bf1b93 100644 .vendor = CPUID_VENDOR_AMD, .family = 16, diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c -index 7237378a7d..7b8a3d5af0 100644 +index 9c791b7b05..b91af5051f 100644 --- a/target/i386/kvm/kvm-cpu.c +++ b/target/i386/kvm/kvm-cpu.c -@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { +@@ -138,6 +138,7 @@ static PropValue kvm_default_props[] = { { "acpi", "off" }, { "monitor", "off" }, { "svm", "off" }, @@ -836,10 +925,10 @@ index 7237378a7d..7b8a3d5af0 100644 }; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c -index de531842f6..8d82304609 100644 +index 4ce80555b4..9d41edf01e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c -@@ -3822,6 +3822,7 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -3711,6 +3711,7 @@ static int 
kvm_get_msrs(X86CPU *cpu) struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; int ret, i; uint64_t mtrr_top_bits; @@ -847,7 +936,7 @@ index de531842f6..8d82304609 100644 kvm_msr_buf_reset(cpu); -@@ -4177,6 +4178,9 @@ static int kvm_get_msrs(X86CPU *cpu) +@@ -4065,6 +4066,9 @@ static int kvm_get_msrs(X86CPU *cpu) break; case MSR_KVM_ASYNC_PF_EN: env->async_pf_en_msr = msrs[i].data; @@ -881,5 +970,5 @@ index 78f1cf8186..ac954c9b06 100644 val = qtest_inb(qts, 0x505); g_assert_cmpuint(val, ==, 3); -- -2.39.1 +2.39.3 diff --git a/SOURCES/0011-Enable-make-check.patch b/SOURCES/0011-Enable-make-check.patch index cc91302..54015c0 100644 --- a/SOURCES/0011-Enable-make-check.patch +++ b/SOURCES/0011-Enable-make-check.patch @@ -1,4 +1,4 @@ -From 738db8353055eb6fd902513949c6659af8b401d0 Mon Sep 17 00:00:00 2001 +From 66a0510405e5142a1f9e38e0770aa0f10aed3e03 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 2 Sep 2020 09:39:41 +0200 Subject: Enable make check @@ -24,43 +24,49 @@ Rebase changes (7.0.0): - Remove unnecessary changes in iotest 051 - Remove changes in bios-tables-test.c and prom-env-test.c qtests -Rebase changes (7.1.0 rc0): +Rebase changes (7.1.0): - Disable bcm2835-dma-test (added upstream) -Rebase changes (8.0.0-rc1): +Rebase changes (8.0.0): - Removed chunks for disabling bios-table-test (protected upstream) - -Rebase change (8.0.0-rc2): - Disable new qemu-iotests execution - Revert change in tco qtest (blocking test run) +Rebase changes (8.1.0): +- Do not disable device-plug-test for s390x + +Rebase changes (8.2.0 rc1): +- Remove unneeded hack in qtest/usb-hcd-xhci-test.c + Merged patches (6.1.0): - 2f129df7d3 redhat: Enable the 'test-block-iothread' test again -Merged patches (7.1.0 rc0): +Merged patches (7.1.0): - 64d736640e RHEL-only: tests/avocado: Switch aarch64 tests from a53 to a57 + +Merged patches (8.1.0): +- f468163234 iotests: Use alternative CPU type that is not deprecated in RHEL --- .distro/qemu-kvm.spec.template | 4 ++-- 
tests/avocado/replay_kernel.py | 2 +- tests/avocado/reverse_debugging.py | 2 +- tests/avocado/tcg_plugins.py | 6 ++--- tests/qemu-iotests/meson.build | 34 ++++++++++++++--------------- + tests/qemu-iotests/testenv.py | 3 +++ tests/qtest/fuzz-e1000e-test.c | 2 +- tests/qtest/fuzz-virtio-scsi-test.c | 2 +- tests/qtest/intel-hda-test.c | 2 +- tests/qtest/libqos/meson.build | 2 +- tests/qtest/lpc-ich9-test.c | 2 +- - tests/qtest/meson.build | 2 -- - tests/qtest/tco-test.c | 2 +- - tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/meson.build | 1 - tests/qtest/virtio-net-failover.c | 1 + - 14 files changed, 35 insertions(+), 32 deletions(-) + 13 files changed, 33 insertions(+), 30 deletions(-) diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py -index f13456e1ec..2fee270a42 100644 +index c37afa662c..61c95a2198 100644 --- a/tests/avocado/replay_kernel.py +++ b/tests/avocado/replay_kernel.py -@@ -147,7 +147,7 @@ def test_aarch64_virt(self): +@@ -153,7 +153,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -70,10 +76,10 @@ index f13456e1ec..2fee270a42 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py -index 680c314cfc..71eccb8fb6 100644 +index 4cce5a5598..e9248a04a2 100644 --- a/tests/avocado/reverse_debugging.py +++ b/tests/avocado/reverse_debugging.py -@@ -206,7 +206,7 @@ def test_aarch64_virt(self): +@@ -230,7 +230,7 @@ def test_aarch64_virt(self): """ :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -83,10 +89,10 @@ index 680c314cfc..71eccb8fb6 100644 kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' '/linux/releases/29/Everything/aarch64/os/images/pxeboot' diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py -index 642d2e49e3..93b3afd823 100644 +index 15fd87b2c1..f0d9d89c93 
100644 --- a/tests/avocado/tcg_plugins.py +++ b/tests/avocado/tcg_plugins.py -@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): +@@ -66,7 +66,7 @@ def test_aarch64_virt_insn(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -95,7 +101,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): +@@ -96,7 +96,7 @@ def test_aarch64_virt_insn_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -104,7 +110,7 @@ index 642d2e49e3..93b3afd823 100644 """ kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + -@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): +@@ -126,7 +126,7 @@ def test_aarch64_virt_mem_icount(self): :avocado: tags=accel:tcg :avocado: tags=arch:aarch64 :avocado: tags=machine:virt @@ -114,7 +120,7 @@ index 642d2e49e3..93b3afd823 100644 kernel_path = self._grab_aarch64_kernel() kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + diff --git a/tests/qemu-iotests/meson.build b/tests/qemu-iotests/meson.build -index 9735071a29..32002335f4 100644 +index 53847cb98f..a2abdb650e 100644 --- a/tests/qemu-iotests/meson.build +++ b/tests/qemu-iotests/meson.build @@ -51,21 +51,21 @@ foreach format, speed: qemu_iotests_formats @@ -156,6 +162,20 @@ index 9735071a29..32002335f4 100644 +# suite: suites) +# endforeach endforeach +diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py +index 3ff38f2661..cab9a2bd6c 100644 +--- a/tests/qemu-iotests/testenv.py ++++ b/tests/qemu-iotests/testenv.py +@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, + if self.qemu_prog.endswith(f'qemu-system-{suffix}'): + self.qemu_options += f' -machine {machine}' + ++ if self.qemu_prog.endswith('qemu-system-x86_64'): ++ self.qemu_options += ' -cpu Nehalem' ++ + # 
QEMU_DEFAULT_MACHINE + self.qemu_default_machine = get_default_machine(self.qemu_prog) + diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c index 5052883fb6..b5286f4b12 100644 --- a/tests/qtest/fuzz-e1000e-test.c @@ -183,20 +203,20 @@ index e37b48b2cc..88647da054 100644 qtest_outl(s, 0xcf8, 0x80001811); diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c -index d4a8db6fd6..1a796ec15a 100644 +index 663bb6c485..2efc43e3f7 100644 --- a/tests/qtest/intel-hda-test.c +++ b/tests/qtest/intel-hda-test.c -@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) +@@ -42,7 +42,7 @@ static void test_issue542_ich6(void) { QTestState *s; - s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " + s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + AUDIODEV "-device intel-hda,id=" HDA_ID CODEC_DEVICES); - qtest_outl(s, 0xcf8, 0x80000804); diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build -index cc209a8de5..42a7c529c9 100644 +index 90aae42a22..9bc4e41af0 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -44,7 +44,7 @@ libqos_srcs = files( @@ -206,8 +226,8 @@ index cc209a8de5..42a7c529c9 100644 - 'virtio-iommu.c', +# 'virtio-iommu.c', 'virtio-gpio.c', + 'virtio-scmi.c', 'generic-pcihost.c', - diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c index 8ac95b89f7..cd2102555c 100644 --- a/tests/qtest/lpc-ich9-test.c @@ -222,10 +242,10 @@ index 8ac95b89f7..cd2102555c 100644 qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build -index 85ea4e8d99..893afc8eeb 100644 +index 47dabf91d0..0bdfa3a821 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build -@@ -94,7 +94,6 @@ qtests_i386 = \ +@@ -97,7 +97,6 @@ qtests_i386 = \ 'drive_del-test', 'tco-test', 'cpu-plug-test', @@ -233,62 +253,11 @@ index 85ea4e8d99..893afc8eeb 100644 'vmgenid-test', 'migration-test', 'test-x86-cpuid-compat', -@@ -223,7 
+222,6 @@ qtests_s390x = \ - (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ - ['boot-serial-test', - 'drive_del-test', -- 'device-plug-test', - 'virtio-ccw-test', - 'cpu-plug-test', - 'migration-test'] -diff --git a/tests/qtest/tco-test.c b/tests/qtest/tco-test.c -index 0547d41173..3756ce82d8 100644 ---- a/tests/qtest/tco-test.c -+++ b/tests/qtest/tco-test.c -@@ -60,7 +60,7 @@ static void test_init(TestData *d) - QTestState *qs; - - qs = qtest_initf("-machine q35 %s %s", -- d->noreboot ? "-global ICH9-LPC.noreboot=true" : "", -+ d->noreboot ? "" : "-global ICH9-LPC.noreboot=false", - !d->args ? "" : d->args); - qtest_irq_intercept_in(qs, "ioapic"); - -diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c -index 10ef9d2a91..3855873050 100644 ---- a/tests/qtest/usb-hcd-xhci-test.c -+++ b/tests/qtest/usb-hcd-xhci-test.c -@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) - usb_test_hotplug(global_qtest, "xhci", "1", NULL); - } - -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - static void test_usb_uas_hotplug(void) - { - QTestState *qts = global_qtest; -@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) - qtest_qmp_device_del(qts, "scsihd"); - qtest_qmp_device_del(qts, "uas"); - } -+#endif - - static void test_usb_ccid_hotplug(void) - { -@@ -56,7 +58,9 @@ int main(int argc, char **argv) - - qtest_add_func("/xhci/pci/init", test_xhci_init); - qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); -+#if 0 /* Disabled for Red Hat Enterprise Linux */ - qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); -+#endif - qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); - - qtest_start("-device nec-usb-xhci,id=xhci" diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c -index 4a809590bf..1bf3fa641c 100644 +index 0d40bc1f2d..4c633c1584 100644 --- a/tests/qtest/virtio-net-failover.c +++ b/tests/qtest/virtio-net-failover.c -@@ -25,6 +25,7 @@ +@@ -26,6 
+26,7 @@ #define PCI_SEL_BASE 0x0010 #define BASE_MACHINE "-M q35 -nodefaults " \ @@ -297,5 +266,5 @@ index 4a809590bf..1bf3fa641c 100644 "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " -- -2.39.1 +2.39.3 diff --git a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch index 430959b..8222efd 100644 --- a/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch +++ b/SOURCES/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -1,4 +1,4 @@ -From 34cb4f7ddd762ec46ed1a6a4261aebde39360ca4 Mon Sep 17 00:00:00 2001 +From a27cfa0b407bd806ce389a7c69d0130bcfd35244 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 3 Dec 2013 20:05:13 +0100 Subject: vfio: cap number of devices that can be assigned @@ -26,13 +26,16 @@ Count of slots increased to 509 later so we could increase limit to 64 as some usecases require more than 32 devices. Signed-off-by: Bandan Das + +Rebase changes (231025): +- Update to upstream changes --- - hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.c | 31 ++++++++++++++++++++++++++++++- hw/vfio/pci.h | 1 + - 2 files changed, 29 insertions(+), 1 deletion(-) + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ec9a854361..a779053be3 100644 +index c62c02f7b6..ec98080f28 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -48,6 +48,9 @@ @@ -45,13 +48,20 @@ index ec9a854361..a779053be3 100644 static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); -@@ -2854,9 +2857,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ssize_t len; +@@ -3076,14 +3079,37 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + { + VFIOPCIDevice *vdev = VFIO_PCI(pdev); + VFIODevice *vbasedev = &vdev->vbasedev; ++ VFIODevice *vbasedev_iter; ++ VFIOGroup 
*group; + char *tmp, *subsys; + Error *err = NULL; struct stat st; - int groupid; - int i, ret; + int ret, i = 0; bool is_mdev; + char uuid[UUID_STR_LEN]; + char *name; + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. " @@ -77,7 +87,7 @@ index ec9a854361..a779053be3 100644 if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || ~vdev->host.slot || ~vdev->host.function)) { -@@ -3294,6 +3318,9 @@ static Property vfio_pci_dev_properties[] = { +@@ -3501,6 +3527,9 @@ static Property vfio_pci_dev_properties[] = { DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, no_geforce_quirks, false), @@ -88,10 +98,10 @@ index ec9a854361..a779053be3 100644 false), DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 177abcc8fb..45235d38ba 100644 +index fba8737ab2..eb74d9de2d 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h -@@ -140,6 +140,7 @@ struct VFIOPCIDevice { +@@ -142,6 +142,7 @@ struct VFIOPCIDevice { EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); @@ -100,5 +110,5 @@ index 177abcc8fb..45235d38ba 100644 uint32_t device_id; uint32_t sub_vendor_id; -- -2.39.1 +2.39.3 diff --git a/SOURCES/0013-Add-support-statement-to-help-output.patch b/SOURCES/0013-Add-support-statement-to-help-output.patch index 25db0b8..bc5d9b4 100644 --- a/SOURCES/0013-Add-support-statement-to-help-output.patch +++ b/SOURCES/0013-Add-support-statement-to-help-output.patch @@ -1,4 +1,4 @@ -From 8964a3e8835992442902d35b011a708787366d82 Mon Sep 17 00:00:00 2001 +From 424f14d123fe1043518758605d94ed5ba50e52ad Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Wed, 4 Dec 2013 18:53:17 +0100 Subject: Add support statement to -help output @@ -17,14 +17,14 @@ as unsupported by Red Hat, and advising users to use libvirt instead. 
Signed-off-by: Eduardo Habkost --- - softmmu/vl.c | 9 +++++++++ + system/vl.c | 9 +++++++++ 1 file changed, 9 insertions(+) -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ea20b23e4c..ad4173138d 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -834,9 +834,17 @@ static void version(void) +diff --git a/system/vl.c b/system/vl.c +index 2bcd9efb9a..93635ffc5b 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -870,9 +870,17 @@ static void version(void) QEMU_COPYRIGHT "\n"); } @@ -42,7 +42,7 @@ index ea20b23e4c..ad4173138d 100644 printf("usage: %s [options] [disk_image]\n\n" "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", g_get_prgname()); -@@ -862,6 +870,7 @@ static void help(int exitcode) +@@ -898,6 +906,7 @@ static void help(int exitcode) "\n" QEMU_HELP_BOTTOM "\n"); @@ -51,5 +51,5 @@ index ea20b23e4c..ad4173138d 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch index b97c844..7fa10b5 100644 --- a/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +++ b/SOURCES/0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -1,4 +1,4 @@ -From 0b72d348fa0714de641ee242e5cee97df006e8fd Mon Sep 17 00:00:00 2001 +From c683ff4a770b77dbe707413840918a46f67fa825 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 8 Jul 2020 08:35:50 +0200 Subject: Use qemu-kvm in documentation instead of qemu-system- @@ -36,10 +36,10 @@ index 52d6454b93..d74dbdeca9 100644 .. |I2C| replace:: I\ :sup:`2`\ C .. |I2S| replace:: I\ :sup:`2`\ S diff --git a/qemu-options.hx b/qemu-options.hx -index 59bdf67a2c..52b49f1f6a 100644 +index 42fd09e4de..557118cb1f 100644 --- a/qemu-options.hx +++ b/qemu-options.hx -@@ -3296,11 +3296,11 @@ SRST +@@ -3469,11 +3469,11 @@ SRST :: @@ -57,5 +57,5 @@ index 59bdf67a2c..52b49f1f6a 100644 ``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]`` Establish a vhost-vdpa netdev. 
-- -2.39.1 +2.39.3 diff --git a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch index 1e2f8e1..667d431 100644 --- a/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +++ b/SOURCES/0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -1,4 +1,4 @@ -From bd6bcebfd783fa49e283d035d378fb5240423d84 Mon Sep 17 00:00:00 2001 +From 776bff1be5e98982a9bbc8345ff27274ff5b8c0f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 20 Aug 2021 18:25:12 +0200 Subject: qcow2: Deprecation warning when opening v2 images rw @@ -44,10 +44,10 @@ Rebase notes (6.1.0): 2 files changed, 7 insertions(+) diff --git a/block/qcow2.c b/block/qcow2.c -index 30fd53fa64..22084730f9 100644 +index 13e032bd5e..7968735346 100644 --- a/block/qcow2.c +++ b/block/qcow2.c -@@ -1337,6 +1337,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, +@@ -1358,6 +1358,12 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOTSUP; goto fail; } @@ -61,7 +61,7 @@ index 30fd53fa64..22084730f9 100644 s->qcow_version = header.version; diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter -index 6b32c7fbfa..6ddda2ee64 100644 +index 2846c83808..83472953a2 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -83,6 +83,7 @@ _filter_qemu() @@ -73,5 +73,5 @@ index 6b32c7fbfa..6ddda2ee64 100644 } -- -2.39.1 +2.39.3 diff --git a/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch new file mode 100644 index 0000000..4e62baa --- /dev/null +++ b/SOURCES/0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch @@ -0,0 +1,44 @@ +From 3b9b38339346ebfaf3e8ddf0822eba1cc9e78408 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 14 Dec 2023 04:42:01 -0500 +Subject: Introduce RHEL 9.4.0 qemu-kvm machine 
type for aarch64 + +Jira: https://issues.redhat.com/browse/RHEL-17168 + +Adding new machine type to support enabling new features. + +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c541efee5e..0b17c94ad7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3630,14 +3630,21 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel940_virt_options(MachineClass *mc) ++{ ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) ++ + static void rhel920_virt_options(MachineClass *mc) + { ++ rhel940_virt_options(mc); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); + } +-DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) ++DEFINE_RHEL_MACHINE(9, 2, 0) + + static void rhel900_virt_options(MachineClass *mc) + { +-- +2.39.3 + diff --git a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch b/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch deleted file mode 100644 index bb9455a..0000000 --- a/SOURCES/0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 78a42cf27aa519bb71214443ab570b40e156fa9c Mon Sep 17 00:00:00 2001 -From: Kfir Manor -Date: Sun, 22 Jan 2023 17:33:07 +0200 -Subject: qga/linux: add usb support to guest-get-fsinfo - -RH-Author: Kostiantyn Kostiuk -RH-MergeRequest: 140: qga/linux: add usb support to guest-get-fsinfo -RH-Bugzilla: 2149191 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: yvugenfi -RH-Commit: [1/1] bae929a2d0d0ad20e7308ede69c26499fc2119c7 (kostyanf14/redhat_centos-stream_src_qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2149191 -Upstream patch: 
https://patchew.org/QEMU/20230122153307.1050593-1-kfir@daynix.com/ - -Signed-off-by: Kfir Manor -Reviewed-by: Konstantin Kostiuk -Signed-off-by: Konstantin Kostiuk - -Patch-name: kvm-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch-id: 72 -Patch-present-in-specfile: True ---- - qga/commands-posix.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/qga/commands-posix.c b/qga/commands-posix.c -index 079689d79a..97754930c1 100644 ---- a/qga/commands-posix.c -+++ b/qga/commands-posix.c -@@ -879,7 +879,9 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - g_str_equal(driver, "sym53c8xx") || - g_str_equal(driver, "virtio-pci") || - g_str_equal(driver, "ahci") || -- g_str_equal(driver, "nvme"))) { -+ g_str_equal(driver, "nvme") || -+ g_str_equal(driver, "xhci_hcd") || -+ g_str_equal(driver, "ehci-pci"))) { - break; - } - -@@ -976,6 +978,8 @@ static bool build_guest_fsinfo_for_pci_dev(char const *syspath, - } - } else if (strcmp(driver, "nvme") == 0) { - disk->bus_type = GUEST_DISK_BUS_TYPE_NVME; -+ } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) { -+ disk->bus_type = GUEST_DISK_BUS_TYPE_USB; - } else { - g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath); - goto cleanup; --- -2.39.1 - diff --git a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch b/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch deleted file mode 100644 index ce0ba5c..0000000 --- a/SOURCES/0017-Add-RHEL-9.2.0-compat-structure.patch +++ /dev/null @@ -1,110 +0,0 @@ -From bd5d81d2865c239ffea0fecf32476732149ad05c Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Wed, 15 Feb 2023 02:03:17 -0500 -Subject: Add RHEL 9.2.0 compat structure - -Adding compatibility bits necessary to keep 9.2.0 machine -types same after rebase to 8.0. 
- -Signed-off-by: Miroslav Rezanina - -Rebase notes (8.0.0 rc4): -- Added migration.x-preempt-pre-7-2 compat) ---- - hw/arm/virt.c | 1 + - hw/core/machine.c | 10 ++++++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 3 +++ - hw/s390x/s390-virtio-ccw.c | 1 + - include/hw/boards.h | 3 +++ - 6 files changed, 20 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 1ae1654be5..9be53e9355 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3669,6 +3669,7 @@ type_init(rhel_machine_init); - static void rhel920_virt_options(MachineClass *mc) - { - compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_RHEL_MACHINE_AS_LATEST(9, 2, 0) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5aa567fad3..0e0120b7f2 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -52,6 +52,16 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - -+GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "e1000e", "migrate-timadj", "off" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "virtio-mem", "x-early-migration", "false" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { "migration", "x-preempt-pre-7-2", "true" }, -+}; -+const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); -+ - /* - * Mostly the same as hw_compat_7_0 - */ -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 3e330fd36f..90fb6e2e03 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -947,6 +947,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - /* From pc_i440fx_5_1_machine_options() */ - pcmc->pci_root_uid = 1; - pcmc->enforce_amd_1tb_hole = false; -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - compat_props_add(m->compat_props, 
hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 98601bb76f..8945b69175 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -701,6 +701,9 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - m->desc = "RHEL-9.2.0 PC (Q35 + ICH9, 2009)"; - pcmc->smbios_stream_product = "RHEL"; - pcmc->smbios_stream_version = "9.2.0"; -+ -+ compat_props_add(m->compat_props, hw_compat_rhel_9_2, -+ hw_compat_rhel_9_2_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", pc_q35_init_rhel920, -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index dcd3b966b0..6a0b93c63d 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -1211,6 +1211,7 @@ static void ccw_machine_rhel920_instance_options(MachineState *machine) - - static void ccw_machine_rhel920_class_options(MachineClass *mc) - { -+ compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); - } - DEFINE_CCW_MACHINE(rhel920, "rhel9.2.0", true); - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5e7446ee40..5f08bd7550 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -461,6 +461,9 @@ extern const size_t hw_compat_2_2_len; - extern GlobalProperty hw_compat_2_1[]; - extern const size_t hw_compat_2_1_len; - -+extern GlobalProperty hw_compat_rhel_9_2[]; -+extern const size_t hw_compat_rhel_9_2_len; -+ - extern GlobalProperty hw_compat_rhel_9_1[]; - extern const size_t hw_compat_rhel_9_1_len; - --- -2.39.1 - diff --git a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch b/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch deleted file mode 100644 index 81993e9..0000000 --- a/SOURCES/0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch +++ /dev/null @@ -1,76 +0,0 @@ -From c6eaf73adda2e87fe91c9a3836f45dd58a553e06 Mon Sep 17 00:00:00 2001 -From: Thomas Huth 
-Date: Mon, 27 Mar 2023 15:14:03 +0200 -Subject: redhat: hw/i386/pc: Update x86 machine type compatibility for QEMU - 8.0.0 update - -Add pc_rhel_9_2_compat based on upstream pc_compat_7_2. - -Signed-off-by: Thomas Huth ---- - hw/i386/pc.c | 6 ++++++ - hw/i386/pc_piix.c | 2 ++ - hw/i386/pc_q35.c | 2 ++ - include/hw/i386/pc.h | 3 +++ - 4 files changed, 13 insertions(+) - -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index 8abb1f872e..f216922cee 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -429,6 +429,12 @@ GlobalProperty pc_rhel_compat[] = { - }; - const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); - -+GlobalProperty pc_rhel_9_2_compat[] = { -+ /* pc_rhel_9_2_compat from pc_compat_7_2 */ -+ { "ICH9-LPC", "noreboot", "true" }, -+}; -+const size_t pc_rhel_9_2_compat_len = G_N_ELEMENTS(pc_rhel_9_2_compat); -+ - GlobalProperty pc_rhel_9_0_compat[] = { - /* pc_rhel_9_0_compat from pc_compat_6_2 */ - { "virtio-mem", "unplugged-inaccessible", "off" }, -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 90fb6e2e03..fc704d783f 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -949,6 +949,8 @@ static void pc_machine_rhel760_options(MachineClass *m) - pcmc->enforce_amd_1tb_hole = false; - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_1, - hw_compat_rhel_9_1_len); - compat_props_add(m->compat_props, hw_compat_rhel_9_0, -diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c -index 8945b69175..e97655616a 100644 ---- a/hw/i386/pc_q35.c -+++ b/hw/i386/pc_q35.c -@@ -704,6 +704,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) - - compat_props_add(m->compat_props, hw_compat_rhel_9_2, - hw_compat_rhel_9_2_len); -+ compat_props_add(m->compat_props, pc_rhel_9_2_compat, -+ pc_rhel_9_2_compat_len); - } - - DEFINE_PC_MACHINE(q35_rhel920, "pc-q35-rhel9.2.0", 
pc_q35_init_rhel920, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index 4376f64a47..d218ad1628 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -296,6 +296,9 @@ int pc_machine_kvm_type(MachineState *machine, const char *vm_type); - extern GlobalProperty pc_rhel_compat[]; - extern const size_t pc_rhel_compat_len; - -+extern GlobalProperty pc_rhel_9_2_compat[]; -+extern const size_t pc_rhel_9_2_compat_len; -+ - extern GlobalProperty pc_rhel_9_0_compat[]; - extern const size_t pc_rhel_9_0_compat_len; - --- -2.39.1 - diff --git a/SOURCES/0019-Disable-unwanted-new-devices.patch b/SOURCES/0019-Disable-unwanted-new-devices.patch deleted file mode 100644 index f656ca9..0000000 --- a/SOURCES/0019-Disable-unwanted-new-devices.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 8173d2eabaf77312d36b00c618f6770948b80593 Mon Sep 17 00:00:00 2001 -From: Miroslav Rezanina -Date: Mon, 17 Apr 2023 01:24:18 -0400 -Subject: Disable unwanted new devices - -QEMU 8.0 adds two new device we do not want to support that can't -be disabled using configure switch. - -1) ide-cf - virtual CompactFlash card - -2) i2c-echo - testing echo device - -Use manual disabling of the device by changing code (1) and meson configs (2). 
- -Signed-off-by: Miroslav Rezanina ---- - hw/ide/qdev.c | 9 +++++++++ - hw/misc/meson.build | 3 ++- - 2 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c -index 1b3b4da01d..454bfa5783 100644 ---- a/hw/ide/qdev.c -+++ b/hw/ide/qdev.c -@@ -283,10 +283,13 @@ static void ide_cd_realize(IDEDevice *dev, Error **errp) - ide_dev_initfn(dev, IDE_CD, errp); - } - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static void ide_cf_realize(IDEDevice *dev, Error **errp) - { - ide_dev_initfn(dev, IDE_CFATA, errp); - } -+#endif - - #define DEFINE_IDE_DEV_PROPERTIES() \ - DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf), \ -@@ -346,6 +349,8 @@ static const TypeInfo ide_cd_info = { - .class_init = ide_cd_class_init, - }; - -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - static Property ide_cf_properties[] = { - DEFINE_IDE_DEV_PROPERTIES(), - DEFINE_BLOCK_CHS_PROPERTIES(IDEDrive, dev.conf), -@@ -371,6 +376,7 @@ static const TypeInfo ide_cf_info = { - .instance_size = sizeof(IDEDrive), - .class_init = ide_cf_class_init, - }; -+#endif - - static void ide_device_class_init(ObjectClass *klass, void *data) - { -@@ -396,7 +402,10 @@ static void ide_register_types(void) - type_register_static(&ide_bus_info); - type_register_static(&ide_hd_info); - type_register_static(&ide_cd_info); -+/* Disabled for Red Hat Enterprise Linux */ -+#if 0 - type_register_static(&ide_cf_info); -+#endif - type_register_static(&ide_device_type_info); - } - -diff --git a/hw/misc/meson.build b/hw/misc/meson.build -index a40245ad44..9cc5a61ed7 100644 ---- a/hw/misc/meson.build -+++ b/hw/misc/meson.build -@@ -128,7 +128,8 @@ softmmu_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c')) - - softmmu_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_ahb_apb_pnp.c')) - --softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) -+# Disabled for Red Hat Enterprise Linux -+# softmmu_ss.add(when: 'CONFIG_I2C', if_true: files('i2c-echo.c')) - - 
specific_ss.add(when: 'CONFIG_AVR_POWER', if_true: files('avr_power.c')) - --- -2.39.1 - diff --git a/SOURCES/README.tests b/SOURCES/README.tests index 9932773..739e2c6 100644 --- a/SOURCES/README.tests +++ b/SOURCES/README.tests @@ -28,7 +28,7 @@ avocado_qemu tests: The avocado_qemu tests can be executed by running the following avocado command: avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ Avocado needs to be installed separately using either pip or from source as -Avocado is not being packaged for RHEL-8. +Avocado is not being packaged for RHEL. qemu-iotests: symlinks to corresponding binaries need to be created for QEMU_PROG, @@ -36,4 +36,4 @@ QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be executed. The primary purpose of this package is to make these tests available to be -executed as gating tests for the virt module in the RHEL-8 OSCI environment. +executed as gating tests for the qemu-kvm in the RHEL OSCI environment. diff --git a/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch new file mode 100644 index 0000000..ed776c0 --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-object-on-aarch64.patch @@ -0,0 +1,37 @@ +From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:26:19 -0500 +Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on aarch64 to be able to use +the IOMMUFD VFIO backend. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index aec1831199..b0191d3c69 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -39,3 +39,4 @@ CONFIG_PXB=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch new file mode 100644 index 0000000..9a98477 --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-on-s390x.patch @@ -0,0 +1,37 @@ +From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:33:17 -0500 +Subject: [PATCH 067/101] Compile IOMMUFD on s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on s390x to be able to use +the IOMMUFD VFIO backend. 
+ +Signed-off-by: Eric Auger +--- + configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +index 69a799adbd..24cf6dbd03 100644 +--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch new file mode 100644 index 0000000..a3eb40e --- /dev/null +++ b/SOURCES/kvm-Compile-IOMMUFD-on-x86_64.patch @@ -0,0 +1,37 @@ +From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 11 Jan 2024 12:34:44 -0500 +Subject: [PATCH 068/101] Compile IOMMUFD on x86_64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm) + +Upstream: RHEL only + +Compiles the IOMMUFD object on x86_64 to be able to use +the IOMMUFD VFIO backend.
+ +Signed-off-by: Eric Auger +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ce5be73633..ba41108e0c 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -108,3 +108,4 @@ CONFIG_SGX=y + CONFIG_VHOST_VSOCK=y + CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y ++CONFIG_IOMMUFD=y +-- +2.39.3 + diff --git a/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch b/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch new file mode 100644 index 0000000..31439d7 --- /dev/null +++ b/SOURCES/kvm-Implement-SMBIOS-type-9-v2.6.patch @@ -0,0 +1,155 @@ +From 5c639f8ce65183ce8e44ee8e0230e9d627a440d7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 17:00:27 +0000 +Subject: [PATCH 05/20] Implement SMBIOS type 9 v2.6 + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [3/18] ead230527d93938907a561cf5b985ee4f54d82b1 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 +Author: Felix Wu + + Signed-off-by: Felix Wu + Signed-off-by: Nabih Estefan + Message-Id: <20240221170027.1027325-3-nabihestefan@google.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 04f143d828845d0fd52dd4a52664d81a4f5431f7) +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 49 +++++++++++++++++++++++++++++++++--- + include/hw/firmware/smbios.h | 4 +++ + qemu-options.hx | 2 +- + 3 files changed, 51 insertions(+), 4 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 4f5637d445..074705fa4c 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -124,7 +124,7 @@ static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); + + /* type 9 instance for parsing */ + struct type9_instance { +- const char *slot_designation; ++ const char *slot_designation, *pcidev; + uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, + slot_characteristics1, slot_characteristics2; + uint16_t slot_id; +@@ -427,6 +427,11 @@ static const QemuOptDesc qemu_smbios_type9_opts[] = { + .type = QEMU_OPT_NUMBER, + .help = "slot characteristics2, see the spec", + }, ++ { ++ .name = "pci_device", ++ .type = QEMU_OPT_STRING, ++ .help = "PCI device, if provided." ++ } + }; + + static const QemuOptDesc qemu_smbios_type11_opts[] = { +@@ -851,7 +856,7 @@ static void smbios_build_type_8_table(void) + } + } + +-static void smbios_build_type_9_table(void) ++static void smbios_build_type_9_table(Error **errp) + { + unsigned instance = 0; + struct type9_instance *t9; +@@ -868,6 +873,43 @@ static void smbios_build_type_9_table(void) + t->slot_characteristics1 = t9->slot_characteristics1; + t->slot_characteristics2 = t9->slot_characteristics2; + ++ if (t9->pcidev) { ++ PCIDevice *pdev = NULL; ++ int rc = pci_qdev_find_device(t9->pcidev, &pdev); ++ if (rc != 0) { ++ error_setg(errp, ++ "No PCI device %s for SMBIOS type 9 entry %s", ++ t9->pcidev, t9->slot_designation); ++ return; ++ } ++ /* ++ * We only handle the case were the device is attached to ++ * the PCI root bus. 
The general case is more complex as ++ * bridges are enumerated later and the table would need ++ * to be updated at this moment. ++ */ ++ if (!pci_bus_is_root(pci_get_bus(pdev))) { ++ error_setg(errp, ++ "Cannot create type 9 entry for PCI device %s: " ++ "not attached to the root bus", ++ t9->pcidev); ++ return; ++ } ++ t->segment_group_number = cpu_to_le16(0); ++ t->bus_number = pci_dev_bus_num(pdev); ++ t->device_number = pdev->devfn; ++ } else { ++ /* ++ * Per SMBIOS spec, For slots that are not of the PCI, AGP, PCI-X, ++ * or PCI-Express type that do not have bus/device/function ++ * information, 0FFh should be populated in the fields of Segment ++ * Group Number, Bus Number, Device/Function Number. ++ */ ++ t->segment_group_number = 0xff; ++ t->bus_number = 0xff; ++ t->device_number = 0xff; ++ } ++ + SMBIOS_BUILD_TABLE_POST; + instance++; + } +@@ -1222,7 +1264,7 @@ void smbios_get_tables(MachineState *ms, + } + + smbios_build_type_8_table(); +- smbios_build_type_9_table(); ++ smbios_build_type_9_table(errp); + smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) +@@ -1568,6 +1610,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); + t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); + t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); ++ save_opt(&t->pcidev, opts, "pcidev"); + QTAILQ_INSERT_TAIL(&type9, t, next); + return; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 6bbd5a4c20..f8dd07fe4c 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -222,6 +222,10 @@ struct smbios_type_9 { + uint16_t slot_id; + uint8_t slot_characteristics1; + uint8_t slot_characteristics2; ++ /* SMBIOS spec v2.6+ */ ++ uint16_t segment_group_number; ++ uint8_t bus_number; ++ uint8_t device_number; + } QEMU_PACKED; + + /* SMBIOS type 11 - OEM strings */ +diff --git 
a/qemu-options.hx b/qemu-options.hx +index 94cacc2c63..93364e1765 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2710,7 +2710,7 @@ SRST + ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` + Specify SMBIOS type 4 fields + +-``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` ++``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d][,pci_device=str]`` + Specify SMBIOS type 9 fields + + ``-smbios type=11[,value=str][,path=filename]`` +-- +2.39.3 + diff --git a/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch b/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch new file mode 100644 index 0000000..89466c7 --- /dev/null +++ b/SOURCES/kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch @@ -0,0 +1,218 @@ +From 84fc607d678bd72397a41d706e91fa241fd97266 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 17:00:26 +0000 +Subject: [PATCH 04/20] Implement base of SMBIOS type 9 descriptor. + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [2/18] 2678cc080bfbf3357fa2f94ceaf42fc61b690d32 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit: 735eee07d1f963635d3c3bf9f5e4bf1bc000870e +Author: Felix Wu + + Version 2.1+. + + Signed-off-by: Felix Wu + Signed-off-by: Nabih Estefan + Message-Id: <20240221170027.1027325-2-nabihestefan@google.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 99 ++++++++++++++++++++++++++++++++++++ + include/hw/firmware/smbios.h | 13 +++++ + qemu-options.hx | 3 ++ + 3 files changed, 115 insertions(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7bde23e59d..4f5637d445 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -122,6 +122,16 @@ struct type8_instance { + }; + static QTAILQ_HEAD(, type8_instance) type8 = QTAILQ_HEAD_INITIALIZER(type8); + ++/* type 9 instance for parsing */ ++struct type9_instance { ++ const char *slot_designation; ++ uint8_t slot_type, slot_data_bus_width, current_usage, slot_length, ++ slot_characteristics1, slot_characteristics2; ++ uint16_t slot_id; ++ QTAILQ_ENTRY(type9_instance) next; ++}; ++static QTAILQ_HEAD(, type9_instance) type9 = QTAILQ_HEAD_INITIALIZER(type9); ++ + static struct { + size_t nvalues; + char **values; +@@ -371,6 +381,54 @@ static const QemuOptDesc qemu_smbios_type8_opts[] = { + }, + }; + ++static const QemuOptDesc qemu_smbios_type9_opts[] = { ++ { ++ .name = "type", ++ .type = QEMU_OPT_NUMBER, ++ .help = "SMBIOS element type", ++ }, ++ { ++ .name = "slot_designation", ++ .type = QEMU_OPT_STRING, ++ .help = "string number for reference designation", ++ }, ++ { ++ .name = "slot_type", ++ .type = QEMU_OPT_NUMBER, ++ .help = "connector type", ++ }, ++ { ++ .name = "slot_data_bus_width", ++ .type = QEMU_OPT_NUMBER, ++ .help = "port type", ++ }, ++ { ++ .name = "current_usage", ++ .type = QEMU_OPT_NUMBER, ++ .help = "current usage", ++ }, ++ { ++ .name = "slot_length", ++ .type = QEMU_OPT_NUMBER, ++ .help = "system slot length", ++ }, ++ { ++ .name = "slot_id", ++ .type = QEMU_OPT_NUMBER, ++ .help = "system slot id", ++ }, ++ { ++ .name = "slot_characteristics1", ++ .type = QEMU_OPT_NUMBER, ++ .help = "slot characteristics1, see the spec", ++ }, ++ { ++ .name = "slot_characteristics2", ++ .type = QEMU_OPT_NUMBER, ++ .help = "slot characteristics2, see the spec", ++ }, ++}; ++ + 
static const QemuOptDesc qemu_smbios_type11_opts[] = { + { + .name = "value", +@@ -594,6 +652,7 @@ bool smbios_skip_table(uint8_t type, bool required_table) + #define T2_BASE 0x200 + #define T3_BASE 0x300 + #define T4_BASE 0x400 ++#define T9_BASE 0x900 + #define T11_BASE 0xe00 + + #define T16_BASE 0x1000 +@@ -792,6 +851,28 @@ static void smbios_build_type_8_table(void) + } + } + ++static void smbios_build_type_9_table(void) ++{ ++ unsigned instance = 0; ++ struct type9_instance *t9; ++ ++ QTAILQ_FOREACH(t9, &type9, next) { ++ SMBIOS_BUILD_TABLE_PRE(9, T9_BASE + instance, true); ++ ++ SMBIOS_TABLE_SET_STR(9, slot_designation, t9->slot_designation); ++ t->slot_type = t9->slot_type; ++ t->slot_data_bus_width = t9->slot_data_bus_width; ++ t->current_usage = t9->current_usage; ++ t->slot_length = t9->slot_length; ++ t->slot_id = t9->slot_id; ++ t->slot_characteristics1 = t9->slot_characteristics1; ++ t->slot_characteristics2 = t9->slot_characteristics2; ++ ++ SMBIOS_BUILD_TABLE_POST; ++ instance++; ++ } ++} ++ + static void smbios_build_type_11_table(void) + { + char count_str[128]; +@@ -1141,6 +1222,7 @@ void smbios_get_tables(MachineState *ms, + } + + smbios_build_type_8_table(); ++ smbios_build_type_9_table(); + smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) +@@ -1472,6 +1554,23 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + t8_i->port_type = qemu_opt_get_number(opts, "port_type", 0); + QTAILQ_INSERT_TAIL(&type8, t8_i, next); + return; ++ case 9: { ++ if (!qemu_opts_validate(opts, qemu_smbios_type9_opts, errp)) { ++ return; ++ } ++ struct type9_instance *t; ++ t = g_new0(struct type9_instance, 1); ++ save_opt(&t->slot_designation, opts, "slot_designation"); ++ t->slot_type = qemu_opt_get_number(opts, "slot_type", 0); ++ t->slot_data_bus_width = qemu_opt_get_number(opts, "slot_data_bus_width", 0); ++ t->current_usage = qemu_opt_get_number(opts, "current_usage", 0); ++ t->slot_length = qemu_opt_get_number(opts, "slot_length", 0); ++ 
t->slot_id = qemu_opt_get_number(opts, "slot_id", 0); ++ t->slot_characteristics1 = qemu_opt_get_number(opts, "slot_characteristics1", 0); ++ t->slot_characteristics2 = qemu_opt_get_number(opts, "slot_characteristics2", 0); ++ QTAILQ_INSERT_TAIL(&type9, t, next); ++ return; ++ } + case 11: + if (!qemu_opts_validate(opts, qemu_smbios_type11_opts, errp)) { + return; +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index d24b3ccd32..6bbd5a4c20 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -211,6 +211,19 @@ struct smbios_type_8 { + uint8_t port_type; + } QEMU_PACKED; + ++/* SMBIOS type 9 - System Slots (v2.1+) */ ++struct smbios_type_9 { ++ struct smbios_structure_header header; ++ uint8_t slot_designation; ++ uint8_t slot_type; ++ uint8_t slot_data_bus_width; ++ uint8_t current_usage; ++ uint8_t slot_length; ++ uint16_t slot_id; ++ uint8_t slot_characteristics1; ++ uint8_t slot_characteristics2; ++} QEMU_PACKED; ++ + /* SMBIOS type 11 - OEM strings */ + struct smbios_type_11 { + struct smbios_structure_header header; +diff --git a/qemu-options.hx b/qemu-options.hx +index 0814f43066..94cacc2c63 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2710,6 +2710,9 @@ SRST + ``-smbios type=4[,sock_pfx=str][,manufacturer=str][,version=str][,serial=str][,asset=str][,part=str][,processor-id=%d]`` + Specify SMBIOS type 4 fields + ++``-smbios type=9[,slot_designation=str][,slot_type=%d][,slot_data_bus_width=%d][,current_usage=%d][,slot_length=%d][,slot_id=%d][,slot_characteristics1=%d][,slot_characteristics12=%d]`` ++ Specify SMBIOS type 9 fields ++ + ``-smbios type=11[,value=str][,path=filename]`` + Specify SMBIOS type 11 fields + +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch b/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch new file mode 100644 index 0000000..bc0f15a --- /dev/null +++ 
b/SOURCES/kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch @@ -0,0 +1,60 @@ +From 2e0e4355b2d4edb66b7d8c198339e17940abd682 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 13:03:19 +0000 +Subject: [PATCH 2/3] Revert "chardev/char-socket: Fix TLS io channels sending + too much data to the backend" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/3] 1cb3d72b86ced0f70a09dfa0d325ae8a85db1b2b (berrange/centos-src-qemu) + +This commit results in unexpected termination of the TLS connection. +When 'fd_can_read' returns 0, the code goes on to pass a zero length +buffer to qio_channel_read. The TLS impl calls into gnutls_recv() +with this zero length buffer, at which point GNUTLS returns an error +GNUTLS_E_INVALID_REQUEST. This is treated as fatal by QEMU's TLS code +resulting in the connection being torn down by the chardev. + +Simply skipping the qio_channel_read when the buffer length is zero +is also not satisfactory, as it results in a high CPU burn busy loop +massively slowing QEMU's functionality. + +The proper solution is to avoid tcp_chr_read being called at all +unless the frontend is able to accept more data. This will be done +in a followup commit. + +This reverts commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 + +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit e8ee827ffdb86ebbd5f5213a1f78123c25a90864) +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index f48d341ebc..51d0943fce 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = tcp_chr_read_poll(opaque); +- if (len > sizeof(buf)) { +- len = sizeof(buf); ++ len = sizeof(buf); ++ if (len > s->max_size) { ++ len = s->max_size; + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch b/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch new file mode 100644 index 0000000..135afbe --- /dev/null +++ b/SOURCES/kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch @@ -0,0 +1,216 @@ +From ab5a33d57b48e35388928e388bb6e6479bc77651 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 17:08:30 +0000 +Subject: [PATCH 3/3] Revert "chardev: use a child source for qio input source" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/3] b58e6c19c2b11d5d28db31cf1386226fb01d195e (berrange/centos-src-qemu) + +This reverts commit a7077b8e354d90fec26c2921aa2dea85b90dff90, +and add comments to explain why child sources cannot be used. + +When a GSource is added as a child of another GSource, if its +'prepare' function indicates readiness, then the parent's +'prepare' function will never be run. 
The io_watch_poll_prepare +absolutely *must* be run on every iteration of the main loop, +to ensure that the chardev backend doesn't feed data to the +frontend that it is unable to consume. + +At the time a7077b8e354d90fec26c2921aa2dea85b90dff90 was made, +all the child GSource impls were relying on poll'ing an FD, +so their 'prepare' functions would never indicate readiness +ahead of poll() being invoked. So the buggy behaviour was +not noticed and lay dormant. + +Relatively recently the QIOChannelTLS impl introduced a +level 2 child GSource, which checks with GNUTLS whether it +has cached any data that was decoded but not yet consumed: + + commit ffda5db65aef42266a5053a4be34515106c4c7ee + Author: Antoine Damhet + Date: Tue Nov 15 15:23:29 2022 +0100 + + io/channel-tls: fix handling of bigger read buffers + + Since the TLS backend can read more data from the underlying QIOChannel + we introduce a minimal child GSource to notify if we still have more + data available to be read. + + Signed-off-by: Antoine Damhet + Signed-off-by: Charles Frey + Signed-off-by: Daniel P. Berrangé + +With this, it is now quite common for the 'prepare' function +on a QIOChannelTLS GSource to indicate immediate readiness, +bypassing the parent GSource 'prepare' function. IOW, the +critical 'io_watch_poll_prepare' is being skipped on some +iterations of the main loop. As a result chardev frontend +asserts are now being triggered as they are fed data they +are not ready to consume. 
+ +A reproducer is as follows: + + * In terminal 1 run a GNUTLS *echo* server + + $ gnutls-serv --echo \ + --x509cafile ca-cert.pem \ + --x509keyfile server-key.pem \ + --x509certfile server-cert.pem \ + -p 9000 + + * In terminal 2 run a QEMU guest + + $ qemu-system-s390x \ + -nodefaults \ + -display none \ + -object tls-creds-x509,id=tls0,dir=$PWD,endpoint=client \ + -chardev socket,id=con0,host=localhost,port=9000,tls-creds=tls0 \ + -device sclpconsole,chardev=con0 \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 + +After the previous patch revert, but before this patch revert, +this scenario will crash: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. + +This assert indicates that 'tcp_chr_read' was called without +'tcp_chr_read_poll' having first been checked for ability to +receive more data + +QEMU's use of a 'prepare' function to create/delete another +GSource is rather a hack and not normally the kind of thing that +is expected to be done by a GSource. There is no mechanism to +force GLib to always run the 'prepare' function of a parent +GSource. The best option is to simply not use the child source +concept, and go back to the functional approach previously +relied on. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Tested-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 038b4217884c6f297278bb1ec6f0463c6c8221de) +--- + chardev/char-io.c | 56 ++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 51 insertions(+), 5 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 4451128cba..dab77b112e 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -33,6 +33,7 @@ typedef struct IOWatchPoll { + IOCanReadHandler *fd_can_read; + GSourceFunc fd_read; + void *opaque; ++ GMainContext *context; + } IOWatchPoll; + + static IOWatchPoll *io_watch_poll_from_source(GSource *source) +@@ -50,28 +51,59 @@ static gboolean io_watch_poll_prepare(GSource *source, + return FALSE; + } + ++ /* ++ * We do not register the QIOChannel watch as a child GSource. ++ * The 'prepare' function on the parent GSource will be ++ * skipped if a child GSource's 'prepare' function indicates ++ * readiness. We need this prepare function be guaranteed ++ * to run on *every* iteration of the main loop, because ++ * it is critical to ensure we remove the QIOChannel watch ++ * if 'fd_can_read' indicates the frontend cannot receive ++ * more data. 
++ */ + if (now_active) { + iwp->src = qio_channel_create_watch( + iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); + g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); +- g_source_add_child_source(source, iwp->src); +- g_source_unref(iwp->src); ++ g_source_attach(iwp->src, iwp->context); + } else { +- g_source_remove_child_source(source, iwp->src); ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); + iwp->src = NULL; + } + return FALSE; + } + ++static gboolean io_watch_poll_check(GSource *source) ++{ ++ return FALSE; ++} ++ + static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback, + gpointer user_data) + { +- return G_SOURCE_CONTINUE; ++ abort(); ++} ++ ++static void io_watch_poll_finalize(GSource *source) ++{ ++ /* ++ * Due to a glib bug, removing the last reference to a source ++ * inside a finalize callback causes recursive locking (and a ++ * deadlock). This is not a problem inside other callbacks, ++ * including dispatch callbacks, so we call io_remove_watch_poll ++ * to remove this source. At this point, iwp->src must ++ * be NULL, or we would leak it. 
++ */ ++ IOWatchPoll *iwp = io_watch_poll_from_source(source); ++ assert(iwp->src == NULL); + } + + static GSourceFuncs io_watch_poll_funcs = { + .prepare = io_watch_poll_prepare, ++ .check = io_watch_poll_check, + .dispatch = io_watch_poll_dispatch, ++ .finalize = io_watch_poll_finalize, + }; + + GSource *io_add_watch_poll(Chardev *chr, +@@ -91,6 +123,7 @@ GSource *io_add_watch_poll(Chardev *chr, + iwp->ioc = ioc; + iwp->fd_read = (GSourceFunc) fd_read; + iwp->src = NULL; ++ iwp->context = context; + + name = g_strdup_printf("chardev-iowatch-%s", chr->label); + g_source_set_name((GSource *)iwp, name); +@@ -101,10 +134,23 @@ GSource *io_add_watch_poll(Chardev *chr, + return (GSource *)iwp; + } + ++static void io_remove_watch_poll(GSource *source) ++{ ++ IOWatchPoll *iwp; ++ ++ iwp = io_watch_poll_from_source(source); ++ if (iwp->src) { ++ g_source_destroy(iwp->src); ++ g_source_unref(iwp->src); ++ iwp->src = NULL; ++ } ++ g_source_destroy(&iwp->parent); ++} ++ + void remove_fd_in_watch(Chardev *chr) + { + if (chr->gsource) { +- g_source_destroy(chr->gsource); ++ io_remove_watch_poll(chr->gsource); + chr->gsource = NULL; + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch b/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch deleted file mode 100644 index b937d27..0000000 --- a/SOURCES/kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch +++ /dev/null @@ -1,84 +0,0 @@ -From 61256a82ce78f40222455becb8850b5f5ebb5d72 Mon Sep 17 00:00:00 2001 -From: Igor Mammedov -Date: Tue, 18 Apr 2023 11:04:49 +0200 -Subject: [PATCH 1/3] acpi: pcihp: allow repeating hot-unplug requests - -RH-Author: Igor Mammedov -RH-MergeRequest: 159: acpi: pcihp: allow repeating hot-unplug requests -RH-Bugzilla: 2087047 -RH-Acked-by: Ani Sinha -RH-Acked-by: Julia Suvorova -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/1] 9c597232466b27d91f127ee6004322d6ba69755f (imammedo/qemu-kvm-c-9-s-imam) - -with Q35 using ACPI PCI 
hotplug by default, user's request to unplug -device is ignored when it's issued before guest OS has been booted. -And any additional attempt to request device hot-unplug afterwards -results in following error: - - "Device XYZ is already in the process of unplug" - -arguably it can be considered as a regression introduced by [2], -before which it was possible to issue unplug request multiple -times. - -Accept new uplug requests after timeout (1ms). This brings ACPI PCI -hotplug on par with native PCIe unplug behavior [1] and allows user -to repeat unplug requests at propper times. -Set expire timeout to arbitrary 1msec so user won't be able to -flood guest with SCI interrupts by calling device_del in tight loop. - -PS: -ACPI spec doesn't mandate what OSPM can do with GPEx.status -bits set before it's booted => it's impl. depended. -Status bits may be retained (I tested with one Windows version) -or cleared (Linux since 2.6 kernel times) during guest's ACPI -subsystem initialization. -Clearing status bits (though not wrong per se) hides the unplug -event from guest, and it's upto user to repeat device_del later -when guest is able to handle unplug requests. - -1) 18416c62e3 ("pcie: expire pending delete") -2) -Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del") -Signed-off-by: Igor Mammedov -Acked-by: Gerd Hoffmann -CC: mst@redhat.com -CC: anisinha@redhat.com -CC: jusual@redhat.com -CC: kraxel@redhat.com -Message-Id: <20230418090449.2155757-1-imammedo@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Ani Sinha -(cherry picked from commit 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d) -Signed-off-by: Igor Mammedov ---- - hw/acpi/pcihp.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c -index dcfb779a7a..cdd6f775a1 100644 ---- a/hw/acpi/pcihp.c -+++ b/hw/acpi/pcihp.c -@@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, - * acpi_pcihp_eject_slot() when the operation is completed. - */ - pdev->qdev.pending_deleted_event = true; -+ /* if unplug was requested before OSPM is initialized, -+ * linux kernel will clear GPE0.sts[] bits during boot, which effectively -+ * hides unplug event. And than followup qmp_device_del() calls remain -+ * blocked by above flag permanently. -+ * Unblock qmp_device_del() by setting expire limit, so user can -+ * repeat unplug request later when OSPM has been booted. -+ */ -+ pdev->qdev.pending_deleted_expires_ms = -+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */ -+ - s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); - acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); - } --- -2.39.1 - diff --git a/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch new file mode 100644 index 0000000..f30b81f --- /dev/null +++ b/SOURCES/kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch @@ -0,0 +1,60 @@ +From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:01 -0500 +Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release() + a no-op + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm) + +aio_context_acquire()/aio_context_release() has been replaced by 
+fine-grained locking to protect state shared by multiple threads. The +AioContext lock still plays the role of balancing locking in +AIO_WAIT_WHILE() and many functions in QEMU either require that the +AioContext lock is held or not held for this reason. In other words, the +AioContext lock is purely there for consistency with itself and serves +no real purpose anymore. + +Stop actually acquiring/releasing the lock in +aio_context_acquire()/aio_context_release() so that subsequent patches +can remove callers across the codebase incrementally. + +I have performed "make check" and qemu-iotests stress tests across +x86-64, ppc64le, and aarch64 to confirm that there are no failures as a +result of eliminating the lock. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Acked-by: Kevin Wolf +Message-ID: <20231205182011.1976568-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + util/async.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/util/async.c b/util/async.c +index 8f90ddc304..04ee83d220 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx) + + void aio_context_acquire(AioContext *ctx) + { +- qemu_rec_mutex_lock(&ctx->lock); ++ /* TODO remove this function */ + } + + void aio_context_release(AioContext *ctx) + { +- qemu_rec_mutex_unlock(&ctx->lock); ++ /* TODO remove this function */ + } + + QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch new file mode 100644 index 0000000..a64e246 --- /dev/null +++ b/SOURCES/kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch @@ -0,0 +1,102 @@ +From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:07 -0500 +Subject: [PATCH 090/101] aio: remove + 
aio_context_acquire()/aio_context_release() API + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm) + +Delete these functions because nothing calls these functions anymore. + +I introduced these APIs in commit 98563fc3ec44 ("aio: add +aio_context_acquire() and aio_context_release()") in 2014. It's with a +sigh of relief that I delete these APIs almost 10 years later. + +Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an +understanding of where the code needed to go in order to remove the +limitations that the original dataplane and the IOThread/AioContext +approach that followed it. + +Emanuele Giuseppe Esposito had the splendid determination to convert +large parts of the codebase so that they no longer needed the AioContext +lock. This was a painstaking process, both in the actual code changes +required and the iterations of code review that Emanuele eked out of +Kevin and me over many months. + +Kevin Wolf tackled multitudes of graph locking conversions to protect +in-flight I/O from run-time changes to the block graph as well as the +clang Thread Safety Analysis annotations that allow the compiler to +check whether the graph lock is being used correctly. + +And me, well, I'm just here to add some pizzazz to the QEMU multi-queue +block layer :). Thank you to everyone who helped with this effort, +including Eric Blake, code reviewer extraordinaire, and others who I've +forgotten to mention. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-11-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/block/aio.h | 17 ----------------- + util/async.c | 10 ---------- + 2 files changed, 27 deletions(-) + +diff --git a/include/block/aio.h b/include/block/aio.h +index f08b358077..af05512a7d 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx); + */ + void aio_context_unref(AioContext *ctx); + +-/* Take ownership of the AioContext. If the AioContext will be shared between +- * threads, and a thread does not want to be interrupted, it will have to +- * take ownership around calls to aio_poll(). Otherwise, aio_poll() +- * automatically takes care of calling aio_context_acquire and +- * aio_context_release. +- * +- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A +- * thread still has to call those to avoid being interrupted by the guest. +- * +- * Bottom halves, timers and callbacks can be created or removed without +- * acquiring the AioContext. +- */ +-void aio_context_acquire(AioContext *ctx); +- +-/* Relinquish ownership of the AioContext. */ +-void aio_context_release(AioContext *ctx); +- + /** + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will + * run only once and as soon as possible. 
+diff --git a/util/async.c b/util/async.c +index dfd44ef612..460529057c 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx) + g_source_unref(&ctx->source); + } + +-void aio_context_acquire(AioContext *ctx) +-{ +- /* TODO remove this function */ +-} +- +-void aio_context_release(AioContext *ctx) +-{ +- /* TODO remove this function */ +-} +- + QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext) + + AioContext *qemu_get_current_aio_context(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch new file mode 100644 index 0000000..7f95b67 --- /dev/null +++ b/SOURCES/kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch @@ -0,0 +1,81 @@ +From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:06 -0500 +Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE() + and AIO_WAIT_WHILE_UNLOCKED() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm) + +Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and +AIO_WAIT_WHILE_UNLOCKED() are equivalent. + +A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED(). 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-10-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/block/aio-wait.h | 16 ++++------------ + 1 file changed, 4 insertions(+), 12 deletions(-) + +diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h +index 5449b6d742..157f105916 100644 +--- a/include/block/aio-wait.h ++++ b/include/block/aio-wait.h +@@ -63,9 +63,6 @@ extern AioWait global_aio_wait; + * @ctx: the aio context, or NULL if multiple aio contexts (for which the + * caller does not hold a lock) are involved in the polling condition. + * @cond: wait while this conditional expression is true +- * @unlock: whether to unlock and then lock again @ctx. This applies +- * only when waiting for another AioContext from the main loop. +- * Otherwise it's ignored. + * + * Wait while a condition is true. Use this to implement synchronous + * operations that require event loop activity. +@@ -78,7 +75,7 @@ extern AioWait global_aio_wait; + * wait on conditions between two IOThreads since that could lead to deadlock, + * go via the main loop instead. 
+ */ +-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \ ++#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \ + bool waited_ = false; \ + AioWait *wait_ = &global_aio_wait; \ + AioContext *ctx_ = (ctx); \ +@@ -95,13 +92,7 @@ extern AioWait global_aio_wait; + assert(qemu_get_current_aio_context() == \ + qemu_get_aio_context()); \ + while ((cond)) { \ +- if (unlock && ctx_) { \ +- aio_context_release(ctx_); \ +- } \ + aio_poll(qemu_get_aio_context(), true); \ +- if (unlock && ctx_) { \ +- aio_context_acquire(ctx_); \ +- } \ + waited_ = true; \ + } \ + } \ +@@ -109,10 +100,11 @@ extern AioWait global_aio_wait; + waited_; }) + + #define AIO_WAIT_WHILE(ctx, cond) \ +- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true) ++ AIO_WAIT_WHILE_INTERNAL(ctx, cond) + ++/* TODO replace this with AIO_WAIT_WHILE() in a future patch */ + #define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \ +- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false) ++ AIO_WAIT_WHILE_INTERNAL(ctx, cond) + + /** + * aio_wait_kick: +-- +2.39.3 + diff --git a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch b/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch deleted file mode 100644 index 69505f8..0000000 --- a/SOURCES/kvm-apic-disable-reentrancy-detection-for-apic-msi.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 5beea8b889a38aa59259679d7f1ba050f09eb0f0 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 12/21] apic: disable reentrancy detection for apic-msi - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/13] 329f3b1c02fc42d85c821dd14c70e6b885cf849a (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 50795ee051a342c681a9b45671c552fbd6274db8 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:13 2023 -0400 - - apic: disable reentrancy detection for apic-msi - - As the code is designed for 
re-entrant calls to apic-msi, mark apic-msi - as reentrancy-safe. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-9-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/intc/apic.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/intc/apic.c b/hw/intc/apic.c -index 20b5a94073..ac3d47d231 100644 ---- a/hw/intc/apic.c -+++ b/hw/intc/apic.c -@@ -885,6 +885,13 @@ static void apic_realize(DeviceState *dev, Error **errp) - memory_region_init_io(&s->io_memory, OBJECT(s), &apic_io_ops, s, "apic-msi", - APIC_SPACE_SIZE); - -+ /* -+ * apic-msi's apic_mem_write can call into ioapic_eoi_broadcast, which can -+ * write back to apic-msi. As such mark the apic-msi region re-entrancy -+ * safe. -+ */ -+ s->io_memory.disable_reentrancy_guard = true; -+ - s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; - --- -2.39.3 - diff --git a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch b/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch deleted file mode 100644 index 65ba3be..0000000 --- a/SOURCES/kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch +++ /dev/null @@ -1,231 +0,0 @@ -From f6db359f543723e2eb840653d35004af357ea5ac Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 06/21] async: Add an optional reentrancy guard to the BH API - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/13] 009a9a68c1c25b9ad0cd9bc0d73b3e07bee2a19d (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 9c86c97f12c060bf7484dd931f38634e166a81f0 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:07 2023 -0400 - - async: Add an optional reentrancy guard to the BH API - - Devices can pass their 
MemoryReentrancyGuard (from their DeviceState), - when creating new BHes. Then, the async API will toggle the guard - before/after calling the BH call-back. This prevents bh->mmio reentrancy - issues. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-3-alxndr@bu.edu> - [thuth: Fix "line over 90 characters" checkpatch.pl error] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - docs/devel/multiple-iothreads.txt | 7 +++++++ - include/block/aio.h | 18 ++++++++++++++++-- - include/qemu/main-loop.h | 7 +++++-- - tests/unit/ptimer-test-stubs.c | 3 ++- - util/async.c | 18 +++++++++++++++++- - util/main-loop.c | 6 ++++-- - util/trace-events | 1 + - 7 files changed, 52 insertions(+), 8 deletions(-) - -diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt -index 343120f2ef..a3e949f6b3 100644 ---- a/docs/devel/multiple-iothreads.txt -+++ b/docs/devel/multiple-iothreads.txt -@@ -61,6 +61,7 @@ There are several old APIs that use the main loop AioContext: - * LEGACY qemu_aio_set_event_notifier() - monitor an event notifier - * LEGACY timer_new_ms() - create a timer - * LEGACY qemu_bh_new() - create a BH -+ * LEGACY qemu_bh_new_guarded() - create a BH with a device re-entrancy guard - * LEGACY qemu_aio_wait() - run an event loop iteration - - Since they implicitly work on the main loop they cannot be used in code that -@@ -72,8 +73,14 @@ Instead, use the AioContext functions directly (see include/block/aio.h): - * aio_set_event_notifier() - monitor an event notifier - * aio_timer_new() - create a timer - * aio_bh_new() - create a BH -+ * aio_bh_new_guarded() - create a BH with a device re-entrancy guard - * aio_poll() - run an event loop iteration - -+The qemu_bh_new_guarded/aio_bh_new_guarded APIs accept a "MemReentrancyGuard" -+argument, which is used to check for and prevent re-entrancy problems. 
For -+BHs associated with devices, the reentrancy-guard is contained in the -+corresponding DeviceState and named "mem_reentrancy_guard". -+ - The AioContext can be obtained from the IOThread using - iothread_get_aio_context() or for the main loop using qemu_get_aio_context(). - Code that takes an AioContext argument works both in IOThreads or the main -diff --git a/include/block/aio.h b/include/block/aio.h -index 543717f294..db6f23c619 100644 ---- a/include/block/aio.h -+++ b/include/block/aio.h -@@ -23,6 +23,8 @@ - #include "qemu/thread.h" - #include "qemu/timer.h" - #include "block/graph-lock.h" -+#include "hw/qdev-core.h" -+ - - typedef struct BlockAIOCB BlockAIOCB; - typedef void BlockCompletionFunc(void *opaque, int ret); -@@ -331,9 +333,11 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * is opaque and must be allocated prior to its use. - * - * @name: A human-readable identifier for debugging purposes. -+ * @reentrancy_guard: A guard set when entering a cb to prevent -+ * device-reentrancy issues - */ - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name); -+ const char *name, MemReentrancyGuard *reentrancy_guard); - - /** - * aio_bh_new: Allocate a new bottom half structure -@@ -342,7 +346,17 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, - * string. - */ - #define aio_bh_new(ctx, cb, opaque) \ -- aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), NULL) -+ -+/** -+ * aio_bh_new_guarded: Allocate a new bottom half structure with a -+ * reentrancy_guard -+ * -+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name -+ * string. -+ */ -+#define aio_bh_new_guarded(ctx, cb, opaque, guard) \ -+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)), guard) - - /** - * aio_notify: Force processing of pending events. 
-diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h -index b3e54e00bc..68e70e61aa 100644 ---- a/include/qemu/main-loop.h -+++ b/include/qemu/main-loop.h -@@ -387,9 +387,12 @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); - - /* internal interfaces */ - -+#define qemu_bh_new_guarded(cb, opaque, guard) \ -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), guard) - #define qemu_bh_new(cb, opaque) \ -- qemu_bh_new_full((cb), (opaque), (stringify(cb))) --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); -+ qemu_bh_new_full((cb), (opaque), (stringify(cb)), NULL) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard); - void qemu_bh_schedule_idle(QEMUBH *bh); - - enum { -diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c -index f2bfcede93..8c9407c560 100644 ---- a/tests/unit/ptimer-test-stubs.c -+++ b/tests/unit/ptimer-test-stubs.c -@@ -107,7 +107,8 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) - return deadline; - } - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh = g_new(QEMUBH, 1); - -diff --git a/util/async.c b/util/async.c -index 21016a1ac7..a9b528c370 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -65,6 +65,7 @@ struct QEMUBH { - void *opaque; - QSLIST_ENTRY(QEMUBH) next; - unsigned flags; -+ MemReentrancyGuard *reentrancy_guard; - }; - - /* Called concurrently from any thread */ -@@ -137,7 +138,7 @@ void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, - } - - QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, -- const char *name) -+ const char *name, MemReentrancyGuard *reentrancy_guard) - { - QEMUBH *bh; - bh = g_new(QEMUBH, 1); -@@ -146,13 +147,28 @@ QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc 
*cb, void *opaque, - .cb = cb, - .opaque = opaque, - .name = name, -+ .reentrancy_guard = reentrancy_guard, - }; - return bh; - } - - void aio_bh_call(QEMUBH *bh) - { -+ bool last_engaged_in_io = false; -+ -+ if (bh->reentrancy_guard) { -+ last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -+ if (bh->reentrancy_guard->engaged_in_io) { -+ trace_reentrant_aio(bh->ctx, bh->name); -+ } -+ bh->reentrancy_guard->engaged_in_io = true; -+ } -+ - bh->cb(bh->opaque); -+ -+ if (bh->reentrancy_guard) { -+ bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ } - } - - /* Multiple occurrences of aio_bh_poll cannot be called concurrently. */ -diff --git a/util/main-loop.c b/util/main-loop.c -index e180c85145..7022f02ef8 100644 ---- a/util/main-loop.c -+++ b/util/main-loop.c -@@ -605,9 +605,11 @@ void main_loop_wait(int nonblocking) - - /* Functions to operate on the main QEMU AioContext. */ - --QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) -+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name, -+ MemReentrancyGuard *reentrancy_guard) - { -- return aio_bh_new_full(qemu_aio_context, cb, opaque, name); -+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name, -+ reentrancy_guard); - } - - /* -diff --git a/util/trace-events b/util/trace-events -index 16f78d8fe5..3f7e766683 100644 ---- a/util/trace-events -+++ b/util/trace-events -@@ -11,6 +11,7 @@ poll_remove(void *ctx, void *node, int fd) "ctx %p node %p fd %d" - # async.c - aio_co_schedule(void *ctx, void *co) "ctx %p co %p" - aio_co_schedule_bh_cb(void *ctx, void *co) "ctx %p co %p" -+reentrant_aio(void *ctx, const char *name) "ctx %p name %s" - - # thread-pool.c - thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p" --- -2.39.3 - diff --git a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch b/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch deleted file mode 100644 index df71fa2..0000000 --- 
a/SOURCES/kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 137e84f68da06666ebf7f391766cc6209ce1c39c Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 13/21] async: avoid use-after-free on re-entrancy guard - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/13] d4b957108aaacf4a597122aaeeaa8e56985f1fca (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 7915bd06f25e1803778081161bf6fa10c42dc7cd -Author: Alexander Bulekov -Date: Mon May 1 10:19:56 2023 -0400 - - async: avoid use-after-free on re-entrancy guard - - A BH callback can free the BH, causing a use-after-free in aio_bh_call. - Fix that by keeping a local copy of the re-entrancy guard pointer. - - Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58513 - Fixes: 9c86c97f12 ("async: Add an optional reentrancy guard to the BH API") - Signed-off-by: Alexander Bulekov - Message-Id: <20230501141956.3444868-1-alxndr@bu.edu> - Reviewed-by: Thomas Huth - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - util/async.c | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - -diff --git a/util/async.c b/util/async.c -index a9b528c370..cd1a1815f9 100644 ---- a/util/async.c -+++ b/util/async.c -@@ -156,18 +156,20 @@ void aio_bh_call(QEMUBH *bh) - { - bool last_engaged_in_io = false; - -- if (bh->reentrancy_guard) { -- last_engaged_in_io = bh->reentrancy_guard->engaged_in_io; -- if (bh->reentrancy_guard->engaged_in_io) { -+ /* Make a copy of the guard-pointer as cb may free the bh */ -+ MemReentrancyGuard *reentrancy_guard = bh->reentrancy_guard; -+ if (reentrancy_guard) { -+ last_engaged_in_io = reentrancy_guard->engaged_in_io; -+ if (reentrancy_guard->engaged_in_io) { - trace_reentrant_aio(bh->ctx, bh->name); - } -- 
bh->reentrancy_guard->engaged_in_io = true; -+ reentrancy_guard->engaged_in_io = true; - } - - bh->cb(bh->opaque); - -- if (bh->reentrancy_guard) { -- bh->reentrancy_guard->engaged_in_io = last_engaged_in_io; -+ if (reentrancy_guard) { -+ reentrancy_guard->engaged_in_io = last_engaged_in_io; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch new file mode 100644 index 0000000..898e35b --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Introduce-the-iommufd-object.patch @@ -0,0 +1,476 @@ +From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 21 Nov 2023 16:44:00 +0800 +Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm) + +Introduce an iommufd object which allows the interaction +with the host /dev/iommu device. + +The /dev/iommu can have been already pre-opened outside of qemu, +in which case the fd can be passed directly along with the +iommufd object: + +This allows the iommufd object to be shared across several +subsystems (VFIO, VDPA, ...). For example, libvirt would open +the /dev/iommu once. + +If no fd is passed along with the iommufd object, the /dev/iommu +is opened by the qemu code. 
+ +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 8 ++ + backends/Kconfig | 4 + + backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++ + backends/meson.build | 1 + + backends/trace-events | 10 ++ + include/sysemu/iommufd.h | 38 ++++++ + qapi/qom.json | 19 +++ + qemu-options.hx | 12 ++ + 8 files changed, 337 insertions(+) + create mode 100644 backends/iommufd.c + create mode 100644 include/sysemu/iommufd.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 695e0bd34f..a5a446914a 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c + F: docs/system/s390x/vfio-ap.rst + L: qemu-s390x@nongnu.org + ++iommufd ++M: Yi Liu ++M: Eric Auger ++M: Zhenzhong Duan ++S: Supported ++F: backends/iommufd.c ++F: include/sysemu/iommufd.h ++ + vhost + M: Michael S. Tsirkin + S: Supported +diff --git a/backends/Kconfig b/backends/Kconfig +index f35abc1609..2cb23f62fa 100644 +--- a/backends/Kconfig ++++ b/backends/Kconfig +@@ -1 +1,5 @@ + source tpm/Kconfig ++ ++config IOMMUFD ++ bool ++ depends on VFIO +diff --git a/backends/iommufd.c b/backends/iommufd.c +new file mode 100644 +index 0000000000..ba58a0eb0d +--- /dev/null ++++ b/backends/iommufd.c +@@ -0,0 +1,245 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "sysemu/iommufd.h" ++#include "qapi/error.h" ++#include "qapi/qmp/qerror.h" ++#include "qemu/module.h" ++#include "qom/object_interfaces.h" ++#include "qemu/error-report.h" ++#include "monitor/monitor.h" ++#include "trace.h" ++#include ++#include ++ ++static void iommufd_backend_init(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ be->fd = -1; ++ be->users = 0; ++ be->owned = true; ++ qemu_mutex_init(&be->lock); ++} ++ ++static void iommufd_backend_finalize(Object *obj) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ ++ if (be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++} ++ ++static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj); ++ int fd = -1; ++ ++ fd = monitor_fd_param(monitor_cur(), str, errp); ++ if (fd == -1) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ qemu_mutex_lock(&be->lock); ++ be->fd = fd; ++ be->owned = false; ++ qemu_mutex_unlock(&be->lock); ++ trace_iommu_backend_set_fd(be->fd); ++} ++ ++static bool iommufd_backend_can_be_deleted(UserCreatable *uc) ++{ ++ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc); ++ ++ return !be->users; ++} ++ ++static void iommufd_backend_class_init(ObjectClass *oc, void *data) ++{ ++ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc); ++ ++ ucc->can_be_deleted = iommufd_backend_can_be_deleted; ++ ++ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd); ++} ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) ++{ ++ int fd, ret = 0; ++ ++ qemu_mutex_lock(&be->lock); ++ if (be->users == UINT32_MAX) { ++ error_setg(errp, "too many connections"); ++ ret = -E2BIG; ++ goto out; ++ } ++ if (be->owned && !be->users) { ++ fd = qemu_open_old("/dev/iommu", O_RDWR); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, 
"/dev/iommu opening failed"); ++ ret = fd; ++ goto out; ++ } ++ be->fd = fd; ++ } ++ be->users++; ++out: ++ trace_iommufd_backend_connect(be->fd, be->owned, ++ be->users, ret); ++ qemu_mutex_unlock(&be->lock); ++ return ret; ++} ++ ++void iommufd_backend_disconnect(IOMMUFDBackend *be) ++{ ++ qemu_mutex_lock(&be->lock); ++ if (!be->users) { ++ goto out; ++ } ++ be->users--; ++ if (!be->users && be->owned) { ++ close(be->fd); ++ be->fd = -1; ++ } ++out: ++ trace_iommufd_backend_disconnect(be->fd, be->users); ++ qemu_mutex_unlock(&be->lock); ++} ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_alloc alloc_data = { ++ .size = sizeof(alloc_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "Failed to allocate ioas"); ++ return ret; ++ } ++ ++ *ioas_id = alloc_data.out_ioas_id; ++ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret); ++ ++ return ret; ++} ++ ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id) ++{ ++ int ret, fd = be->fd; ++ struct iommu_destroy des = { ++ .size = sizeof(des), ++ .id = id, ++ }; ++ ++ ret = ioctl(fd, IOMMU_DESTROY, &des); ++ trace_iommufd_backend_free_id(fd, id, ret); ++ if (ret) { ++ error_report("Failed to free id: %u %m", id); ++ } ++} ++ ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_map map = { ++ .size = sizeof(map), ++ .flags = IOMMU_IOAS_MAP_READABLE | ++ IOMMU_IOAS_MAP_FIXED_IOVA, ++ .ioas_id = ioas_id, ++ .__reserved = 0, ++ .user_va = (uintptr_t)vaddr, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ if (!readonly) { ++ map.flags |= IOMMU_IOAS_MAP_WRITEABLE; ++ } ++ ++ ret = ioctl(fd, IOMMU_IOAS_MAP, &map); ++ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size, ++ vaddr, readonly, ret); ++ if (ret) { ++ ret = -errno; 
++ ++ /* TODO: Not support mapping hardware PCI BAR region for now. */ ++ if (errno == EFAULT) { ++ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?"); ++ } else { ++ error_report("IOMMU_IOAS_MAP failed: %m"); ++ } ++ } ++ return ret; ++} ++ ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, ++ hwaddr iova, ram_addr_t size) ++{ ++ int ret, fd = be->fd; ++ struct iommu_ioas_unmap unmap = { ++ .size = sizeof(unmap), ++ .ioas_id = ioas_id, ++ .iova = iova, ++ .length = size, ++ }; ++ ++ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap); ++ /* ++ * IOMMUFD takes mapping as some kind of object, unmapping ++ * nonexistent mapping is treated as deleting a nonexistent ++ * object and return ENOENT. This is different from legacy ++ * backend which allows it. vIOMMU may trigger a lot of ++ * redundant unmapping, to avoid flush the log, treat them ++ * as succeess for IOMMUFD just like legacy backend. ++ */ ++ if (ret && errno == ENOENT) { ++ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret); ++ ret = 0; ++ } else { ++ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret); ++ } ++ ++ if (ret) { ++ ret = -errno; ++ error_report("IOMMU_IOAS_UNMAP failed: %m"); ++ } ++ return ret; ++} ++ ++static const TypeInfo iommufd_backend_info = { ++ .name = TYPE_IOMMUFD_BACKEND, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(IOMMUFDBackend), ++ .instance_init = iommufd_backend_init, ++ .instance_finalize = iommufd_backend_finalize, ++ .class_size = sizeof(IOMMUFDBackendClass), ++ .class_init = iommufd_backend_class_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&iommufd_backend_info); ++} ++ ++type_init(register_types); +diff --git a/backends/meson.build b/backends/meson.build +index 914c7c4afb..9a5cea480d 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -20,6 +20,7 @@ if have_vhost_user + system_ss.add(when: 
'CONFIG_VIRTIO', if_true: files('vhost-user.c')) + endif + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) ++system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c')) + if have_vhost_user_crypto + system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) + endif +diff --git a/backends/trace-events b/backends/trace-events +index 652eb76a57..d45c6e31a6 100644 +--- a/backends/trace-events ++++ b/backends/trace-events +@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void) + dbus_vmstate_post_load(int version_id) "version_id: %d" + dbus_vmstate_loading(const char *id) "id: %s" + dbus_vmstate_saving(const char *id) "id: %s" ++ ++# iommufd.c ++iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)" ++iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d" ++iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d" ++iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)" ++iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)" ++iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)" ++iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +new file mode 100644 +index 0000000000..9c5524b0ed +--- /dev/null ++++ b/include/sysemu/iommufd.h +@@ -0,0 +1,38 @@ ++#ifndef SYSEMU_IOMMUFD_H ++#define SYSEMU_IOMMUFD_H ++ ++#include "qom/object.h" ++#include "qemu/thread.h" ++#include "exec/hwaddr.h" ++#include 
"exec/cpu-common.h" ++ ++#define TYPE_IOMMUFD_BACKEND "iommufd" ++OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND) ++ ++struct IOMMUFDBackendClass { ++ ObjectClass parent_class; ++}; ++ ++struct IOMMUFDBackend { ++ Object parent; ++ ++ /*< protected >*/ ++ int fd; /* /dev/iommu file descriptor */ ++ bool owned; /* is the /dev/iommu opened internally */ ++ QemuMutex lock; ++ uint32_t users; ++ ++ /*< public >*/ ++}; ++ ++int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp); ++void iommufd_backend_disconnect(IOMMUFDBackend *be); ++ ++int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, ++ Error **errp); ++void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id); ++int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly); ++int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id, ++ hwaddr iova, ram_addr_t size); ++#endif +diff --git a/qapi/qom.json b/qapi/qom.json +index c53ef978ff..95516ba325 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -794,6 +794,23 @@ + { 'struct': 'VfioUserServerProperties', + 'data': { 'socket': 'SocketAddress', 'device': 'str' } } + ++## ++# @IOMMUFDProperties: ++# ++# Properties for iommufd objects. ++# ++# @fd: file descriptor name previously passed via 'getfd' command, ++# which represents a pre-opened /dev/iommu. This allows the ++# iommufd object to be shared accross several subsystems ++# (VFIO, VDPA, ...), and the file descriptor to be shared ++# with other process, e.g. DPDK. 
(default: QEMU opens ++# /dev/iommu by itself) ++# ++# Since: 9.0 ++## ++{ 'struct': 'IOMMUFDProperties', ++ 'data': { '*fd': 'str' } } ++ + ## + # @RngProperties: + # +@@ -934,6 +951,7 @@ + 'input-barrier', + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd', + 'iothread', + 'main-loop', + { 'name': 'memory-backend-epc', +@@ -1003,6 +1021,7 @@ + 'input-barrier': 'InputBarrierProperties', + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, ++ 'iommufd': 'IOMMUFDProperties', + 'iothread': 'IothreadProperties', + 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', +diff --git a/qemu-options.hx b/qemu-options.hx +index 557118cb1f..0814f43066 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -5224,6 +5224,18 @@ SRST + + The ``share`` boolean option is on by default with memfd. + ++ ``-object iommufd,id=id[,fd=fd]`` ++ Creates an iommufd backend which allows control of DMA mapping ++ through the ``/dev/iommu`` device. ++ ++ The ``id`` parameter is a unique ID which frontends (such as ++ vfio-pci of vdpa) will use to connect with the iommufd backend. ++ ++ The ``fd`` parameter is an optional pre-opened file descriptor ++ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared ++ across all subsystems, bringing the benefit of centralized ++ reference counting. ++ + ``-object rng-builtin,id=id`` + Creates a random number generator backend which obtains entropy + from QEMU builtin functions. 
The ``id`` parameter is a unique ID +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch new file mode 100644 index 0000000..5ee365b --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch @@ -0,0 +1,47 @@ +From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 22 Dec 2023 08:55:23 +0100 +Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend + users +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm) + +QOM already has a ref count on objects and it will assert much +earlier, when INT_MAX is reached. 
+ +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0) +Signed-off-by: Eric Auger +--- + backends/iommufd.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index ba58a0eb0d..393c0d9a37 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + int fd, ret = 0; + + qemu_mutex_lock(&be->lock); +- if (be->users == UINT32_MAX) { +- error_setg(errp, "too many connections"); +- ret = -E2BIG; +- goto out; +- } + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +-- +2.39.3 + diff --git a/SOURCES/kvm-backends-iommufd-Remove-mutex.patch b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch new file mode 100644 index 0000000..83878d5 --- /dev/null +++ b/SOURCES/kvm-backends-iommufd-Remove-mutex.patch @@ -0,0 +1,112 @@ +From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 16:58:41 +0100 +Subject: [PATCH 065/101] backends/iommufd: Remove mutex +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm) + +Coverity reports a concurrent data access violation because be->users +is being accessed in iommufd_backend_can_be_deleted() without holding +the mutex. + +However, these routines are called from the QEMU main thread when a +device is created. In this case, the code paths should be protected by +the BQL lock and it should be safe to drop the IOMMUFD backend mutex. +Simply remove it. 
+ +Fixes: CID 1531550 +Fixes: CID 1531549 +Reviewed-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451) +Signed-off-by: Eric Auger +--- + backends/iommufd.c | 7 ------- + include/sysemu/iommufd.h | 2 -- + 2 files changed, 9 deletions(-) + +diff --git a/backends/iommufd.c b/backends/iommufd.c +index 393c0d9a37..1ef683c7b0 100644 +--- a/backends/iommufd.c ++++ b/backends/iommufd.c +@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj) + be->fd = -1; + be->users = 0; + be->owned = true; +- qemu_mutex_init(&be->lock); + } + + static void iommufd_backend_finalize(Object *obj) +@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp) + error_prepend(errp, "Could not parse remote object fd %s:", str); + return; + } +- qemu_mutex_lock(&be->lock); + be->fd = fd; + be->owned = false; +- qemu_mutex_unlock(&be->lock); + trace_iommu_backend_set_fd(be->fd); + } + +@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + { + int fd, ret = 0; + +- qemu_mutex_lock(&be->lock); + if (be->owned && !be->users) { + fd = qemu_open_old("/dev/iommu", O_RDWR); + if (fd < 0) { +@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp) + out: + trace_iommufd_backend_connect(be->fd, be->owned, + be->users, ret); +- qemu_mutex_unlock(&be->lock); + return ret; + } + + void iommufd_backend_disconnect(IOMMUFDBackend *be) + { +- qemu_mutex_lock(&be->lock); + if (!be->users) { + goto out; + } +@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be) + } + out: + trace_iommufd_backend_disconnect(be->fd, be->users); +- qemu_mutex_unlock(&be->lock); + } + + int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id, +diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h +index 9c5524b0ed..9af27ebd6c 100644 +--- a/include/sysemu/iommufd.h ++++ b/include/sysemu/iommufd.h +@@ 
-2,7 +2,6 @@ + #define SYSEMU_IOMMUFD_H + + #include "qom/object.h" +-#include "qemu/thread.h" + #include "exec/hwaddr.h" + #include "exec/cpu-common.h" + +@@ -19,7 +18,6 @@ struct IOMMUFDBackend { + /*< protected >*/ + int fd; /* /dev/iommu file descriptor */ + bool owned; /* is the /dev/iommu opened internally */ +- QemuMutex lock; + uint32_t users; + + /*< public >*/ +-- +2.39.3 + diff --git a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch b/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch deleted file mode 100644 index 6d9abb8..0000000 --- a/SOURCES/kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 40866640d15e6a8c9f6af7e437edc1ec1e17ba34 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 10/21] bcm2835_property: disable reentrancy detection for - iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/13] 128ebc85e228674af66553af82fba70eb87960e6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 985c4a4e547afb9573b6bd6843d20eb2c3d1d1cd -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:11 2023 -0400 - - bcm2835_property: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from bcm2835_property to - bcm2835_mbox and back into bcm2835_property, mark iomem as - reentrancy-safe. 
- - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-7-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/misc/bcm2835_property.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/misc/bcm2835_property.c b/hw/misc/bcm2835_property.c -index 890ae7bae5..de056ea2df 100644 ---- a/hw/misc/bcm2835_property.c -+++ b/hw/misc/bcm2835_property.c -@@ -382,6 +382,13 @@ static void bcm2835_property_init(Object *obj) - - memory_region_init_io(&s->iomem, OBJECT(s), &bcm2835_property_ops, s, - TYPE_BCM2835_PROPERTY, 0x10); -+ -+ /* -+ * bcm2835_property_ops call into bcm2835_mbox, which in-turn reads from -+ * iomem. As such, mark iomem as re-entracy safe. -+ */ -+ s->iomem.disable_reentrancy_guard = true; -+ - sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->iomem); - sysbus_init_irq(SYS_BUS_DEVICE(s), &s->mbox_irq); - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch deleted file mode 100644 index 6de5d65..0000000 --- a/SOURCES/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +++ /dev/null @@ -1,354 +0,0 @@ -From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:16 +0200 -Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) - -When processing vectored guest requests that are not aligned to the -storage request alignment, we pad them by adding head and/or tail -buffers for a read-modify-write cycle. - -The guest can submit I/O vectors up to IOV_MAX (1024) in length, but -with this padding, the vector can exceed that limit. 
As of -4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make -qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the -limit, instead returning an error to the guest. - -To the guest, this appears as a random I/O error. We should not return -an I/O error to the guest when it issued a perfectly valid request. - -Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector -longer than IOV_MAX, which generally seems to work (because the guest -assumes a smaller alignment than we really have, file-posix's -raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and -so emulate the request, so that the IOV_MAX does not matter). However, -that does not seem exactly great. - -I see two ways to fix this problem: -1. We split such long requests into two requests. -2. We join some elements of the vector into new buffers to make it - shorter. - -I am wary of (1), because it seems like it may have unintended side -effects. - -(2) on the other hand seems relatively simple to implement, with -hopefully few side effects, so this patch does that. - -To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() -is effectively replaced by the new function bdrv_create_padded_qiov(), -which not only wraps the request IOV with padding head/tail, but also -ensures that the resulting vector will not have more than IOV_MAX -elements. Putting that functionality into qemu_iovec_init_extended() is -infeasible because it requires allocating a bounce buffer; doing so -would require many more parameters (buffer alignment, how to initialize -the buffer, and out parameters like the buffer, its length, and the -original elements), which is not reasonable. - -Conversely, it is not difficult to move qemu_iovec_init_extended()'s -functionality into bdrv_create_padded_qiov() by using public -qemu_iovec_* functions, so that is what this patch does. 
- -Because bdrv_pad_request() was the only "serious" user of -qemu_iovec_init_extended(), the next patch will remove the latter -function, so the functionality is not implemented twice. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-3-hreitz@redhat.com> -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) -Signed-off-by: Hanna Czenczek ---- - block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 151 insertions(+), 15 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 2e267a85ab..4e8e90208b 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1439,6 +1439,14 @@ out: - * @merge_reads is true for small requests, - * if @buf_len == @head + bytes + @tail. In this case it is possible that both - * head and tail exist but @buf_len == align and @tail_buf == @buf. -+ * -+ * @write is true for write requests, false for read requests. -+ * -+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to -+ * merge existing vector elements into a single one. @collapse_bounce_buf acts -+ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse -+ * I/O vector elements so for read requests, the data can be copied back after -+ * the read is done. 
- */ - typedef struct BdrvRequestPadding { - uint8_t *buf; -@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { - size_t head; - size_t tail; - bool merge_reads; -+ bool write; - QEMUIOVector local_qiov; -+ -+ uint8_t *collapse_bounce_buf; -+ size_t collapse_len; -+ QEMUIOVector pre_collapse_qiov; - } BdrvRequestPadding; - - static bool bdrv_init_padding(BlockDriverState *bs, - int64_t offset, int64_t bytes, -+ bool write, - BdrvRequestPadding *pad) - { - int64_t align = bs->bl.request_alignment; -@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, - pad->tail_buf = pad->buf + pad->buf_len - align; - } - -+ pad->write = write; -+ - return true; - } - -@@ -1547,8 +1563,23 @@ zero_mem: - return 0; - } - --static void bdrv_padding_destroy(BdrvRequestPadding *pad) -+/** -+ * Free *pad's associated buffers, and perform any necessary finalization steps. -+ */ -+static void bdrv_padding_finalize(BdrvRequestPadding *pad) - { -+ if (pad->collapse_bounce_buf) { -+ if (!pad->write) { -+ /* -+ * If padding required elements in the vector to be collapsed into a -+ * bounce buffer, copy the bounce buffer content back -+ */ -+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_vfree(pad->collapse_bounce_buf); -+ qemu_iovec_destroy(&pad->pre_collapse_qiov); -+ } - if (pad->buf) { - qemu_vfree(pad->buf); - qemu_iovec_destroy(&pad->local_qiov); -@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - memset(pad, 0, sizeof(*pad)); - } - -+/* -+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while -+ * ensuring that the resulting vector will not exceed IOV_MAX elements. -+ * -+ * To ensure this, when necessary, the first two or three elements of @iov are -+ * merged into pad->collapse_bounce_buf and replaced by a reference to that -+ * bounce buffer in pad->local_qiov. 
-+ * -+ * After performing a read request, the data from the bounce buffer must be -+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). -+ */ -+static int bdrv_create_padded_qiov(BlockDriverState *bs, -+ BdrvRequestPadding *pad, -+ struct iovec *iov, int niov, -+ size_t iov_offset, size_t bytes) -+{ -+ int padded_niov, surplus_count, collapse_count; -+ -+ /* Assert this invariant */ -+ assert(niov <= IOV_MAX); -+ -+ /* -+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error -+ * to the guest is not ideal, but there is little else we can do. At least -+ * this will practically never happen on 64-bit systems. -+ */ -+ if (SIZE_MAX - pad->head < bytes || -+ SIZE_MAX - pad->head - bytes < pad->tail) -+ { -+ return -EINVAL; -+ } -+ -+ /* Length of the resulting IOV if we just concatenated everything */ -+ padded_niov = !!pad->head + niov + !!pad->tail; -+ -+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); -+ -+ if (pad->head) { -+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); -+ } -+ -+ /* -+ * If padded_niov > IOV_MAX, we cannot just concatenate everything. -+ * Instead, merge the first two or three elements of @iov to reduce the -+ * number of vector elements as necessary. -+ */ -+ if (padded_niov > IOV_MAX) { -+ /* -+ * Only head and tail can have lead to the number of entries exceeding -+ * IOV_MAX, so we can exceed it by the head and tail at most. We need -+ * to reduce the number of elements by `surplus_count`, so we merge that -+ * many elements plus one into one element. -+ */ -+ surplus_count = padded_niov - IOV_MAX; -+ assert(surplus_count <= !!pad->head + !!pad->tail); -+ collapse_count = surplus_count + 1; -+ -+ /* -+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then -+ * advance `iov` (and associated variables) by those elements. 
-+ */ -+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); -+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, -+ collapse_count, iov_offset, SIZE_MAX); -+ iov += collapse_count; -+ iov_offset = 0; -+ niov -= collapse_count; -+ bytes -= pad->pre_collapse_qiov.size; -+ -+ /* -+ * Construct the bounce buffer to match the length of the to-collapse -+ * vector elements, and for write requests, initialize it with the data -+ * from those elements. Then add it to `pad->local_qiov`. -+ */ -+ pad->collapse_len = pad->pre_collapse_qiov.size; -+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); -+ if (pad->write) { -+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ qemu_iovec_add(&pad->local_qiov, -+ pad->collapse_bounce_buf, pad->collapse_len); -+ } -+ -+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); -+ -+ if (pad->tail) { -+ qemu_iovec_add(&pad->local_qiov, -+ pad->buf + pad->buf_len - pad->tail, pad->tail); -+ } -+ -+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); -+ return 0; -+} -+ - /* - * bdrv_pad_request - * -@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - * read of padding, bdrv_padding_rmw_read() should be called separately if - * needed. - * -+ * @write is true for write requests, false for read requests. 
-+ * - * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: - * - on function start they represent original request - * - on failure or when padding is not needed they are unchanged -@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) - static int bdrv_pad_request(BlockDriverState *bs, - QEMUIOVector **qiov, size_t *qiov_offset, - int64_t *offset, int64_t *bytes, -+ bool write, - BdrvRequestPadding *pad, bool *padded, - BdrvRequestFlags *flags) - { - int ret; -+ struct iovec *sliced_iov; -+ int sliced_niov; -+ size_t sliced_head, sliced_tail; - - bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); - -- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { -+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { - *padded = false; - } - return 0; - } - -- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, -- *qiov, *qiov_offset, *bytes, -- pad->buf + pad->buf_len - pad->tail, -- pad->tail); -+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, -+ &sliced_head, &sliced_tail, -+ &sliced_niov); -+ -+ /* Guaranteed by bdrv_check_qiov_request() */ -+ assert(*bytes <= SIZE_MAX); -+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, -+ sliced_head, *bytes); - if (ret < 0) { -- bdrv_padding_destroy(pad); -+ bdrv_padding_finalize(pad); - return ret; - } - *bytes += pad->head + pad->tail; -@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - flags |= BDRV_REQ_COPY_ON_READ; - } - -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- NULL, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, -+ &pad, NULL, &flags); - if (ret < 0) { - goto fail; - } -@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, - bs->bl.request_alignment, - qiov, qiov_offset, flags); - tracked_request_end(&req); -- bdrv_padding_destroy(&pad); -+ 
bdrv_padding_finalize(&pad); - - fail: - bdrv_dec_in_flight(bs); -@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - /* This flag doesn't make sense for padding or zero writes */ - flags &= ~BDRV_REQ_REGISTERED_BUF; - -- padding = bdrv_init_padding(bs, offset, bytes, &pad); -+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); - if (padding) { - assert(!(flags & BDRV_REQ_NO_WAIT)); - bdrv_make_request_serialising(req, align); -@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, - } - - out: -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - return ret; - } -@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do - * alignment only if there is no ZERO flag. - */ -- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, -- &padded, &flags); -+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, -+ &pad, &padded, &flags); - if (ret < 0) { - return ret; - } -@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, - ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, - qiov, qiov_offset, flags); - -- bdrv_padding_destroy(&pad); -+ bdrv_padding_finalize(&pad); - - out: - tracked_request_end(&req); --- -2.39.3 - diff --git a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch b/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch deleted file mode 100644 index fbab82d..0000000 --- a/SOURCES/kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch +++ /dev/null @@ -1,56 +0,0 @@ -From dfa2811e88afaf996345552330e97f0513c1803c Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:34 +0200 -Subject: [PATCH 53/56] block: Don't call no_coroutine_fns in - qmp_block_resize() - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in 
qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 7ac7e34821cfc8bd5f0daadd7a1c4a5596bc60a6 (kmwolf/centos-qemu-kvm) - -This QMP handler runs in a coroutine, so it must use the corresponding -no_co_wrappers instead. - -Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688 -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-5-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit 0c7d204f50c382c6baac8c94bd57af4a022b3888) -Signed-off-by: Kevin Wolf ---- - blockdev.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/blockdev.c b/blockdev.c -index d7b5c18f0a..eb509cf964 100644 ---- a/blockdev.c -+++ b/blockdev.c -@@ -2430,7 +2430,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - return; - } - -- blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); -+ blk = blk_co_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp); - if (!blk) { - return; - } -@@ -2445,7 +2445,7 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, - - bdrv_co_lock(bs); - bdrv_drained_end(bs); -- blk_unref(blk); -+ blk_co_unref(blk); - bdrv_co_unlock(bs); - } - --- -2.39.1 - diff --git a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch b/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch deleted file mode 100644 index c0ab8c2..0000000 --- a/SOURCES/kvm-block-Fix-pad_request-s-request-restriction.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Fri, 14 Jul 2023 10:59:38 +0200 -Subject: [PATCH 5/9] block: Fix pad_request's request restriction - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX 
-RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, -which bdrv_check_qiov_request() does not guarantee. - -bdrv_check_request32() however will guarantee this, and both of -bdrv_pad_request()'s callers (bdrv_co_preadv_part() and -bdrv_co_pwritev_part()) already run it before calling -bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call -bdrv_check_request32() without expecting error, too. - -In effect, this patch will not change guest-visible behavior. It is a -clean-up to tighten a condition to match what is guaranteed by our -callers, and which exists purely to show clearly why the subsequent -assertion (`assert(*bytes <= SIZE_MAX)`) is always true. - -Note there is a difference between the interfaces of -bdrv_check_qiov_request() and bdrv_check_request32(): The former takes -an errp, the latter does not, so we can no longer just pass -&error_abort. Instead, we need to check the returned value. While we -do expect success (because the callers have already run this function), -an assert(ret == 0) is not much simpler than just to return an error if -it occurs, so let us handle errors by returning them up the stack now. 
- -Reported-by: Peter Maydell -Signed-off-by: Hanna Czenczek -Message-id: 20230714085938.202730-1-hreitz@redhat.com -Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a - ("block: Collapse padded I/O vecs exceeding IOV_MAX") -Signed-off-by: Hanna Czenczek -Signed-off-by: Stefan Hajnoczi ---- - block/io.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - -diff --git a/block/io.c b/block/io.c -index 4e8e90208b..807c9fb720 100644 ---- a/block/io.c -+++ b/block/io.c -@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs, - int sliced_niov; - size_t sliced_head, sliced_tail; - -- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); -+ /* Should have been checked by the caller already */ -+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); -+ if (ret < 0) { -+ return ret; -+ } - - if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { - if (padded) { -@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs, - &sliced_head, &sliced_tail, - &sliced_niov); - -- /* Guaranteed by bdrv_check_qiov_request() */ -+ /* Guaranteed by bdrv_check_request32() */ - assert(*bytes <= SIZE_MAX); - ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, - sliced_head, *bytes); --- -2.39.3 - diff --git a/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch new file mode 100644 index 0000000..155fa19 --- /dev/null +++ b/SOURCES/kvm-block-backend-Allow-concurrent-context-changes.patch @@ -0,0 +1,104 @@ +From afa842e9fdf6e1d6e5d5785679a22779632142bd Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 15:47:54 +0100 +Subject: [PATCH 03/22] block-backend: Allow concurrent context changes + +RH-Author: Hanna Czenczek +RH-MergeRequest: 222: Allow concurrent BlockBackend context changes +RH-Jira: RHEL-24593 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [1/2] 
9e1b535f60f7afa94a0817dc3e71136e41631c71 (hreitz/qemu-kvm-c-9-s) + +Since AioContext locks have been removed, a BlockBackend's AioContext +may really change at any time (only exception is that it is often +confined to a drained section, as noted in this patch). Therefore, +blk_get_aio_context() cannot rely on its root node's context always +matching that of the BlockBackend. + +In practice, whether they match does not matter anymore anyway: Requests +can be sent to BDSs from any context, so anyone who requests the BB's +context should have no reason to require the root node to have the same +context. Therefore, we can and should remove the assertion to that +effect. + +In addition, because the context can be set and queried from different +threads concurrently, it has to be accessed with atomic operations. + +Buglink: https://issues.redhat.com/browse/RHEL-19381 +Suggested-by: Kevin Wolf +Signed-off-by: Hanna Czenczek +Message-ID: <20240202144755.671354-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit ad893672027ffe26db498947d70cde6d4f58a111) +--- + block/block-backend.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 209eb07528..9c4de79e6b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -44,7 +44,7 @@ struct BlockBackend { + char *name; + int refcnt; + BdrvChild *root; +- AioContext *ctx; ++ AioContext *ctx; /* access with atomic operations only */ + DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ + QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ + QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ +@@ -2414,22 +2414,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason) + } + } + ++/** ++ * Return BB's current AioContext. 
Note that this context may change ++ * concurrently at any time, with one exception: If the BB has a root node ++ * attached, its context will only change through bdrv_try_change_aio_context(), ++ * which creates a drained section. Therefore, incrementing such a BB's ++ * in-flight counter will prevent its context from changing. ++ */ + AioContext *blk_get_aio_context(BlockBackend *blk) + { +- BlockDriverState *bs; + IO_CODE(); + + if (!blk) { + return qemu_get_aio_context(); + } + +- bs = blk_bs(blk); +- if (bs) { +- AioContext *ctx = bdrv_get_aio_context(blk_bs(blk)); +- assert(ctx == blk->ctx); +- } +- +- return blk->ctx; ++ return qatomic_read(&blk->ctx); + } + + int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, +@@ -2442,7 +2442,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context, + GLOBAL_STATE_CODE(); + + if (!bs) { +- blk->ctx = new_context; ++ qatomic_set(&blk->ctx, new_context); + return 0; + } + +@@ -2471,7 +2471,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque) + AioContext *new_context = s->new_ctx; + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + +- blk->ctx = new_context; ++ qatomic_set(&blk->ctx, new_context); + if (tgm->throttle_state) { + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, new_context); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch b/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch deleted file mode 100644 index 0f0347b..0000000 --- a/SOURCES/kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch +++ /dev/null @@ -1,386 +0,0 @@ -From 7baea25be90e184175dd5a919ee5878cbd4970c2 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Thu, 4 May 2023 13:57:33 +0200 -Subject: [PATCH 52/56] block: bdrv/blk_co_unref() for calls in coroutine - context - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: 
Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] 8ebf8486b082c30ca1b39a6ede35e471eaaccfa3 (kmwolf/centos-qemu-kvm) - -These functions must not be called in coroutine context, because they -need write access to the graph. - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Reviewed-by: Eric Blake -Reviewed-by: Stefan Hajnoczi -Message-Id: <20230504115750.54437-4-kwolf@redhat.com> -Signed-off-by: Kevin Wolf -(cherry picked from commit b2ab5f545fa1eaaf2955dd617bee19a8b3279786) -Signed-off-by: Kevin Wolf ---- - block.c | 2 +- - block/crypto.c | 6 +++--- - block/parallels.c | 6 +++--- - block/qcow.c | 6 +++--- - block/qcow2.c | 14 +++++++------- - block/qed.c | 6 +++--- - block/vdi.c | 6 +++--- - block/vhdx.c | 6 +++--- - block/vmdk.c | 18 +++++++++--------- - block/vpc.c | 6 +++--- - include/block/block-global-state.h | 3 ++- - include/sysemu/block-backend-global-state.h | 5 ++++- - 12 files changed, 44 insertions(+), 40 deletions(-) - -diff --git a/block.c b/block.c -index d79a52ca74..a48112f945 100644 ---- a/block.c -+++ b/block.c -@@ -680,7 +680,7 @@ int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv, - - ret = 0; - out: -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -diff --git a/block/crypto.c b/block/crypto.c -index ca67289187..8fd3ad0054 100644 ---- a/block/crypto.c -+++ b/block/crypto.c -@@ -355,7 +355,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size, - ret = 0; - cleanup: - qcrypto_block_free(crypto); -- blk_unref(blk); -+ blk_co_unref(blk); - return ret; - } - -@@ -661,7 +661,7 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - fail: -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -730,7 +730,7 @@ fail: - bdrv_co_delete_file_noerr(bs); - } - -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_QCryptoBlockCreateOptions(create_opts); - qobject_unref(cryptoopts); - 
return ret; -diff --git a/block/parallels.c b/block/parallels.c -index 013684801a..b49c35929e 100644 ---- a/block/parallels.c -+++ b/block/parallels.c -@@ -613,8 +613,8 @@ static int coroutine_fn parallels_co_create(BlockdevCreateOptions* opts, - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - - exit: -@@ -691,7 +691,7 @@ parallels_co_create_opts(BlockDriver *drv, const char *filename, - - done: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow.c b/block/qcow.c -index 490e4f819e..a0c701f578 100644 ---- a/block/qcow.c -+++ b/block/qcow.c -@@ -915,8 +915,8 @@ static int coroutine_fn qcow_co_create(BlockdevCreateOptions *opts, - g_free(tmp); - ret = 0; - exit: -- blk_unref(qcow_blk); -- bdrv_unref(bs); -+ blk_co_unref(qcow_blk); -+ bdrv_co_unref(bs); - qcrypto_block_free(crypto); - return ret; - } -@@ -1015,7 +1015,7 @@ qcow_co_create_opts(BlockDriver *drv, const char *filename, - fail: - g_free(backing_fmt); - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qcow2.c b/block/qcow2.c -index 22084730f9..0b8beb8b47 100644 ---- a/block/qcow2.c -+++ b/block/qcow2.c -@@ -3711,7 +3711,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - goto out; - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* -@@ -3791,7 +3791,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - } - } - -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - - /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning. 
-@@ -3816,9 +3816,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp) - - ret = 0; - out: -- blk_unref(blk); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - return ret; - } - -@@ -3949,8 +3949,8 @@ finish: - } - - qobject_unref(qdict); -- bdrv_unref(bs); -- bdrv_unref(data_bs); -+ bdrv_co_unref(bs); -+ bdrv_co_unref(data_bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/qed.c b/block/qed.c -index 0705a7b4e2..aff2a2076e 100644 ---- a/block/qed.c -+++ b/block/qed.c -@@ -748,8 +748,8 @@ static int coroutine_fn bdrv_qed_co_create(BlockdevCreateOptions *opts, - ret = 0; /* success */ - out: - g_free(l1_table); -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -819,7 +819,7 @@ bdrv_qed_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vdi.c b/block/vdi.c -index f2434d6153..08331d2dd7 100644 ---- a/block/vdi.c -+++ b/block/vdi.c -@@ -886,8 +886,8 @@ static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, - - ret = 0; - exit: -- blk_unref(blk); -- bdrv_unref(bs_file); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs_file); - g_free(bmap); - return ret; - } -@@ -975,7 +975,7 @@ vdi_co_create_opts(BlockDriver *drv, const char *filename, - done: - qobject_unref(qdict); - qapi_free_BlockdevCreateOptions(create_options); -- bdrv_unref(bs_file); -+ bdrv_co_unref(bs_file); - return ret; - } - -diff --git a/block/vhdx.c b/block/vhdx.c -index 81420722a1..00777da91a 100644 ---- a/block/vhdx.c -+++ b/block/vhdx.c -@@ -2053,8 +2053,8 @@ static int coroutine_fn vhdx_co_create(BlockdevCreateOptions *opts, - - ret = 0; - delete_and_exit: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); 
- g_free(creator); - return ret; - } -@@ -2144,7 +2144,7 @@ vhdx_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/block/vmdk.c b/block/vmdk.c -index f5f49018fe..01ca13c82b 100644 ---- a/block/vmdk.c -+++ b/block/vmdk.c -@@ -2306,7 +2306,7 @@ exit: - if (pbb) { - *pbb = blk; - } else { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -@@ -2516,12 +2516,12 @@ vmdk_co_do_create(int64_t size, - if (strcmp(blk_bs(backing)->drv->format_name, "vmdk")) { - error_setg(errp, "Invalid backing file format: %s. Must be vmdk", - blk_bs(backing)->drv->format_name); -- blk_unref(backing); -+ blk_co_unref(backing); - ret = -EINVAL; - goto exit; - } - ret = vmdk_read_cid(blk_bs(backing), 0, &parent_cid); -- blk_unref(backing); -+ blk_co_unref(backing); - if (ret) { - error_setg(errp, "Failed to read parent CID"); - goto exit; -@@ -2542,14 +2542,14 @@ vmdk_co_do_create(int64_t size, - blk_bs(extent_blk)->filename); - created_size += cur_size; - extent_idx++; -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - } - - /* Check whether we got excess extents */ - extent_blk = extent_fn(-1, extent_idx, flat, split, compress, zeroed_grain, - opaque, NULL); - if (extent_blk) { -- blk_unref(extent_blk); -+ blk_co_unref(extent_blk); - error_setg(errp, "List of extents contains unused extents"); - ret = -EINVAL; - goto exit; -@@ -2590,7 +2590,7 @@ vmdk_co_do_create(int64_t size, - ret = 0; - exit: - if (blk) { -- blk_unref(blk); -+ blk_co_unref(blk); - } - g_free(desc); - g_free(parent_desc_line); -@@ -2641,7 +2641,7 @@ vmdk_co_create_opts_cb(int64_t size, int idx, bool flat, bool split, - errp)) { - goto exit; - } -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - exit: - g_free(ext_filename); - return blk; -@@ -2797,12 +2797,12 @@ static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx, - return NULL; 
- } - blk_set_allow_write_beyond_eof(blk, true); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - - if (size != -1) { - ret = vmdk_init_extent(blk, size, flat, compress, zeroed_grain, errp); - if (ret) { -- blk_unref(blk); -+ blk_co_unref(blk); - blk = NULL; - } - } -diff --git a/block/vpc.c b/block/vpc.c -index b89b0ff8e2..07ddda5b99 100644 ---- a/block/vpc.c -+++ b/block/vpc.c -@@ -1082,8 +1082,8 @@ static int coroutine_fn vpc_co_create(BlockdevCreateOptions *opts, - } - - out: -- blk_unref(blk); -- bdrv_unref(bs); -+ blk_co_unref(blk); -+ bdrv_co_unref(bs); - return ret; - } - -@@ -1162,7 +1162,7 @@ vpc_co_create_opts(BlockDriver *drv, const char *filename, - - fail: - qobject_unref(qdict); -- bdrv_unref(bs); -+ bdrv_co_unref(bs); - qapi_free_BlockdevCreateOptions(create_options); - return ret; - } -diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h -index 399200a9a3..cd4ea554bf 100644 ---- a/include/block/block-global-state.h -+++ b/include/block/block-global-state.h -@@ -214,7 +214,8 @@ void bdrv_img_create(const char *filename, const char *fmt, - bool quiet, Error **errp); - - void bdrv_ref(BlockDriverState *bs); --void bdrv_unref(BlockDriverState *bs); -+void no_coroutine_fn bdrv_unref(BlockDriverState *bs); -+void coroutine_fn no_co_wrapper bdrv_co_unref(BlockDriverState *bs); - void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); - BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, - BlockDriverState *child_bs, -diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h -index 2b6d27db7c..fa83f9389c 100644 ---- a/include/sysemu/block-backend-global-state.h -+++ b/include/sysemu/block-backend-global-state.h -@@ -42,7 +42,10 @@ blk_co_new_open(const char *filename, const char *reference, QDict *options, - - int blk_get_refcnt(BlockBackend *blk); - void blk_ref(BlockBackend *blk); --void blk_unref(BlockBackend *blk); -+ -+void no_coroutine_fn blk_unref(BlockBackend 
*blk); -+void coroutine_fn no_co_wrapper blk_co_unref(BlockBackend *blk); -+ - void blk_remove_all_bs(void); - BlockBackend *blk_by_name(const char *name); - BlockBackend *blk_next(BlockBackend *blk); --- -2.39.1 - diff --git a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch b/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch deleted file mode 100644 index caf6694..0000000 --- a/SOURCES/kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch +++ /dev/null @@ -1,74 +0,0 @@ -From b1f0546548e561856252c2bc610a8f4f8fcdf007 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 26 Jul 2023 09:48:07 +0200 -Subject: [PATCH 02/14] block/blkio: do not use open flags in qemu_open() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [2/6] 1ccd0ef56182bb5e2374c3b5be98ee1ec05066d6 (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() in blkio_virtio_blk_common_open() is used to open the -character device (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or in -the future eventually the unix socket. - -In all these cases we cannot open the path in read-only mode, -when the `read-only` option of blockdev is on, because the exchange -of IOCTL commands for example will fail. - -In order to open the device read-only, we have to use the `read-only` -property of the libblkio driver as we already do in blkio_file_open(). - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2225439 -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Reviewed-by: Daniel P. 
Berrangé -Message-id: 20230726074807.14041-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit a5942c177b7bcc1357e496b7d68668befcfc2bb9) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 21 ++++++++++++--------- - 1 file changed, 12 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 3ea9841bd8..5a82c6cb1a 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -685,15 +685,18 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - * layer through the "/dev/fdset/N" special path. - */ - if (fd_supported) { -- int open_flags; -- -- if (flags & BDRV_O_RDWR) { -- open_flags = O_RDWR; -- } else { -- open_flags = O_RDONLY; -- } -- -- fd = qemu_open(path, open_flags, errp); -+ /* -+ * `path` can contain the path of a character device -+ * (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket. -+ * -+ * So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR -+ * is not set in the open flags, because the exchange of IOCTL commands -+ * for example will fail. -+ * -+ * In order to open the device read-only, we are using the `read-only` -+ * property of the libblkio driver in blkio_file_open(). 
-+ */ -+ fd = qemu_open(path, O_RDWR, errp); - if (fd < 0) { - return -EINVAL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch b/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch deleted file mode 100644 index 8a6f72b..0000000 --- a/SOURCES/kvm-block-blkio-enable-the-completion-eventfd.patch +++ /dev/null @@ -1,54 +0,0 @@ -From ef99db21e9469f3fc946b7bf3edc1837d7b24e0b Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 25 Jul 2023 12:37:44 +0200 -Subject: [PATCH 01/14] block/blkio: enable the completion eventfd - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [1/6] d91b3a465942863550130105ae2f38f47a82a360 (sgarzarella/qemu-kvm-c-9-s) - -Until libblkio 1.3.0, virtio-blk drivers had completion eventfd -notifications enabled from the start, but from the next releases -this is no longer the case, so we have to explicitly enable them. - -In fact, the libblkio documentation says they could be disabled, -so we should always enable them at the start if we want to be -sure to get completion eventfd notifications: - - By default, the driver might not generate completion events for - requests so it is necessary to explicitly enable the completion - file descriptor before use: - - void blkioq_set_completion_fd_enabled(struct blkioq *q, bool enable); - -I discovered this while trying a development version of libblkio: -the guest kernel hangs during boot, while probing the device. 
- -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Signed-off-by: Stefano Garzarella -Message-id: 20230725103744.77343-1-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 9359c459889fce1804c4e1b2a2ff8f182b4a9ae8) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/block/blkio.c b/block/blkio.c -index afcec359f2..3ea9841bd8 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -844,6 +844,7 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - QLIST_INIT(&s->bounce_bufs); - s->blkioq = blkio_get_queue(s->blkio, 0); - s->completion_fd = blkioq_get_completion_fd(s->blkioq); -+ blkioq_set_completion_fd_enabled(s->blkioq, true); - - blkio_attach_aio_context(bs, bdrv_get_aio_context(bs)); - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch b/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch deleted file mode 100644 index f4d6e3c..0000000 --- a/SOURCES/kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch +++ /dev/null @@ -1,67 +0,0 @@ -From c1ce3ba81698b9d52ac9dff83c01ee8141ca403d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:19 +0200 -Subject: [PATCH 05/14] block/blkio: fall back on using `path` when `fd` - setting fails - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [5/6] c03cea95146a59b2830ffe2dd56ef77a6630ce3e (sgarzarella/qemu-kvm-c-9-s) - -qemu_open() fails if called with an unix domain socket in this way: - -blockdev node-name=drive0,driver=virtio-blk-vhost-user,path=vhost-user-blk.sock,cache.direct=on: Could not open 'vhost-user-blk.sock': No such device or address - -Since virtio-blk-vhost-user does not support fd passing, let`s always fall back -on 
using `path` if we fail the fd passing. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Reported-by: Qing Wang -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-4-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 723bea27b127969931fa26bc0de79372a3d9e148) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 93a8f8fc5c..eef80e9ce5 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -710,19 +710,19 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - * In order to open the device read-only, we are using the `read-only` - * property of the libblkio driver in blkio_file_open(). - */ -- fd = qemu_open(path, O_RDWR, errp); -+ fd = qemu_open(path, O_RDWR, NULL); - if (fd < 0) { -- return -EINVAL; -+ fd_supported = false; -+ } else { -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ fd_supported = false; -+ qemu_close(fd); -+ } - } -+ } - -- ret = blkio_set_int(s->blkio, "fd", fd); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set fd: %s", -- blkio_get_error_msg()); -- qemu_close(fd); -- return ret; -- } -- } else { -+ if (!fd_supported) { - ret = blkio_set_str(s->blkio, "path", path); - if (ret < 0) { - error_setg_errno(errp, -ret, "failed to set path: %s", --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch b/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch deleted file mode 100644 index 1c89a0b..0000000 --- a/SOURCES/kvm-block-blkio-fix-module_block.py-parsing.patch +++ /dev/null @@ -1,205 +0,0 @@ -From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Tue, 4 Jul 2023 14:34:36 +0200 -Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing - -RH-Author: Stefan Hajnoczi -RH-MergeRequest: 181: 
block/blkio: fix module_block.py parsing -RH-Bugzilla: 2213317 -RH-Acked-by: Stefano Garzarella -RH-Acked-by: Kevin Wolf -RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) - -When QEMU is built with --enable-modules, the module_block.py script -parses block/*.c to find block drivers that are built as modules. The -script generates a table of block drivers called block_driver_modules[]. -This table is used for block driver module loading. - -The blkio.c driver uses macros to define its BlockDriver structs. This -was done to avoid code duplication but the module_block.py script is -unable to parse the macro. The result is that libblkio-based block -drivers can be built as modules but will not be found at runtime. - -One fix is to make the module_block.py script or build system fancier so -it can parse C macros (e.g. by parsing the preprocessed source code). I -chose not to do this because it raises the complexity of the build, -making future issues harder to debug. - -Keep things simple: use the macro to avoid duplicating BlockDriver -function pointers but define .format_name and .protocol_name manually -for each BlockDriver. This way the module_block.py is able to parse the -code. - -Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) -because module_block.py cannot parse them either. - -Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") -Reported-by: Qing Wang -Signed-off-by: Stefan Hajnoczi -Reviewed-by: Stefano Garzarella -Message-id: 20230704123436.187761-1-stefanha@redhat.com -Cc: Stefano Garzarella -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) - -Conflicts: -- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to - blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. 
- -Signed-off-by: Stefan Hajnoczi ---- - block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ - 1 file changed, 61 insertions(+), 57 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 6a6f20f923..afcec359f2 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -21,16 +21,6 @@ - - #include "block/block-io.h" - --/* -- * Keep the QEMU BlockDriver names identical to the libblkio driver names. -- * Using macros instead of typing out the string literals avoids typos. -- */ --#define DRIVER_IO_URING "io_uring" --#define DRIVER_NVME_IO_URING "nvme-io_uring" --#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" --#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" --#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" -- - /* - * Allocated bounce buffers are kept in a list sorted by buffer address. - */ -@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { -+ if (strcmp(blkio_driver, "io_uring") == 0) { - ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { - ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { - ret = blkio_virtio_blk_common_open(bs, options, flags, errp); - } else { - g_assert_not_reached(); -@@ -1027,50 +1017,64 @@ static 
void blkio_refresh_limits(BlockDriverState *bs, Error **errp) - * - truncate - */ - --#define BLKIO_DRIVER(name, ...) \ -- { \ -- .format_name = name, \ -- .protocol_name = name, \ -- .instance_size = sizeof(BDRVBlkioState), \ -- .bdrv_file_open = blkio_file_open, \ -- .bdrv_close = blkio_close, \ -- .bdrv_co_getlength = blkio_co_getlength, \ -- .bdrv_co_truncate = blkio_truncate, \ -- .bdrv_co_get_info = blkio_co_get_info, \ -- .bdrv_attach_aio_context = blkio_attach_aio_context, \ -- .bdrv_detach_aio_context = blkio_detach_aio_context, \ -- .bdrv_co_pdiscard = blkio_co_pdiscard, \ -- .bdrv_co_preadv = blkio_co_preadv, \ -- .bdrv_co_pwritev = blkio_co_pwritev, \ -- .bdrv_co_flush_to_disk = blkio_co_flush, \ -- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -- .bdrv_co_io_unplug = blkio_co_io_unplug, \ -- .bdrv_refresh_limits = blkio_refresh_limits, \ -- .bdrv_register_buf = blkio_register_buf, \ -- .bdrv_unregister_buf = blkio_unregister_buf, \ -- __VA_ARGS__ \ -- } -- --static BlockDriver bdrv_io_uring = BLKIO_DRIVER( -- DRIVER_IO_URING, -- .bdrv_needs_filename = true, --); -- --static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( -- DRIVER_NVME_IO_URING, --); -- --static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VFIO_PCI --); -+/* -+ * Do not include .format_name and .protocol_name because module_block.py -+ * does not parse macros in the source code. 
-+ */ -+#define BLKIO_DRIVER_COMMON \ -+ .instance_size = sizeof(BDRVBlkioState), \ -+ .bdrv_file_open = blkio_file_open, \ -+ .bdrv_close = blkio_close, \ -+ .bdrv_co_getlength = blkio_co_getlength, \ -+ .bdrv_co_truncate = blkio_truncate, \ -+ .bdrv_co_get_info = blkio_co_get_info, \ -+ .bdrv_attach_aio_context = blkio_attach_aio_context, \ -+ .bdrv_detach_aio_context = blkio_detach_aio_context, \ -+ .bdrv_co_pdiscard = blkio_co_pdiscard, \ -+ .bdrv_co_preadv = blkio_co_preadv, \ -+ .bdrv_co_pwritev = blkio_co_pwritev, \ -+ .bdrv_co_flush_to_disk = blkio_co_flush, \ -+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ -+ .bdrv_co_io_unplug = blkio_co_io_unplug, \ -+ .bdrv_refresh_limits = blkio_refresh_limits, \ -+ .bdrv_register_buf = blkio_register_buf, \ -+ .bdrv_unregister_buf = blkio_unregister_buf, - --static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_USER --); -+/* -+ * Use the same .format_name and .protocol_name as the libblkio driver name for -+ * consistency. 
-+ */ - --static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( -- DRIVER_VIRTIO_BLK_VHOST_VDPA --); -+static BlockDriver bdrv_io_uring = { -+ .format_name = "io_uring", -+ .protocol_name = "io_uring", -+ .bdrv_needs_filename = true, -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_nvme_io_uring = { -+ .format_name = "nvme-io_uring", -+ .protocol_name = "nvme-io_uring", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vfio_pci = { -+ .format_name = "virtio-blk-vfio-pci", -+ .protocol_name = "virtio-blk-vfio-pci", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_user = { -+ .format_name = "virtio-blk-vhost-user", -+ .protocol_name = "virtio-blk-vhost-user", -+ BLKIO_DRIVER_COMMON -+}; -+ -+static BlockDriver bdrv_virtio_blk_vhost_vdpa = { -+ .format_name = "virtio-blk-vhost-vdpa", -+ .protocol_name = "virtio-blk-vhost-vdpa", -+ BLKIO_DRIVER_COMMON -+}; - - static void bdrv_blkio_init(void) - { --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch b/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch deleted file mode 100644 index e3ec1ee..0000000 --- a/SOURCES/kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 458c33c9f19ed01beeb9b2b494ce6ed10d2ed4ac Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:17 +0200 -Subject: [PATCH 03/14] block/blkio: move blkio_connect() in the drivers - functions - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [3/6] c356108d7dfe1ba2098c094f8d12b6e40853560c (sgarzarella/qemu-kvm-c-9-s) - -This is in preparation for the next patch, where for virtio-blk -drivers we need to handle the failure of blkio_connect(). 
- -Let's also rename the *_open() functions to *_connect() to make -the code reflect the changes applied. - -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 69785d66ae1ec43f77fc65109a21721992bead9f) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 67 ++++++++++++++++++++++++++++++--------------------- - 1 file changed, 40 insertions(+), 27 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 5a82c6cb1a..85d1eed5fb 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -602,8 +602,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size) - } - } - --static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *filename = qdict_get_str(options, "filename"); - BDRVBlkioState *s = bs->opaque; -@@ -626,11 +626,18 @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, -- Error **errp) -+static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -654,11 +661,18 @@ static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags, - return -EINVAL; - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - return 0; - } - --static int blkio_virtio_blk_common_open(BlockDriverState *bs, -- QDict *options, int flags, 
Error **errp) -+static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, -+ int flags, Error **errp) - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -@@ -717,6 +731,13 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - } - } - -+ ret = blkio_connect(s->blkio); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "blkio_connect failed: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ - qdict_del(options, "path"); - - return 0; -@@ -736,24 +757,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - return ret; - } - -- if (strcmp(blkio_driver, "io_uring") == 0) { -- ret = blkio_io_uring_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -- ret = blkio_nvme_io_uring(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -- ret = blkio_virtio_blk_common_open(bs, options, flags, errp); -- } else { -- g_assert_not_reached(); -- } -- if (ret < 0) { -- blkio_destroy(&s->blkio); -- return ret; -- } -- - if (!(flags & BDRV_O_RDWR)) { - ret = blkio_set_bool(s->blkio, "read-only", true); - if (ret < 0) { -@@ -764,10 +767,20 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, - } - } - -- ret = blkio_connect(s->blkio); -+ if (strcmp(blkio_driver, "io_uring") == 0) { -+ ret = blkio_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { -+ ret = blkio_nvme_io_uring_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, 
"virtio-blk-vhost-user") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { -+ ret = blkio_virtio_blk_connect(bs, options, flags, errp); -+ } else { -+ g_assert_not_reached(); -+ } - if (ret < 0) { -- error_setg_errno(errp, -ret, "blkio_connect failed: %s", -- blkio_get_error_msg()); - blkio_destroy(&s->blkio); - return ret; - } --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch b/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch deleted file mode 100644 index 5ec9e0b..0000000 --- a/SOURCES/kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch +++ /dev/null @@ -1,85 +0,0 @@ -From ece855a71d9234c58497f37cb5498f507742167d Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:18 +0200 -Subject: [PATCH 04/14] block/blkio: retry blkio_connect() if it fails using - `fd` - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [4/6] 14ebc1f333617ce22c68693dec1c9a186d4f8a08 (sgarzarella/qemu-kvm-c-9-s) - -libblkio 1.3.0 added support of "fd" property for virtio-blk-vhost-vdpa -driver. In QEMU, starting from commit cad2ccc395 ("block/blkio: use -qemu_open() to support fd passing for virtio-blk") we are using -`blkio_get_int(..., "fd")` to check if the "fd" property is supported -for all the virtio-blk-* driver. - -Unfortunately that property is also available for those driver that do -not support it, such as virtio-blk-vhost-user. - -So, `blkio_get_int()` is not enough to check whether the driver supports -the `fd` property or not. 
This is because the virito-blk common libblkio -driver only checks whether or not `fd` is set during `blkio_connect()` -and fails with -EINVAL for those transports that do not support it -(all except vhost-vdpa for now). - -So let's handle the `blkio_connect()` failure, retrying it using `path` -directly. - -Fixes: cad2ccc395 ("block/blkio: use qemu_open() to support fd passing for virtio-blk") -Suggested-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 809c319f8a089fbc49223dc29e1cc2b978beeada) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 29 +++++++++++++++++++++++++++++ - 1 file changed, 29 insertions(+) - -diff --git a/block/blkio.c b/block/blkio.c -index 85d1eed5fb..93a8f8fc5c 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -732,6 +732,35 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - } - - ret = blkio_connect(s->blkio); -+ /* -+ * If the libblkio driver doesn't support the `fd` property, blkio_connect() -+ * will fail with -EINVAL. So let's try calling blkio_connect() again by -+ * directly setting `path`. -+ */ -+ if (fd_supported && ret == -EINVAL) { -+ qemu_close(fd); -+ -+ /* -+ * We need to clear the `fd` property we set previously by setting -+ * it to -1. 
-+ */ -+ ret = blkio_set_int(s->blkio, "fd", -1); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ -+ ret = blkio_connect(s->blkio); -+ } -+ - if (ret < 0) { - error_setg_errno(errp, -ret, "blkio_connect failed: %s", - blkio_get_error_msg()); --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch b/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch deleted file mode 100644 index c6e1cd8..0000000 --- a/SOURCES/kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 2f4436e7cc2f63d198229dc8ba32783460c0b185 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Thu, 27 Jul 2023 18:10:20 +0200 -Subject: [PATCH 06/14] block/blkio: use blkio_set_int("fd") to check fd - support - -RH-Author: Stefano Garzarella -RH-MergeRequest: 194: block/blkio: backport latest fixes for virtio-blk-* drivers -RH-Bugzilla: 2225354 2225439 -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Alberto Faria -RH-Commit: [6/6] d57aafb2c3a8ed13aa3c6dcce5525a9cc8f5aa21 (sgarzarella/qemu-kvm-c-9-s) - -Setting the `fd` property fails with virtio-blk-* libblkio drivers -that do not support fd passing since -https://gitlab.com/libblkio/libblkio/-/merge_requests/208. - -Getting the `fd` property, on the other hand, always succeeds for -virtio-blk-* libblkio drivers even when they don't support fd passing. - -This patch switches to setting the `fd` property because it is a -better mechanism for probing fd passing support than getting the `fd` -property. 
- -Signed-off-by: Stefano Garzarella -Message-id: 20230727161020.84213-5-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 1c38fe69e2b8a05c1762b122292fa7e3662f06fd) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/block/blkio.c b/block/blkio.c -index eef80e9ce5..8defbf744f 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -689,7 +689,7 @@ static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options, - return -EINVAL; - } - -- if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ if (blkio_set_int(s->blkio, "fd", -1) == 0) { - fd_supported = true; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch b/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch deleted file mode 100644 index 3b32299..0000000 --- a/SOURCES/kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch +++ /dev/null @@ -1,108 +0,0 @@ -From fd57241cf0f8c2906fa56118f8da1e65a5b1e4d8 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:40 +0200 -Subject: [PATCH 3/5] block/blkio: use qemu_open() to support fd passing for - virtio-blk - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/2] 9ff1a1510500db101648341207a36318a0c41c5a (sgarzarella/qemu-kvm-c-9-s) - -Some virtio-blk drivers (e.g. virtio-blk-vhost-vdpa) supports the fd -passing. Let's expose this to the user, so the management layer -can pass the file descriptor of an already opened path. - -If the libblkio virtio-blk driver supports fd passing, let's always -use qemu_open() to open the `path`, so we can handle fd passing -from the management layer through the "/dev/fdset/N" special path. 
- -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-2-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit cad2ccc395c7113fb30bc9390774b67b34f06c68) -Signed-off-by: Stefano Garzarella ---- - block/blkio.c | 53 ++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 44 insertions(+), 9 deletions(-) - -diff --git a/block/blkio.c b/block/blkio.c -index 0cdc99a729..6a6f20f923 100644 ---- a/block/blkio.c -+++ b/block/blkio.c -@@ -672,25 +672,60 @@ static int blkio_virtio_blk_common_open(BlockDriverState *bs, - { - const char *path = qdict_get_try_str(options, "path"); - BDRVBlkioState *s = bs->opaque; -- int ret; -+ bool fd_supported = false; -+ int fd, ret; - - if (!path) { - error_setg(errp, "missing 'path' option"); - return -EINVAL; - } - -- ret = blkio_set_str(s->blkio, "path", path); -- qdict_del(options, "path"); -- if (ret < 0) { -- error_setg_errno(errp, -ret, "failed to set path: %s", -- blkio_get_error_msg()); -- return ret; -- } -- - if (!(flags & BDRV_O_NOCACHE)) { - error_setg(errp, "cache.direct=off is not supported"); - return -EINVAL; - } -+ -+ if (blkio_get_int(s->blkio, "fd", &fd) == 0) { -+ fd_supported = true; -+ } -+ -+ /* -+ * If the libblkio driver supports fd passing, let's always use qemu_open() -+ * to open the `path`, so we can handle fd passing from the management -+ * layer through the "/dev/fdset/N" special path. 
-+ */ -+ if (fd_supported) { -+ int open_flags; -+ -+ if (flags & BDRV_O_RDWR) { -+ open_flags = O_RDWR; -+ } else { -+ open_flags = O_RDONLY; -+ } -+ -+ fd = qemu_open(path, open_flags, errp); -+ if (fd < 0) { -+ return -EINVAL; -+ } -+ -+ ret = blkio_set_int(s->blkio, "fd", fd); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set fd: %s", -+ blkio_get_error_msg()); -+ qemu_close(fd); -+ return ret; -+ } -+ } else { -+ ret = blkio_set_str(s->blkio, "path", path); -+ if (ret < 0) { -+ error_setg_errno(errp, -ret, "failed to set path: %s", -+ blkio_get_error_msg()); -+ return ret; -+ } -+ } -+ -+ qdict_del(options, "path"); -+ - return 0; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch b/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch deleted file mode 100644 index b6eebf3..0000000 --- a/SOURCES/kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch +++ /dev/null @@ -1,121 +0,0 @@ -From d9190117f3c701380701d6e9b2aa3c2446b9708f Mon Sep 17 00:00:00 2001 -From: Stefan Hajnoczi -Date: Mon, 1 May 2023 13:34:43 -0400 -Subject: [PATCH 01/21] block: compile out assert_bdrv_graph_readable() by - default - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/4] d8cb4bb832c85e8216d97e57679a34c7bc6a8f71 (kmwolf/centos-qemu-kvm) - -reader_count() is a performance bottleneck because the global -aio_context_list_lock mutex causes thread contention. Put this debugging -assertion behind a new ./configure --enable-debug-graph-lock option and -disable it by default. - -The --enable-debug-graph-lock option is also enabled by the more general ---enable-debug option. 
- -Signed-off-by: Stefan Hajnoczi -Message-Id: <20230501173443.153062-1-stefanha@redhat.com> -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 58a2e3f5c37be02dac3086b81bdda9414b931edf) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 3 +++ - configure | 1 + - meson.build | 2 ++ - meson_options.txt | 2 ++ - scripts/meson-buildoptions.sh | 4 ++++ - 5 files changed, 12 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 454c31e691..259a7a0bde 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -265,7 +265,10 @@ void bdrv_graph_rdunlock_main_loop(void) - - void assert_bdrv_graph_readable(void) - { -+ /* reader_count() is slow due to aio_context_list_lock lock contention */ -+#ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); -+#endif - } - - void assert_bdrv_graph_writable(void) -diff --git a/configure b/configure -index 800b5850f4..a62a3e6be9 100755 ---- a/configure -+++ b/configure -@@ -806,6 +806,7 @@ for opt do - --enable-debug) - # Enable debugging options that aren't excessively noisy - debug_tcg="yes" -+ meson_option_parse --enable-debug-graph-lock "" - meson_option_parse --enable-debug-mutex "" - meson_option_add -Doptimization=0 - fortify_source="no" -diff --git a/meson.build b/meson.build -index c44d05a13f..d964e741e7 100644 ---- a/meson.build -+++ b/meson.build -@@ -1956,6 +1956,7 @@ if get_option('debug_stack_usage') and have_coroutine_pool - have_coroutine_pool = false - endif - config_host_data.set10('CONFIG_COROUTINE_POOL', have_coroutine_pool) -+config_host_data.set('CONFIG_DEBUG_GRAPH_LOCK', get_option('debug_graph_lock')) - config_host_data.set('CONFIG_DEBUG_MUTEX', get_option('debug_mutex')) - config_host_data.set('CONFIG_DEBUG_STACK_USAGE', get_option('debug_stack_usage')) - config_host_data.set('CONFIG_GPROF', get_option('gprof')) -@@ -3833,6 +3834,7 @@ summary_info += {'PIE': get_option('b_pie')} - summary_info += {'static build': 
config_host.has_key('CONFIG_STATIC')} - summary_info += {'malloc trim support': has_malloc_trim} - summary_info += {'membarrier': have_membarrier} -+summary_info += {'debug graph lock': get_option('debug_graph_lock')} - summary_info += {'debug stack usage': get_option('debug_stack_usage')} - summary_info += {'mutex debugging': get_option('debug_mutex')} - summary_info += {'memory allocator': get_option('malloc')} -diff --git a/meson_options.txt b/meson_options.txt -index fc9447d267..bc857fe68b 100644 ---- a/meson_options.txt -+++ b/meson_options.txt -@@ -311,6 +311,8 @@ option('rng_none', type: 'boolean', value: false, - description: 'dummy RNG, avoid using /dev/(u)random and getrandom()') - option('coroutine_pool', type: 'boolean', value: true, - description: 'coroutine freelist (better performance)') -+option('debug_graph_lock', type: 'boolean', value: false, -+ description: 'graph lock debugging support') - option('debug_mutex', type: 'boolean', value: false, - description: 'mutex debugging support') - option('debug_stack_usage', type: 'boolean', value: false, -diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh -index 009fab1515..30e1f25259 100644 ---- a/scripts/meson-buildoptions.sh -+++ b/scripts/meson-buildoptions.sh -@@ -21,6 +21,8 @@ meson_options_help() { - printf "%s\n" ' QEMU' - printf "%s\n" ' --enable-cfi Control-Flow Integrity (CFI)' - printf "%s\n" ' --enable-cfi-debug Verbose errors in case of CFI violation' -+ printf "%s\n" ' --enable-debug-graph-lock' -+ printf "%s\n" ' graph lock debugging support' - printf "%s\n" ' --enable-debug-mutex mutex debugging support' - printf "%s\n" ' --enable-debug-stack-usage' - printf "%s\n" ' measure coroutine stack usage' -@@ -249,6 +251,8 @@ _meson_option_parse() { - --datadir=*) quote_sh "-Ddatadir=$2" ;; - --enable-dbus-display) printf "%s" -Ddbus_display=enabled ;; - --disable-dbus-display) printf "%s" -Ddbus_display=disabled ;; -+ --enable-debug-graph-lock) printf "%s" 
-Ddebug_graph_lock=true ;; -+ --disable-debug-graph-lock) printf "%s" -Ddebug_graph_lock=false ;; - --enable-debug-mutex) printf "%s" -Ddebug_mutex=true ;; - --disable-debug-mutex) printf "%s" -Ddebug_mutex=false ;; - --enable-debug-stack-usage) printf "%s" -Ddebug_stack_usage=true ;; --- -2.39.3 - diff --git a/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch new file mode 100644 index 0000000..df764fb --- /dev/null +++ b/SOURCES/kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch @@ -0,0 +1,69 @@ +From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 12 Sep 2023 19:10:37 -0400 +Subject: [PATCH 095/101] block-coroutine-wrapper: use + qemu_get_current_aio_context() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm) + +Use qemu_get_current_aio_context() in mixed wrappers and coroutine +wrappers so that code runs in the caller's AioContext instead of moving +to the BlockDriverState's AioContext. This change is necessary for the +multi-queue block layer where any thread can call into the block layer. + +Most wrappers are IO_CODE where it's safe to use the current AioContext +nowadays. BlockDrivers and the core block layer use their own locks and +no longer depend on the AioContext lock for thread-safety. + +The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current +AioContext is safe because this code is only called with the BQL held +from the main loop thread. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230912231037.826804-6-stefanha@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + scripts/block-coroutine-wrapper.py | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index c9c09fcacd..dbbde99e39 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str, + f"{self.name}") + self.target_name = f'{subsystem}_{subname}' + +- self.ctx = self.gen_ctx() +- + self.get_result = 's->ret = ' + self.ret = 'return s.ret;' + self.co_ret = 'return ' +@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str: + {func.co_ret}{name}({ func.gen_list('{name}') }); + }} else {{ + {struct_name} s = {{ +- .poll_state.ctx = {func.ctx}, ++ .poll_state.ctx = qemu_get_current_aio_context(), + .poll_state.in_progress = true, + + { func.gen_block(' .{name} = {name},') } +@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str: + {func.return_type} {func.name}({ func.gen_list('{decl}') }) + {{ + {struct_name} s = {{ +- .poll_state.ctx = {func.ctx}, ++ .poll_state.ctx = qemu_get_current_aio_context(), + .poll_state.in_progress = true, + + { func.gen_block(' .{name} = {name},') } +-- +2.39.3 + diff --git a/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch new file mode 100644 index 0000000..1783a64 --- /dev/null +++ b/SOURCES/kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch @@ -0,0 +1,217 @@ +From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:00:58 -0400 +Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in + the current thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 
214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm) + +The file-posix block driver currently only sets up Linux AIO and +io_uring in the BDS's AioContext. In the multi-queue block layer we must +be able to submit I/O requests in AioContexts that do not have Linux AIO +and io_uring set up yet since any thread can call into the block driver. + +Set up Linux AIO and io_uring for the current AioContext during request +submission. We lose the ability to return an error from +.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to +resource limits). Instead the user only gets warnings and we fall back +to aio=threads. This is still better than a fatal error after startup. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 103 ++++++++++++++++++++++----------------------- + 1 file changed, 51 insertions(+), 52 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b862406c71..35684f7e21 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + + #ifdef CONFIG_LINUX_AIO + /* Currently Linux does AIO only for files opened with O_DIRECT */ +- if (s->use_linux_aio) { +- if (!(s->open_flags & O_DIRECT)) { +- error_setg(errp, "aio=native was specified, but it requires " +- "cache.direct=on, which was not specified."); +- ret = -EINVAL; +- goto fail; +- } +- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) { +- error_prepend(errp, "Unable to use native AIO: "); +- goto fail; +- } ++ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { ++ error_setg(errp, "aio=native was specified, but it requires " ++ "cache.direct=on, which was not specified."); ++ ret = 
-EINVAL; ++ goto fail; + } + #else + if (s->use_linux_aio) { +@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + } + #endif /* !defined(CONFIG_LINUX_AIO) */ + +-#ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { +- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { +- error_prepend(errp, "Unable to use io_uring: "); +- goto fail; +- } +- } +-#else ++#ifndef CONFIG_LINUX_IO_URING + if (s->use_linux_io_uring) { + error_setg(errp, "aio=io_uring was specified, but is not supported " + "in this build."); +@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) + return true; + } + ++#ifdef CONFIG_LINUX_IO_URING ++static inline bool raw_check_linux_io_uring(BDRVRawState *s) ++{ ++ Error *local_err = NULL; ++ AioContext *ctx; ++ ++ if (!s->use_linux_io_uring) { ++ return false; ++ } ++ ++ ctx = qemu_get_current_aio_context(); ++ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) { ++ error_reportf_err(local_err, "Unable to use linux io_uring, " ++ "falling back to thread pool: "); ++ s->use_linux_io_uring = false; ++ return false; ++ } ++ return true; ++} ++#endif ++ ++#ifdef CONFIG_LINUX_AIO ++static inline bool raw_check_linux_aio(BDRVRawState *s) ++{ ++ Error *local_err = NULL; ++ AioContext *ctx; ++ ++ if (!s->use_linux_aio) { ++ return false; ++ } ++ ++ ctx = qemu_get_current_aio_context(); ++ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) { ++ error_reportf_err(local_err, "Unable to use Linux AIO, " ++ "falling back to thread pool: "); ++ s->use_linux_aio = false; ++ return false; ++ } ++ return true; ++} ++#endif ++ + static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, + uint64_t bytes, QEMUIOVector *qiov, int type) + { +@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { + type |= QEMU_AIO_MISALIGNED; + #ifdef 
CONFIG_LINUX_IO_URING +- } else if (s->use_linux_io_uring) { ++ } else if (raw_check_linux_io_uring(s)) { + assert(qiov->size == bytes); + ret = luring_co_submit(bs, s->fd, offset, qiov, type); + goto out; + #endif + #ifdef CONFIG_LINUX_AIO +- } else if (s->use_linux_aio) { ++ } else if (raw_check_linux_aio(s)) { + assert(qiov->size == bytes); + ret = laio_co_submit(s->fd, offset, qiov, type, + s->aio_max_batch); +@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) + }; + + #ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { ++ if (raw_check_linux_io_uring(s)) { + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } + #endif + return raw_thread_pool_submit(handle_aiocb_flush, &acb); + } + +-static void raw_aio_attach_aio_context(BlockDriverState *bs, +- AioContext *new_context) +-{ +- BDRVRawState __attribute__((unused)) *s = bs->opaque; +-#ifdef CONFIG_LINUX_AIO +- if (s->use_linux_aio) { +- Error *local_err = NULL; +- if (!aio_setup_linux_aio(new_context, &local_err)) { +- error_reportf_err(local_err, "Unable to use native AIO, " +- "falling back to thread pool: "); +- s->use_linux_aio = false; +- } +- } +-#endif +-#ifdef CONFIG_LINUX_IO_URING +- if (s->use_linux_io_uring) { +- Error *local_err = NULL; +- if (!aio_setup_linux_io_uring(new_context, &local_err)) { +- error_reportf_err(local_err, "Unable to use linux io_uring, " +- "falling back to thread pool: "); +- s->use_linux_io_uring = false; +- } +- } +-#endif +-} +- + static void raw_close(BlockDriverState *bs) + { + BDRVRawState *s = bs->opaque; +@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = { + .bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = { + 
.bdrv_co_copy_range_from = raw_co_copy_range_from, + .bdrv_co_copy_range_to = raw_co_copy_range_to, + .bdrv_refresh_limits = raw_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = cdrom_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = { + .bdrv_co_pwritev = raw_co_pwritev, + .bdrv_co_flush_to_disk = raw_co_flush_to_disk, + .bdrv_refresh_limits = cdrom_refresh_limits, +- .bdrv_attach_aio_context = raw_aio_attach_aio_context, + + .bdrv_co_truncate = raw_co_truncate, + .bdrv_co_getlength = raw_co_getlength, +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-AioContext-locking.patch b/SOURCES/kvm-block-remove-AioContext-locking.patch new file mode 100644 index 0000000..5bcd859 --- /dev/null +++ b/SOURCES/kvm-block-remove-AioContext-locking.patch @@ -0,0 +1,4438 @@ +From df1400991580e8a60d711079865b56ed95830b28 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:03 -0500 +Subject: [PATCH 086/101] block: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [17/26] b29c3ac7ea91ca356335ba047c66187317c482f9 (kmwolf/centos-qemu-kvm) + +This is the big patch that removes +aio_context_acquire()/aio_context_release() from the block layer and +affected block layer users. + +There isn't a clean way to split this patch and the reviewers are likely +the same group of people, so I decided to do it in one patch. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Reviewed-by: Paul Durrant +Message-ID: <20231205182011.1976568-7-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + block.c | 234 +--------------------- + block/block-backend.c | 14 -- + block/copy-before-write.c | 22 +-- + block/export/export.c | 22 +-- + block/io.c | 45 +---- + block/mirror.c | 19 -- + block/monitor/bitmap-qmp-cmds.c | 20 +- + block/monitor/block-hmp-cmds.c | 29 --- + block/qapi-sysemu.c | 27 +-- + block/qapi.c | 18 +- + block/raw-format.c | 5 - + block/replication.c | 58 +----- + block/snapshot.c | 22 +-- + block/write-threshold.c | 6 - + blockdev.c | 307 +++++------------------------ + blockjob.c | 18 -- + hw/block/dataplane/virtio-blk.c | 10 - + hw/block/dataplane/xen-block.c | 17 +- + hw/block/virtio-blk.c | 13 -- + hw/core/qdev-properties-system.c | 9 - + include/block/block-global-state.h | 9 +- + include/block/block-io.h | 3 +- + include/block/snapshot.h | 2 - + job.c | 16 -- + migration/block.c | 34 +--- + migration/migration-hmp-cmds.c | 3 - + migration/savevm.c | 22 --- + net/colo-compare.c | 2 - + qemu-img.c | 4 - + qemu-io.c | 10 +- + qemu-nbd.c | 2 - + replay/replay-debugging.c | 4 - + scripts/block-coroutine-wrapper.py | 3 - + tests/tsan/suppressions.tsan | 1 - + tests/unit/test-bdrv-drain.c | 51 +---- + tests/unit/test-bdrv-graph-mod.c | 6 - + tests/unit/test-block-iothread.c | 31 --- + tests/unit/test-blockjob.c | 137 ------------- + tests/unit/test-replication.c | 11 -- + util/async.c | 4 - + util/vhost-user-server.c | 3 - + 41 files changed, 104 insertions(+), 1169 deletions(-) + +diff --git a/block.c b/block.c +index 25e1ebc606..91ace5d2d5 100644 +--- a/block.c ++++ b/block.c +@@ -1625,7 +1625,6 @@ static int no_coroutine_fn GRAPH_UNLOCKED + bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + QDict *options, int open_flags, Error **errp) + { +- AioContext *ctx; + Error *local_err = NULL; + int i, ret; + 
GLOBAL_STATE_CODE(); +@@ -1673,21 +1672,15 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF; + bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF; + +- /* Get the context after .bdrv_open, it can change the context */ +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + ret = bdrv_refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not refresh total sector count"); +- aio_context_release(ctx); + return ret; + } + + bdrv_graph_rdlock_main_loop(); + bdrv_refresh_limits(bs, NULL, &local_err); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(ctx); + + if (local_err) { + error_propagate(errp, local_err); +@@ -3062,7 +3055,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + Transaction *tran, Error **errp) + { + BdrvChild *new_child; +- AioContext *parent_ctx, *new_child_ctx; ++ AioContext *parent_ctx; + AioContext *child_ctx = bdrv_get_aio_context(child_bs); + + assert(child_class->get_parent_desc); +@@ -3114,12 +3107,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + } + } + +- new_child_ctx = bdrv_get_aio_context(child_bs); +- if (new_child_ctx != child_ctx) { +- aio_context_release(child_ctx); +- aio_context_acquire(new_child_ctx); +- } +- + bdrv_ref(child_bs); + /* + * Let every new BdrvChild start with a drained parent. 
Inserting the child +@@ -3149,11 +3136,6 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + }; + tran_add(tran, &bdrv_attach_child_common_drv, s); + +- if (new_child_ctx != child_ctx) { +- aio_context_release(new_child_ctx); +- aio_context_acquire(child_ctx); +- } +- + return new_child; + } + +@@ -3605,7 +3587,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + int ret = 0; + bool implicit_backing = false; + BlockDriverState *backing_hd; +- AioContext *backing_hd_ctx; + QDict *options; + QDict *tmp_parent_options = NULL; + Error *local_err = NULL; +@@ -3691,11 +3672,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + + /* Hook up the backing file link; drop our reference, bs owns the + * backing_hd reference now */ +- backing_hd_ctx = bdrv_get_aio_context(backing_hd); +- aio_context_acquire(backing_hd_ctx); + ret = bdrv_set_backing_hd(bs, backing_hd, errp); + bdrv_unref(backing_hd); +- aio_context_release(backing_hd_ctx); + + if (ret < 0) { + goto free_exit; +@@ -3780,7 +3758,6 @@ BdrvChild *bdrv_open_child(const char *filename, + { + BlockDriverState *bs; + BdrvChild *child; +- AioContext *ctx; + + GLOBAL_STATE_CODE(); + +@@ -3791,11 +3768,8 @@ BdrvChild *bdrv_open_child(const char *filename, + } + + bdrv_graph_wrlock(); +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); +- aio_context_release(ctx); + bdrv_graph_wrunlock(); + + return child; +@@ -3881,7 +3855,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + int64_t total_size; + QemuOpts *opts = NULL; + BlockDriverState *bs_snapshot = NULL; +- AioContext *ctx = bdrv_get_aio_context(bs); + int ret; + + GLOBAL_STATE_CODE(); +@@ -3890,9 +3863,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + instead of opening 'filename' directly */ + + /* Get the required size from the image */ +- 
aio_context_acquire(ctx); + total_size = bdrv_getlength(bs); +- aio_context_release(ctx); + + if (total_size < 0) { + error_setg_errno(errp, -total_size, "Could not get image size"); +@@ -3927,10 +3898,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs, + goto out; + } + +- aio_context_acquire(ctx); + ret = bdrv_append(bs_snapshot, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + bs_snapshot = NULL; + goto out; +@@ -3974,7 +3942,6 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + Error *local_err = NULL; + QDict *snapshot_options = NULL; + int snapshot_flags = 0; +- AioContext *ctx = qemu_get_aio_context(); + + assert(!child_class || !flags); + assert(!child_class == !parent); +@@ -4115,12 +4082,10 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + /* Not requesting BLK_PERM_CONSISTENT_READ because we're only + * looking at the header to guess the image format. This works even + * in cases where a guest would not see a consistent state. */ +- ctx = bdrv_get_aio_context(file_bs); +- aio_context_acquire(ctx); ++ AioContext *ctx = bdrv_get_aio_context(file_bs); + file = blk_new(ctx, 0, BLK_PERM_ALL); + blk_insert_bs(file, file_bs, &local_err); + bdrv_unref(file_bs); +- aio_context_release(ctx); + + if (local_err) { + goto fail; +@@ -4167,13 +4132,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + goto fail; + } + +- /* The AioContext could have changed during bdrv_open_common() */ +- ctx = bdrv_get_aio_context(bs); +- + if (file) { +- aio_context_acquire(ctx); + blk_unref(file); +- aio_context_release(ctx); + file = NULL; + } + +@@ -4231,16 +4191,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + * (snapshot_bs); thus, we have to drop the strong reference to bs + * (which we obtained by calling bdrv_new()). bs will not be deleted, + * though, because the overlay still has a reference to it. 
*/ +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + bs = snapshot_bs; + } + + return bs; + + fail: +- aio_context_acquire(ctx); + blk_unref(file); + qobject_unref(snapshot_options); + qobject_unref(bs->explicit_options); +@@ -4249,14 +4206,11 @@ fail: + bs->options = NULL; + bs->explicit_options = NULL; + bdrv_unref(bs); +- aio_context_release(ctx); + error_propagate(errp, local_err); + return NULL; + + close_and_fail: +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + qobject_unref(snapshot_options); + qobject_unref(options); + error_propagate(errp, local_err); +@@ -4540,12 +4494,7 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { +- AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); +- +- aio_context_acquire(ctx); + bdrv_drained_end(bs_entry->state.bs); +- aio_context_release(ctx); +- + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); +@@ -4577,7 +4526,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + { + int ret = -1; + BlockReopenQueueEntry *bs_entry, *next; +- AioContext *ctx; + Transaction *tran = tran_new(); + g_autoptr(GSList) refresh_list = NULL; + +@@ -4586,10 +4534,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + GLOBAL_STATE_CODE(); + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + ret = bdrv_flush(bs_entry->state.bs); +- aio_context_release(ctx); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error flushing drive"); + goto abort; +@@ -4598,10 +4543,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + + QTAILQ_FOREACH(bs_entry, bs_queue, entry) { + assert(bs_entry->state.bs->quiesce_counter > 0); +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- 
aio_context_acquire(ctx); + ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp); +- aio_context_release(ctx); + if (ret < 0) { + goto abort; + } +@@ -4644,10 +4586,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + * to first element. + */ + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + bdrv_reopen_commit(&bs_entry->state); +- aio_context_release(ctx); + } + + bdrv_graph_wrlock(); +@@ -4658,10 +4597,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + BlockDriverState *bs = bs_entry->state.bs; + + if (bs->drv->bdrv_reopen_commit_post) { +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + bs->drv->bdrv_reopen_commit_post(&bs_entry->state); +- aio_context_release(ctx); + } + } + +@@ -4675,10 +4611,7 @@ abort: + + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (bs_entry->prepared) { +- ctx = bdrv_get_aio_context(bs_entry->state.bs); +- aio_context_acquire(ctx); + bdrv_reopen_abort(&bs_entry->state); +- aio_context_release(ctx); + } + } + +@@ -4691,24 +4624,13 @@ cleanup: + int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + Error **errp) + { +- AioContext *ctx = bdrv_get_aio_context(bs); + BlockReopenQueue *queue; +- int ret; + + GLOBAL_STATE_CODE(); + + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } +- ret = bdrv_reopen_multiple(queue, errp); +- +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } +- +- return ret; ++ return bdrv_reopen_multiple(queue, errp); + } + + int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, +@@ -4760,7 +4682,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + const char *child_name = is_backing ? 
"backing" : "file"; + QObject *value; + const char *str; +- AioContext *ctx, *old_ctx; + bool has_child; + int ret; + +@@ -4844,13 +4765,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + bdrv_drained_begin(old_child_bs); + } + +- old_ctx = bdrv_get_aio_context(bs); +- ctx = bdrv_get_aio_context(new_child_bs); +- if (old_ctx != ctx) { +- aio_context_release(old_ctx); +- aio_context_acquire(ctx); +- } +- + bdrv_graph_rdunlock_main_loop(); + bdrv_graph_wrlock(); + +@@ -4859,11 +4773,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + + bdrv_graph_wrunlock(); + +- if (old_ctx != ctx) { +- aio_context_release(ctx); +- aio_context_acquire(old_ctx); +- } +- + if (old_child_bs) { + bdrv_drained_end(old_child_bs); + bdrv_unref(old_child_bs); +@@ -5537,7 +5446,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + int ret; + BdrvChild *child; + Transaction *tran = tran_new(); +- AioContext *old_context, *new_context = NULL; + + GLOBAL_STATE_CODE(); + +@@ -5545,21 +5453,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + assert(!bs_new->backing); + bdrv_graph_rdunlock_main_loop(); + +- old_context = bdrv_get_aio_context(bs_top); + bdrv_drained_begin(bs_top); +- +- /* +- * bdrv_drained_begin() requires that only the AioContext of the drained +- * node is locked, and at this point it can still differ from the AioContext +- * of bs_top. +- */ +- new_context = bdrv_get_aio_context(bs_new); +- aio_context_release(old_context); +- aio_context_acquire(new_context); + bdrv_drained_begin(bs_new); +- aio_context_release(new_context); +- aio_context_acquire(old_context); +- new_context = NULL; + + bdrv_graph_wrlock(); + +@@ -5571,18 +5466,6 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + goto out; + } + +- /* +- * bdrv_attach_child_noperm could change the AioContext of bs_top and +- * bs_new, but at least they are in the same AioContext now. 
This is the +- * AioContext that we need to lock for the rest of the function. +- */ +- new_context = bdrv_get_aio_context(bs_top); +- +- if (old_context != new_context) { +- aio_context_release(old_context); +- aio_context_acquire(new_context); +- } +- + ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp); + if (ret < 0) { + goto out; +@@ -5598,11 +5481,6 @@ out: + bdrv_drained_end(bs_top); + bdrv_drained_end(bs_new); + +- if (new_context && old_context != new_context) { +- aio_context_release(new_context); +- aio_context_acquire(old_context); +- } +- + return ret; + } + +@@ -5697,12 +5575,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + + GLOBAL_STATE_CODE(); + +- aio_context_release(ctx); +- aio_context_acquire(qemu_get_aio_context()); + new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags, + errp); +- aio_context_release(qemu_get_aio_context()); +- aio_context_acquire(ctx); + assert(bdrv_get_aio_context(bs) == ctx); + + options = NULL; /* bdrv_new_open_driver() eats options */ +@@ -7037,12 +6911,9 @@ void bdrv_activate_all(Error **errp) + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + +- aio_context_acquire(aio_context); + ret = bdrv_activate(bs, errp); +- aio_context_release(aio_context); + if (ret < 0) { + bdrv_next_cleanup(&it); + return; +@@ -7137,20 +7008,10 @@ int bdrv_inactivate_all(void) + BlockDriverState *bs = NULL; + BdrvNextIterator it; + int ret = 0; +- GSList *aio_ctxs = NULL, *ctx; + + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- if (!g_slist_find(aio_ctxs, aio_context)) { +- aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); +- aio_context_acquire(aio_context); +- } +- } +- + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + /* Nodes with BDS 
parents are covered by recursion from the last + * parent that gets inactivated. Don't inactivate them a second +@@ -7161,17 +7022,10 @@ int bdrv_inactivate_all(void) + ret = bdrv_inactivate_recurse(bs); + if (ret < 0) { + bdrv_next_cleanup(&it); +- goto out; ++ break; + } + } + +-out: +- for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { +- AioContext *aio_context = ctx->data; +- aio_context_release(aio_context); +- } +- g_slist_free(aio_ctxs); +- + return ret; + } + +@@ -7257,11 +7111,8 @@ void bdrv_unref(BlockDriverState *bs) + static void bdrv_schedule_unref_bh(void *opaque) + { + BlockDriverState *bs = opaque; +- AioContext *ctx = bdrv_get_aio_context(bs); + +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + } + + /* +@@ -7398,8 +7249,6 @@ void bdrv_img_create(const char *filename, const char *fmt, + return; + } + +- aio_context_acquire(qemu_get_aio_context()); +- + /* Create parameter list */ + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); +@@ -7549,7 +7398,6 @@ out: + qemu_opts_del(opts); + qemu_opts_free(create_opts); + error_propagate(errp, local_err); +- aio_context_release(qemu_get_aio_context()); + } + + AioContext *bdrv_get_aio_context(BlockDriverState *bs) +@@ -7585,29 +7433,12 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) + + void coroutine_fn bdrv_co_lock(BlockDriverState *bs) + { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- /* In the main thread, bs->aio_context won't change concurrently */ +- assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +- +- /* +- * We're in coroutine context, so we already hold the lock of the main +- * loop AioContext. Don't lock it twice to avoid deadlocks. 
+- */ +- assert(qemu_in_coroutine()); +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } ++ /* TODO removed in next patch */ + } + + void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) + { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- assert(qemu_in_coroutine()); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } ++ /* TODO removed in next patch */ + } + + static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) +@@ -7728,21 +7559,8 @@ static void bdrv_set_aio_context_commit(void *opaque) + BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; + BlockDriverState *bs = (BlockDriverState *) state->bs; + AioContext *new_context = state->new_ctx; +- AioContext *old_context = bdrv_get_aio_context(bs); + +- /* +- * Take the old AioContex when detaching it from bs. +- * At this point, new_context lock is already acquired, and we are now +- * also taking old_context. This is safe as long as bdrv_detach_aio_context +- * does not call AIO_POLL_WHILE(). +- */ +- if (old_context != qemu_get_aio_context()) { +- aio_context_acquire(old_context); +- } + bdrv_detach_aio_context(bs); +- if (old_context != qemu_get_aio_context()) { +- aio_context_release(old_context); +- } + bdrv_attach_aio_context(bs, new_context); + } + +@@ -7827,7 +7645,6 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + Transaction *tran; + GHashTable *visited; + int ret; +- AioContext *old_context = bdrv_get_aio_context(bs); + GLOBAL_STATE_CODE(); + + /* +@@ -7857,34 +7674,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + return -EPERM; + } + +- /* +- * Release old AioContext, it won't be needed anymore, as all +- * bdrv_drained_begin() have been called already. 
+- */ +- if (qemu_get_aio_context() != old_context) { +- aio_context_release(old_context); +- } +- +- /* +- * Acquire new AioContext since bdrv_drained_end() is going to be called +- * after we switched all nodes in the new AioContext, and the function +- * assumes that the lock of the bs is always taken. +- */ +- if (qemu_get_aio_context() != ctx) { +- aio_context_acquire(ctx); +- } +- + tran_commit(tran); +- +- if (qemu_get_aio_context() != ctx) { +- aio_context_release(ctx); +- } +- +- /* Re-acquire the old AioContext, since the caller takes and releases it. */ +- if (qemu_get_aio_context() != old_context) { +- aio_context_acquire(old_context); +- } +- + return 0; + } + +@@ -8006,7 +7796,6 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp) + { + BlockDriverState *to_replace_bs = bdrv_find_node(node_name); +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -8015,12 +7804,8 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + return NULL; + } + +- aio_context = bdrv_get_aio_context(to_replace_bs); +- aio_context_acquire(aio_context); +- + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { +- to_replace_bs = NULL; +- goto out; ++ return NULL; + } + + /* We don't want arbitrary node of the BDS chain to be replaced only the top +@@ -8033,12 +7818,9 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + "because it cannot be guaranteed that doing so would not " + "lead to an abrupt change of visible data", + node_name, parent_bs->node_name); +- to_replace_bs = NULL; +- goto out; ++ return NULL; + } + +-out: +- aio_context_release(aio_context); + return to_replace_bs; + } + +diff --git a/block/block-backend.c b/block/block-backend.c +index abac4e0235..f412bed274 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -429,7 +429,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, + { + BlockBackend *blk; + 
BlockDriverState *bs; +- AioContext *ctx; + uint64_t perm = 0; + uint64_t shared = BLK_PERM_ALL; + +@@ -459,23 +458,18 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, + shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; + } + +- aio_context_acquire(qemu_get_aio_context()); + bs = bdrv_open(filename, reference, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); + if (!bs) { + return NULL; + } + + /* bdrv_open() could have moved bs to a different AioContext */ +- ctx = bdrv_get_aio_context(bs); + blk = blk_new(bdrv_get_aio_context(bs), perm, shared); + blk->perm = perm; + blk->shared_perm = shared; + +- aio_context_acquire(ctx); + blk_insert_bs(blk, bs, errp); + bdrv_unref(bs); +- aio_context_release(ctx); + + if (!blk->root) { + blk_unref(blk); +@@ -577,13 +571,9 @@ void blk_remove_all_bs(void) + GLOBAL_STATE_CODE(); + + while ((blk = blk_all_next(blk)) != NULL) { +- AioContext *ctx = blk_get_aio_context(blk); +- +- aio_context_acquire(ctx); + if (blk->root) { + blk_remove_bs(blk); + } +- aio_context_release(ctx); + } + } + +@@ -2736,20 +2726,16 @@ int blk_commit_all(void) + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + while ((blk = blk_all_next(blk)) != NULL) { +- AioContext *aio_context = blk_get_aio_context(blk); + BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk)); + +- aio_context_acquire(aio_context); + if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) { + int ret; + + ret = bdrv_commit(unfiltered_bs); + if (ret < 0) { +- aio_context_release(aio_context); + return ret; + } + } +- aio_context_release(aio_context); + } + return 0; + } +diff --git a/block/copy-before-write.c b/block/copy-before-write.c +index 13972879b1..0842a1a6df 100644 +--- a/block/copy-before-write.c ++++ b/block/copy-before-write.c +@@ -412,7 +412,6 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + int64_t cluster_size; + g_autoptr(BlockdevOptions) full_opts = NULL; + BlockdevOptionsCbw *opts; +- 
AioContext *ctx; + int ret; + + full_opts = cbw_parse_options(options, errp); +@@ -435,15 +434,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + if (opts->bitmap) { + bitmap = block_dirty_bitmap_lookup(opts->bitmap->node, + opts->bitmap->name, NULL, errp); + if (!bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + } + s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error : +@@ -461,24 +456,21 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp); + if (!s->bcs) { + error_prepend(errp, "Cannot create block-copy-state: "); +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + + cluster_size = block_copy_cluster_size(s->bcs); + + s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->done_bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->done_bitmap); + + /* s->access_bitmap starts equal to bcs bitmap */ + s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp); + if (!s->access_bitmap) { +- ret = -EINVAL; +- goto out; ++ return -EINVAL; + } + bdrv_disable_dirty_bitmap(s->access_bitmap); + bdrv_dirty_bitmap_merge_internal(s->access_bitmap, +@@ -487,11 +479,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, + + qemu_co_mutex_init(&s->lock); + QLIST_INIT(&s->frozen_read_reqs); +- +- ret = 0; +-out: +- aio_context_release(ctx); +- return ret; ++ return 0; + } + + static void cbw_close(BlockDriverState *bs) +diff --git a/block/export/export.c b/block/export/export.c +index a8f274e526..6d51ae8ed7 100644 +--- a/block/export/export.c ++++ b/block/export/export.c +@@ -114,7 +114,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + } + + ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + + if 
(export->iothread) { + IOThread *iothread; +@@ -133,8 +132,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + set_context_errp = fixed_iothread ? errp : NULL; + ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp); + if (ret == 0) { +- aio_context_release(ctx); +- aio_context_acquire(new_ctx); + ctx = new_ctx; + } else if (fixed_iothread) { + goto fail; +@@ -191,8 +188,6 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp) + assert(exp->blk != NULL); + + QLIST_INSERT_HEAD(&block_exports, exp, next); +- +- aio_context_release(ctx); + return exp; + + fail: +@@ -200,7 +195,6 @@ fail: + blk_set_dev_ops(blk, NULL, NULL); + blk_unref(blk); + } +- aio_context_release(ctx); + if (exp) { + g_free(exp->id); + g_free(exp); +@@ -218,9 +212,6 @@ void blk_exp_ref(BlockExport *exp) + static void blk_exp_delete_bh(void *opaque) + { + BlockExport *exp = opaque; +- AioContext *aio_context = exp->ctx; +- +- aio_context_acquire(aio_context); + + assert(exp->refcount == 0); + QLIST_REMOVE(exp, next); +@@ -230,8 +221,6 @@ static void blk_exp_delete_bh(void *opaque) + qapi_event_send_block_export_deleted(exp->id); + g_free(exp->id); + g_free(exp); +- +- aio_context_release(aio_context); + } + + void blk_exp_unref(BlockExport *exp) +@@ -249,22 +238,16 @@ void blk_exp_unref(BlockExport *exp) + * connections and other internally held references start to shut down. When + * the function returns, there may still be active references while the export + * is in the process of shutting down. +- * +- * Acquires exp->ctx internally. Callers must *not* hold the lock. + */ + void blk_exp_request_shutdown(BlockExport *exp) + { +- AioContext *aio_context = exp->ctx; +- +- aio_context_acquire(aio_context); +- + /* + * If the user doesn't own the export any more, it is already shutting + * down. We must not call .request_shutdown and decrease the refcount a + * second time. 
+ */ + if (!exp->user_owned) { +- goto out; ++ return; + } + + exp->drv->request_shutdown(exp); +@@ -272,9 +255,6 @@ void blk_exp_request_shutdown(BlockExport *exp) + assert(exp->user_owned); + exp->user_owned = false; + blk_exp_unref(exp); +- +-out: +- aio_context_release(aio_context); + } + + /* +diff --git a/block/io.c b/block/io.c +index 7e62fabbf5..8fa7670571 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -294,8 +294,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + BlockDriverState *bs = data->bs; + + if (bs) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { + bdrv_do_drained_begin(bs, data->parent, data->poll); +@@ -303,7 +301,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + assert(!data->poll); + bdrv_do_drained_end(bs, data->parent); + } +- aio_context_release(ctx); + } else { + assert(data->begin); + bdrv_drain_all_begin(); +@@ -320,8 +317,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + { + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); +- AioContext *ctx = bdrv_get_aio_context(bs); +- AioContext *co_ctx = qemu_coroutine_get_aio_context(self); + + /* Calling bdrv_drain() from a BH ensures the current coroutine yields and + * other coroutines run if they were queued by aio_co_enter(). */ +@@ -340,17 +335,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bdrv_inc_in_flight(bs); + } + +- /* +- * Temporarily drop the lock across yield or we would get deadlocks. +- * bdrv_co_drain_bh_cb() reaquires the lock as needed. +- * +- * When we yield below, the lock for the current context will be +- * released, so if this is actually the lock that protects bs, don't drop +- * it a second time. 
+- */ +- if (ctx != co_ctx) { +- aio_context_release(ctx); +- } + replay_bh_schedule_oneshot_event(qemu_get_aio_context(), + bdrv_co_drain_bh_cb, &data); + +@@ -358,11 +342,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + /* If we are resumed from some other event (such as an aio completion or a + * timer callback), it is a bug in the caller that should be fixed. */ + assert(data.done); +- +- /* Reacquire the AioContext of bs if we dropped it */ +- if (ctx != co_ctx) { +- aio_context_acquire(ctx); +- } + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +@@ -478,13 +457,12 @@ static bool bdrv_drain_all_poll(void) + GLOBAL_STATE_CODE(); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- /* bdrv_drain_poll() can't make changes to the graph and we are holding the +- * main AioContext lock, so iterating bdrv_next_all_states() is safe. */ ++ /* ++ * bdrv_drain_poll() can't make changes to the graph and we hold the BQL, ++ * so iterating bdrv_next_all_states() is safe. ++ */ + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + result |= bdrv_drain_poll(bs, NULL, true); +- aio_context_release(aio_context); + } + + return result; +@@ -525,11 +503,7 @@ void bdrv_drain_all_begin_nopoll(void) + /* Quiesce all nodes, without polling in-flight requests yet. The graph + * cannot change during this loop. 
*/ + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + bdrv_do_drained_begin(bs, NULL, false); +- aio_context_release(aio_context); + } + } + +@@ -588,11 +562,7 @@ void bdrv_drain_all_end(void) + } + + while ((bs = bdrv_next_all_states(bs))) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- +- aio_context_acquire(aio_context); + bdrv_do_drained_end(bs, NULL); +- aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +@@ -2368,15 +2338,10 @@ int bdrv_flush_all(void) + } + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { +- AioContext *aio_context = bdrv_get_aio_context(bs); +- int ret; +- +- aio_context_acquire(aio_context); +- ret = bdrv_flush(bs); ++ int ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } +- aio_context_release(aio_context); + } + + return result; +diff --git a/block/mirror.c b/block/mirror.c +index 51f9e2f17c..5145eb53e1 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -662,7 +662,6 @@ static int mirror_exit_common(Job *job) + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); + BlockJob *bjob = &s->common; + MirrorBDSOpaque *bs_opaque; +- AioContext *replace_aio_context = NULL; + BlockDriverState *src; + BlockDriverState *target_bs; + BlockDriverState *mirror_top_bs; +@@ -677,7 +676,6 @@ static int mirror_exit_common(Job *job) + } + s->prepared = true; + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_graph_rdlock_main_loop(); + + mirror_top_bs = s->mirror_top_bs; +@@ -742,11 +740,6 @@ static int mirror_exit_common(Job *job) + } + bdrv_graph_rdunlock_main_loop(); + +- if (s->to_replace) { +- replace_aio_context = bdrv_get_aio_context(s->to_replace); +- aio_context_acquire(replace_aio_context); +- } +- + if (s->should_complete && !abort) { + BlockDriverState *to_replace = s->to_replace ?: src; + bool ro = bdrv_is_read_only(to_replace); +@@ 
-785,9 +778,6 @@ static int mirror_exit_common(Job *job) + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } +- if (replace_aio_context) { +- aio_context_release(replace_aio_context); +- } + g_free(s->replaces); + + /* +@@ -811,8 +801,6 @@ static int mirror_exit_common(Job *job) + bdrv_unref(mirror_top_bs); + bdrv_unref(src); + +- aio_context_release(qemu_get_aio_context()); +- + return ret; + } + +@@ -1191,24 +1179,17 @@ static void mirror_complete(Job *job, Error **errp) + + /* block all operations on to_replace bs */ + if (s->replaces) { +- AioContext *replace_aio_context; +- + s->to_replace = bdrv_find_node(s->replaces); + if (!s->to_replace) { + error_setg(errp, "Node name '%s' not found", s->replaces); + return; + } + +- replace_aio_context = bdrv_get_aio_context(s->to_replace); +- aio_context_acquire(replace_aio_context); +- + /* TODO Translate this into child freeze system. */ + error_setg(&s->replace_blocker, + "block device is in use by block-job-complete"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + bdrv_ref(s->to_replace); +- +- aio_context_release(replace_aio_context); + } + + s->should_complete = true; +diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c +index 70d01a3776..a738e7bbf7 100644 +--- a/block/monitor/bitmap-qmp-cmds.c ++++ b/block/monitor/bitmap-qmp-cmds.c +@@ -95,7 +95,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; + + if (!name || name[0] == '\0') { + error_setg(errp, "Bitmap name cannot be empty"); +@@ -107,14 +106,11 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (has_granularity) { + if (granularity < 512 || !is_power_of_2(granularity)) { + error_setg(errp, "Granularity must be power of 2 " + "and at least 512"); +- goto out; ++ return; + } + } 
else { + /* Default to cluster size, if available: */ +@@ -132,12 +128,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + if (persistent && + !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp)) + { +- goto out; ++ return; + } + + bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp); + if (bitmap == NULL) { +- goto out; ++ return; + } + + if (disabled) { +@@ -145,9 +141,6 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name, + } + + bdrv_dirty_bitmap_set_persistence(bitmap, persistent); +- +-out: +- aio_context_release(aio_context); + } + + BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, +@@ -157,7 +150,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + { + BlockDriverState *bs; + BdrvDirtyBitmap *bitmap; +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -166,19 +158,14 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + return NULL; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO, + errp)) { +- aio_context_release(aio_context); + return NULL; + } + + if (bdrv_dirty_bitmap_get_persistence(bitmap) && + bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0) + { +- aio_context_release(aio_context); + return NULL; + } + +@@ -190,7 +177,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name, + *bitmap_bs = bs; + } + +- aio_context_release(aio_context); + return release ? 
NULL : bitmap; + } + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index c729cbf1eb..bdbb5cb141 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -141,7 +141,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + const char *id = qdict_get_str(qdict, "id"); + BlockBackend *blk; + BlockDriverState *bs; +- AioContext *aio_context; + Error *local_err = NULL; + + GLOBAL_STATE_CODE(); +@@ -168,14 +167,10 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + return; + } + +- aio_context = blk_get_aio_context(blk); +- aio_context_acquire(aio_context); +- + bs = blk_bs(blk); + if (bs) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) { + error_report_err(local_err); +- aio_context_release(aio_context); + return; + } + +@@ -196,8 +191,6 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict) + } else { + blk_unref(blk); + } +- +- aio_context_release(aio_context); + } + + void hmp_commit(Monitor *mon, const QDict *qdict) +@@ -213,7 +206,6 @@ void hmp_commit(Monitor *mon, const QDict *qdict) + ret = blk_commit_all(); + } else { + BlockDriverState *bs; +- AioContext *aio_context; + + blk = blk_by_name(device); + if (!blk) { +@@ -222,18 +214,13 @@ void hmp_commit(Monitor *mon, const QDict *qdict) + } + + bs = bdrv_skip_implicit_filters(blk_bs(blk)); +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!blk_is_available(blk)) { + error_report("Device '%s' has no medium", device); +- aio_context_release(aio_context); + return; + } + + ret = bdrv_commit(bs); +- +- aio_context_release(aio_context); + } + if (ret < 0) { + error_report("'commit' error for '%s': %s", device, strerror(-ret)); +@@ -560,7 +547,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + BlockBackend *blk = NULL; + BlockDriverState *bs = NULL; + BlockBackend *local_blk = NULL; +- AioContext *ctx = NULL; + bool qdev = qdict_get_try_bool(qdict, "qdev", false); + const char 
*device = qdict_get_str(qdict, "device"); + const char *command = qdict_get_str(qdict, "command"); +@@ -582,9 +568,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + } + } + +- ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + if (bs) { + blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL); + ret = blk_insert_bs(blk, bs, &err); +@@ -622,11 +605,6 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict) + + fail: + blk_unref(local_blk); +- +- if (ctx) { +- aio_context_release(ctx); +- } +- + hmp_handle_error(mon, err); + } + +@@ -882,7 +860,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + int nb_sns, i; + int total; + int *global_snapshots; +- AioContext *aio_context; + + typedef struct SnapshotEntry { + QEMUSnapshotInfo sn; +@@ -909,11 +886,8 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + error_report_err(err); + return; + } +- aio_context = bdrv_get_aio_context(bs); + +- aio_context_acquire(aio_context); + nb_sns = bdrv_snapshot_list(bs, &sn_tab); +- aio_context_release(aio_context); + + if (nb_sns < 0) { + monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns); +@@ -924,9 +898,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + int bs1_nb_sns = 0; + ImageEntry *ie; + SnapshotEntry *se; +- AioContext *ctx = bdrv_get_aio_context(bs1); + +- aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs1)) { + sn = NULL; + bs1_nb_sns = bdrv_snapshot_list(bs1, &sn); +@@ -944,7 +916,6 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) + } + g_free(sn); + } +- aio_context_release(ctx); + } + + if (no_snapshot) { +diff --git a/block/qapi-sysemu.c b/block/qapi-sysemu.c +index 1618cd225a..e4282631d2 100644 +--- a/block/qapi-sysemu.c ++++ b/block/qapi-sysemu.c +@@ -174,7 +174,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + { + BlockBackend *blk; + BlockDriverState *bs; +- AioContext *aio_context; + bool 
has_attached_device; + + GLOBAL_STATE_CODE(); +@@ -204,13 +203,10 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_graph_rdlock_main_loop(); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) { + bdrv_graph_rdunlock_main_loop(); +- goto out; ++ return; + } + bdrv_graph_rdunlock_main_loop(); + +@@ -223,9 +219,6 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp) + * value passed here (i.e. false). */ + blk_dev_change_media_cb(blk, false, &error_abort); + } +- +-out: +- aio_context_release(aio_context); + } + + void qmp_blockdev_remove_medium(const char *id, Error **errp) +@@ -237,7 +230,6 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, + BlockDriverState *bs, Error **errp) + { + Error *local_err = NULL; +- AioContext *ctx; + bool has_device; + int ret; + +@@ -259,11 +251,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk, + return; + } + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + ret = blk_insert_bs(blk, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + return; + } +@@ -374,9 +362,7 @@ void qmp_blockdev_change_medium(const char *device, + qdict_put_str(options, "driver", format); + } + +- aio_context_acquire(qemu_get_aio_context()); + medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp); +- aio_context_release(qemu_get_aio_context()); + + if (!medium_bs) { + goto fail; +@@ -437,20 +423,16 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + ThrottleConfig cfg; + BlockDriverState *bs; + BlockBackend *blk; +- AioContext *aio_context; + + blk = qmp_get_blk(arg->device, arg->id, errp); + if (!blk) { + return; + } + +- aio_context = blk_get_aio_context(blk); +- aio_context_acquire(aio_context); +- + bs = blk_bs(blk); + if (!bs) { + error_setg(errp, "Device has no medium"); +- goto out; ++ return; + } + + 
throttle_config_init(&cfg); +@@ -505,7 +487,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + } + + if (!throttle_is_valid(&cfg, errp)) { +- goto out; ++ return; + } + + if (throttle_enabled(&cfg)) { +@@ -522,9 +504,6 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp) + /* If all throttling settings are set to 0, disable I/O limits */ + blk_io_limits_disable(blk); + } +- +-out: +- aio_context_release(aio_context); + } + + void qmp_block_latency_histogram_set( +diff --git a/block/qapi.c b/block/qapi.c +index 82a30b38fe..9e806fa230 100644 +--- a/block/qapi.c ++++ b/block/qapi.c +@@ -234,13 +234,11 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + int ret; + Error *err = NULL; + +- aio_context_acquire(bdrv_get_aio_context(bs)); +- + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "Can't get image size '%s'", + bs->exact_filename); +- goto out; ++ return; + } + + bdrv_refresh_filename(bs); +@@ -265,7 +263,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + info->format_specific = bdrv_get_specific_info(bs, &err); + if (err) { + error_propagate(errp, err); +- goto out; ++ return; + } + backing_filename = bs->backing_file; + if (backing_filename[0] != '\0') { +@@ -300,11 +298,8 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp) + break; + default: + error_propagate(errp, err); +- goto out; ++ return; + } +- +-out: +- aio_context_release(bdrv_get_aio_context(bs)); + } + + /** +@@ -709,15 +704,10 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + /* Just to be safe if query_nodes is not always initialized */ + if (has_query_nodes && query_nodes) { + for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); + QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false)); +- aio_context_release(ctx); + } + } 
else { + for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { +- AioContext *ctx = blk_get_aio_context(blk); + BlockStats *s; + char *qdev; + +@@ -725,7 +715,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + continue; + } + +- aio_context_acquire(ctx); + s = bdrv_query_bds_stats(blk_bs(blk), true); + s->device = g_strdup(blk_name(blk)); + +@@ -737,7 +726,6 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes, + } + + bdrv_query_blk_stats(s->stats, blk); +- aio_context_release(ctx); + + QAPI_LIST_APPEND(tail, s); + } +diff --git a/block/raw-format.c b/block/raw-format.c +index 1111dffd54..ac7e8495f6 100644 +--- a/block/raw-format.c ++++ b/block/raw-format.c +@@ -470,7 +470,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) + { + BDRVRawState *s = bs->opaque; +- AioContext *ctx; + bool has_size; + uint64_t offset, size; + BdrvChildRole file_role; +@@ -522,11 +521,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, + bs->file->bs->filename); + } + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); + ret = raw_apply_options(bs, s, offset, has_size, size, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + return ret; + } +diff --git a/block/replication.c b/block/replication.c +index 424b537ff7..ca6bd0a720 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -394,14 +394,7 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + } + + if (reopen_queue) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } + bdrv_reopen_multiple(reopen_queue, errp); +- if (ctx != qemu_get_aio_context()) { +- aio_context_acquire(ctx); +- } + } + } + +@@ -462,14 +455,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + BlockDriverState *top_bs; + BdrvChild *active_disk, *hidden_disk, *secondary_disk; + int64_t active_length, hidden_length, disk_length; 
+- AioContext *aio_context; + Error *local_err = NULL; + BackupPerf perf = { .use_copy_range = true, .max_workers = 1 }; + + GLOBAL_STATE_CODE(); + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || +@@ -479,20 +469,17 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is running or done"); +- aio_context_release(aio_context); + return; + } + + if (s->mode != mode) { + error_setg(errp, "The parameter mode's value is invalid, needs %d," + " but got %d", s->mode, mode); +- aio_context_release(aio_context); + return; + } + +@@ -505,7 +492,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!active_disk || !active_disk->bs || !active_disk->bs->backing) { + error_setg(errp, "Active disk doesn't have backing file"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + +@@ -513,7 +499,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!hidden_disk->bs || !hidden_disk->bs->backing) { + error_setg(errp, "Hidden disk doesn't have backing file"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + +@@ -521,7 +506,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) { + error_setg(errp, "The secondary disk doesn't have block backend"); + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + return; + } + bdrv_graph_rdunlock_main_loop(); +@@ -534,7 +518,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + active_length != hidden_length || 
hidden_length != disk_length) { + error_setg(errp, "Active disk, hidden disk, secondary disk's length" + " are not the same"); +- aio_context_release(aio_context); + return; + } + +@@ -546,7 +529,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + !hidden_disk->bs->drv->bdrv_make_empty) { + error_setg(errp, + "Active disk or hidden disk doesn't support make_empty"); +- aio_context_release(aio_context); + bdrv_graph_rdunlock_main_loop(); + return; + } +@@ -556,7 +538,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + reopen_backing_file(bs, true, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- aio_context_release(aio_context); + return; + } + +@@ -569,7 +550,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + bdrv_graph_wrunlock(); +- aio_context_release(aio_context); + return; + } + +@@ -580,7 +560,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + bdrv_graph_wrunlock(); +- aio_context_release(aio_context); + return; + } + +@@ -594,7 +573,6 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + error_setg(errp, "No top_bs or it is invalid"); + bdrv_graph_wrunlock(); + reopen_backing_file(bs, false, NULL); +- aio_context_release(aio_context); + return; + } + bdrv_op_block_all(top_bs, s->blocker); +@@ -612,13 +590,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (local_err) { + error_propagate(errp, local_err); + backup_job_cleanup(bs); +- aio_context_release(aio_context); + return; + } + job_start(&s->backup_job->job); + break; + default: +- aio_context_release(aio_context); + abort(); + } + +@@ -629,18 +605,12 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + } + + s->error = 0; +- aio_context_release(aio_context); + } + + static 
void replication_do_checkpoint(ReplicationState *rs, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { +@@ -649,38 +619,28 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp) + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->mode == REPLICATION_MODE_SECONDARY) { + secondary_do_checkpoint(bs, errp); + } +- aio_context_release(aio_context); + } + + static void replication_get_error(ReplicationState *rs, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_NONE) { + error_setg(errp, "Block replication is not running"); +- aio_context_release(aio_context); + return; + } + + if (s->error) { + error_setg(errp, "I/O error occurred"); +- aio_context_release(aio_context); + return; + } +- aio_context_release(aio_context); + } + + static void replication_done(void *opaque, int ret) +@@ -708,12 +668,7 @@ static void replication_done(void *opaque, int ret) + static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + { + BlockDriverState *bs = rs->opaque; +- BDRVReplicationState *s; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- s = bs->opaque; ++ BDRVReplicationState *s = bs->opaque; + + if (s->stage == BLOCK_REPLICATION_DONE || + s->stage == BLOCK_REPLICATION_FAILOVER) { +@@ -722,13 +677,11 @@ static void 
replication_stop(ReplicationState *rs, bool failover, Error **errp) + * Ignore the request because the secondary side of replication + * doesn't have to do anything anymore. + */ +- aio_context_release(aio_context); + return; + } + + if (s->stage != BLOCK_REPLICATION_RUNNING) { + error_setg(errp, "Block replication is not running"); +- aio_context_release(aio_context); + return; + } + +@@ -744,15 +697,12 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + * disk, secondary disk in backup_job_completed(). + */ + if (s->backup_job) { +- aio_context_release(aio_context); + job_cancel_sync(&s->backup_job->job, true); +- aio_context_acquire(aio_context); + } + + if (!failover) { + secondary_do_checkpoint(bs, errp); + s->stage = BLOCK_REPLICATION_DONE; +- aio_context_release(aio_context); + return; + } + +@@ -765,10 +715,8 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp) + bdrv_graph_rdunlock_main_loop(); + break; + default: +- aio_context_release(aio_context); + abort(); + } +- aio_context_release(aio_context); + } + + static const char *const replication_strong_runtime_opts[] = { +diff --git a/block/snapshot.c b/block/snapshot.c +index e486d3e205..a28f2b039f 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -525,9 +525,7 @@ static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs) + return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents); + } + +-/* Group operations. All block drivers are involved. +- * These functions will properly handle dataplane (take aio_context_acquire +- * when appropriate for appropriate block drivers) */ ++/* Group operations. All block drivers are involved. 
*/ + + bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + Error **errp) +@@ -545,14 +543,11 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = true; + +- aio_context_acquire(ctx); + if (devices || bdrv_all_snapshots_includes_bs(bs)) { + ok = bdrv_can_snapshot(bs); + } +- aio_context_release(ctx); + if (!ok) { + error_setg(errp, "Device '%s' is writable but does not support " + "snapshots", bdrv_get_device_or_node_name(bs)); +@@ -582,18 +577,15 @@ int bdrv_all_delete_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn1, *snapshot = &sn1; + int ret = 0; + +- aio_context_acquire(ctx); + if ((devices || bdrv_all_snapshots_includes_bs(bs)) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) + { + ret = bdrv_snapshot_delete(bs, snapshot->id_str, + snapshot->name, errp); + } +- aio_context_release(ctx); + if (ret < 0) { + error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", + name, bdrv_get_device_or_node_name(bs)); +@@ -628,17 +620,14 @@ int bdrv_all_goto_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool all_snapshots_includes_bs; + +- aio_context_acquire(ctx); + bdrv_graph_rdlock_main_loop(); + all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs); + bdrv_graph_rdunlock_main_loop(); + + ret = (devices || all_snapshots_includes_bs) ? 
+ bdrv_snapshot_goto(bs, name, errp) : 0; +- aio_context_release(ctx); + if (ret < 0) { + bdrv_graph_rdlock_main_loop(); + error_prepend(errp, "Could not load snapshot '%s' on '%s': ", +@@ -670,15 +659,12 @@ int bdrv_all_has_snapshot(const char *name, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + QEMUSnapshotInfo sn; + int ret = 0; + +- aio_context_acquire(ctx); + if (devices || bdrv_all_snapshots_includes_bs(bs)) { + ret = bdrv_snapshot_find(bs, &sn, name); + } +- aio_context_release(ctx); + if (ret < 0) { + if (ret == -ENOENT) { + return 0; +@@ -715,10 +701,8 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + int ret = 0; + +- aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + ret = bdrv_snapshot_create(bs, sn); +@@ -726,7 +710,6 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + sn->vm_state_size = 0; + ret = bdrv_snapshot_create(bs, sn); + } +- aio_context_release(ctx); + if (ret < 0) { + error_setg(errp, "Could not create snapshot '%s' on '%s'", + sn->name, bdrv_get_device_or_node_name(bs)); +@@ -757,13 +740,10 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs, + iterbdrvs = bdrvs; + while (iterbdrvs) { + BlockDriverState *bs = iterbdrvs->data; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool found = false; + +- aio_context_acquire(ctx); + found = (devices || bdrv_all_snapshots_includes_bs(bs)) && + bdrv_can_snapshot(bs); +- aio_context_release(ctx); + + if (vmstate_bs) { + if (g_str_equal(vmstate_bs, +diff --git a/block/write-threshold.c b/block/write-threshold.c +index 76d8885677..56fe88de81 100644 +--- a/block/write-threshold.c ++++ b/block/write-threshold.c +@@ -33,7 +33,6 @@ void qmp_block_set_write_threshold(const char *node_name, + Error **errp) + { + 
BlockDriverState *bs; +- AioContext *aio_context; + + bs = bdrv_find_node(node_name); + if (!bs) { +@@ -41,12 +40,7 @@ void qmp_block_set_write_threshold(const char *node_name, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_write_threshold_set(bs, threshold_bytes); +- +- aio_context_release(aio_context); + } + + void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset, +diff --git a/blockdev.c b/blockdev.c +index 9e1381169d..5d8b3a23eb 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -662,7 +662,6 @@ err_no_opts: + /* Takes the ownership of bs_opts */ + BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) + { +- BlockDriverState *bs; + int bdrv_flags = 0; + + GLOBAL_STATE_CODE(); +@@ -677,11 +676,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) + bdrv_flags |= BDRV_O_INACTIVE; + } + +- aio_context_acquire(qemu_get_aio_context()); +- bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); +- aio_context_release(qemu_get_aio_context()); +- +- return bs; ++ return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); + } + + void blockdev_close_all_bdrv_states(void) +@@ -690,11 +685,7 @@ void blockdev_close_all_bdrv_states(void) + + GLOBAL_STATE_CODE(); + QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) { +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); + bdrv_unref(bs); +- aio_context_release(ctx); + } + } + +@@ -1048,7 +1039,6 @@ fail: + static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) + { + BlockDriverState *bs; +- AioContext *aio_context; + + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +@@ -1062,16 +1052,11 @@ static BlockDriverState *qmp_get_root_bs(const char *name, Error **errp) + return NULL; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + if (!bdrv_is_inserted(bs)) { + error_setg(errp, "Device has no medium"); + bs = NULL; + } + +- 
aio_context_release(aio_context); +- + return bs; + } + +@@ -1141,7 +1126,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + Error **errp) + { + BlockDriverState *bs; +- AioContext *aio_context; + QEMUSnapshotInfo sn; + Error *local_err = NULL; + SnapshotInfo *info = NULL; +@@ -1154,39 +1138,35 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + if (!bs) { + return NULL; + } +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!id && !name) { + error_setg(errp, "Name or id must be provided"); +- goto out_aio_context; ++ return NULL; + } + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { +- goto out_aio_context; ++ return NULL; + } + + ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out_aio_context; ++ return NULL; + } + if (!ret) { + error_setg(errp, + "Snapshot with id '%s' and name '%s' does not exist on " + "device '%s'", + STR_OR_NULL(id), STR_OR_NULL(name), device); +- goto out_aio_context; ++ return NULL; + } + + bdrv_snapshot_delete(bs, id, name, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out_aio_context; ++ return NULL; + } + +- aio_context_release(aio_context); +- + info = g_new0(SnapshotInfo, 1); + info->id = g_strdup(sn.id_str); + info->name = g_strdup(sn.name); +@@ -1201,10 +1181,6 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, + } + + return info; +- +-out_aio_context: +- aio_context_release(aio_context); +- return NULL; + } + + /* internal snapshot private data */ +@@ -1232,7 +1208,6 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + bool ret; + int64_t rt; + InternalSnapshotState *state = g_new0(InternalSnapshotState, 1); +- AioContext *aio_context; + int ret1; + + GLOBAL_STATE_CODE(); +@@ -1248,33 +1223,30 @@ static void 
internal_snapshot_action(BlockdevSnapshotInternal *internal, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + state->bs = bs; + + /* Paired with .clean() */ + bdrv_drained_begin(bs); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { +- goto out; ++ return; + } + + if (bdrv_is_read_only(bs)) { + error_setg(errp, "Device '%s' is read only", device); +- goto out; ++ return; + } + + if (!bdrv_can_snapshot(bs)) { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshots", + bs->drv->format_name, device); +- goto out; ++ return; + } + + if (!strlen(name)) { + error_setg(errp, "Name is empty"); +- goto out; ++ return; + } + + /* check whether a snapshot with name exist */ +@@ -1282,12 +1254,12 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } else if (ret) { + error_setg(errp, + "Snapshot with name '%s' already exists on device '%s'", + name, device); +- goto out; ++ return; + } + + /* 3. take the snapshot */ +@@ -1308,14 +1280,11 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, + error_setg_errno(errp, -ret1, + "Failed to create snapshot '%s' on device '%s'", + name, device); +- goto out; ++ return; + } + + /* 4. 
succeed, mark a snapshot is created */ + state->created = true; +- +-out: +- aio_context_release(aio_context); + } + + static void internal_snapshot_abort(void *opaque) +@@ -1323,7 +1292,6 @@ static void internal_snapshot_abort(void *opaque) + InternalSnapshotState *state = opaque; + BlockDriverState *bs = state->bs; + QEMUSnapshotInfo *sn = &state->sn; +- AioContext *aio_context; + Error *local_error = NULL; + + GLOBAL_STATE_CODE(); +@@ -1333,9 +1301,6 @@ static void internal_snapshot_abort(void *opaque) + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { + error_reportf_err(local_error, + "Failed to delete snapshot with id '%s' and " +@@ -1343,25 +1308,17 @@ static void internal_snapshot_abort(void *opaque) + sn->id_str, sn->name, + bdrv_get_device_name(bs)); + } +- +- aio_context_release(aio_context); + } + + static void internal_snapshot_clean(void *opaque) + { + g_autofree InternalSnapshotState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + /* external snapshot private data */ +@@ -1395,7 +1352,6 @@ static void external_snapshot_action(TransactionAction *action, + /* File name of the new image (for 'blockdev-snapshot-sync') */ + const char *new_image_file; + ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); +- AioContext *aio_context; + uint64_t perm, shared; + + /* TODO We'll eventually have to take a writer lock in this function */ +@@ -1435,26 +1391,23 @@ static void external_snapshot_action(TransactionAction *action, + return; + } + +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); +- + /* Paired with .clean() */ + bdrv_drained_begin(state->old_bs); + + if 
(!bdrv_is_inserted(state->old_bs)) { + error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); +- goto out; ++ return; + } + + if (bdrv_op_is_blocked(state->old_bs, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) { +- goto out; ++ return; + } + + if (!bdrv_is_read_only(state->old_bs)) { + if (bdrv_flush(state->old_bs)) { + error_setg(errp, QERR_IO_ERROR); +- goto out; ++ return; + } + } + +@@ -1466,13 +1419,13 @@ static void external_snapshot_action(TransactionAction *action, + + if (node_name && !snapshot_node_name) { + error_setg(errp, "New overlay node-name missing"); +- goto out; ++ return; + } + + if (snapshot_node_name && + bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) { + error_setg(errp, "New overlay node-name already in use"); +- goto out; ++ return; + } + + flags = state->old_bs->open_flags; +@@ -1485,20 +1438,18 @@ static void external_snapshot_action(TransactionAction *action, + int64_t size = bdrv_getlength(state->old_bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; ++ return; + } + bdrv_refresh_filename(state->old_bs); + +- aio_context_release(aio_context); + bdrv_img_create(new_image_file, format, + state->old_bs->filename, + state->old_bs->drv->format_name, + NULL, size, flags, false, &local_err); +- aio_context_acquire(aio_context); + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + } + +@@ -1508,20 +1459,15 @@ static void external_snapshot_action(TransactionAction *action, + } + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + +- aio_context_acquire(qemu_get_aio_context()); + state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags, + errp); +- aio_context_release(qemu_get_aio_context()); + + /* We will manually add the backing_hd field to the bs later */ + if (!state->new_bs) { + return; + } + +- aio_context_acquire(aio_context); +- + /* + * Allow attaching a backing file to an overlay that's already in use 
only + * if the parents don't assume that they are already seeing a valid image. +@@ -1530,41 +1476,34 @@ static void external_snapshot_action(TransactionAction *action, + bdrv_get_cumulative_perm(state->new_bs, &perm, &shared); + if (perm & BLK_PERM_CONSISTENT_READ) { + error_setg(errp, "The overlay is already in use"); +- goto out; ++ return; + } + + if (state->new_bs->drv->is_filter) { + error_setg(errp, "Filters cannot be used as overlays"); +- goto out; ++ return; + } + + if (bdrv_cow_child(state->new_bs)) { + error_setg(errp, "The overlay already has a backing image"); +- goto out; ++ return; + } + + if (!state->new_bs->drv->supports_backing) { + error_setg(errp, "The overlay does not support backing images"); +- goto out; ++ return; + } + + ret = bdrv_append(state->new_bs, state->old_bs, errp); + if (ret < 0) { +- goto out; ++ return; + } + state->overlay_appended = true; +- +-out: +- aio_context_release(aio_context); + } + + static void external_snapshot_commit(void *opaque) + { + ExternalSnapshotState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); + + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we +@@ -1572,8 +1511,6 @@ static void external_snapshot_commit(void *opaque) + if (!qatomic_read(&state->old_bs->copy_on_read)) { + bdrv_reopen_set_read_only(state->old_bs, true, NULL); + } +- +- aio_context_release(aio_context); + } + + static void external_snapshot_abort(void *opaque) +@@ -1586,7 +1523,6 @@ static void external_snapshot_abort(void *opaque) + int ret; + + aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); + + bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd() + close state->old_bs; we need it */ +@@ -1599,15 +1535,9 @@ static void external_snapshot_abort(void *opaque) + */ + tmp_context = bdrv_get_aio_context(state->old_bs); + if 
(aio_context != tmp_context) { +- aio_context_release(aio_context); +- aio_context_acquire(tmp_context); +- + ret = bdrv_try_change_aio_context(state->old_bs, + aio_context, NULL, NULL); + assert(ret == 0); +- +- aio_context_release(tmp_context); +- aio_context_acquire(aio_context); + } + + bdrv_drained_begin(state->new_bs); +@@ -1617,8 +1547,6 @@ static void external_snapshot_abort(void *opaque) + bdrv_drained_end(state->new_bs); + + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ +- +- aio_context_release(aio_context); + } + } + } +@@ -1626,19 +1554,13 @@ static void external_snapshot_abort(void *opaque) + static void external_snapshot_clean(void *opaque) + { + g_autofree ExternalSnapshotState *state = opaque; +- AioContext *aio_context; + + if (!state->old_bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->old_bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->old_bs); + bdrv_unref(state->new_bs); +- +- aio_context_release(aio_context); + } + + typedef struct DriveBackupState { +@@ -1670,7 +1592,6 @@ static void drive_backup_action(DriveBackup *backup, + BlockDriverState *target_bs; + BlockDriverState *source = NULL; + AioContext *aio_context; +- AioContext *old_context; + const char *format; + QDict *options; + Error *local_err = NULL; +@@ -1698,7 +1619,6 @@ static void drive_backup_action(DriveBackup *backup, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + state->bs = bs; + /* Paired with .clean() */ +@@ -1713,7 +1633,7 @@ static void drive_backup_action(DriveBackup *backup, + bdrv_graph_rdlock_main_loop(); + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) { + bdrv_graph_rdunlock_main_loop(); +- goto out; ++ return; + } + + flags = bs->open_flags | BDRV_O_RDWR; +@@ -1744,7 +1664,7 @@ static void drive_backup_action(DriveBackup *backup, + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; 
++ return; + } + + if (backup->mode != NEW_IMAGE_MODE_EXISTING) { +@@ -1770,7 +1690,7 @@ static void drive_backup_action(DriveBackup *backup, + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + options = qdict_new(); +@@ -1779,30 +1699,18 @@ static void drive_backup_action(DriveBackup *backup, + if (format) { + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + +- aio_context_acquire(qemu_get_aio_context()); + target_bs = bdrv_open(backup->target, NULL, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); +- + if (!target_bs) { + return; + } + +- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); +- + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + if (set_backing_hd) { + if (bdrv_set_backing_hd(target_bs, source, errp) < 0) { + goto unref; +@@ -1815,22 +1723,14 @@ static void drive_backup_action(DriveBackup *backup, + + unref: + bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); + } + + static void drive_backup_commit(void *opaque) + { + DriveBackupState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); +- +- aio_context_release(aio_context); + } + + static void drive_backup_abort(void *opaque) +@@ -1845,18 +1745,12 @@ static void drive_backup_abort(void *opaque) + static void drive_backup_clean(void *opaque) + { + g_autofree DriveBackupState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + 
bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + typedef struct BlockdevBackupState { +@@ -1881,7 +1775,6 @@ static void blockdev_backup_action(BlockdevBackup *backup, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; +- AioContext *old_context; + int ret; + + tran_add(tran, &blockdev_backup_drv, state); +@@ -1898,17 +1791,12 @@ static void blockdev_backup_action(BlockdevBackup *backup, + + /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ + aio_context = bdrv_get_aio_context(bs); +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); + + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); + state->bs = bs; + + /* Paired with .clean() */ +@@ -1917,22 +1805,14 @@ static void blockdev_backup_action(BlockdevBackup *backup, + state->job = do_backup_common(qapi_BlockdevBackup_base(backup), + bs, target_bs, aio_context, + block_job_txn, errp); +- +- aio_context_release(aio_context); + } + + static void blockdev_backup_commit(void *opaque) + { + BlockdevBackupState *state = opaque; +- AioContext *aio_context; +- +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); + + assert(state->job); + job_start(&state->job->job); +- +- aio_context_release(aio_context); + } + + static void blockdev_backup_abort(void *opaque) +@@ -1947,18 +1827,12 @@ static void blockdev_backup_abort(void *opaque) + static void blockdev_backup_clean(void *opaque) + { + g_autofree BlockdevBackupState *state = opaque; +- AioContext *aio_context; + + if (!state->bs) { + return; + } + +- aio_context = bdrv_get_aio_context(state->bs); +- aio_context_acquire(aio_context); +- + bdrv_drained_end(state->bs); +- +- aio_context_release(aio_context); + } + + typedef struct BlockDirtyBitmapState { +@@ 
-2454,7 +2328,6 @@ void qmp_block_stream(const char *job_id, const char *device, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + bdrv_graph_rdlock_main_loop(); + if (base) { +@@ -2521,7 +2394,7 @@ void qmp_block_stream(const char *job_id, const char *device, + if (!base_bs && backing_file) { + error_setg(errp, "backing file specified, but streaming the " + "entire chain"); +- goto out; ++ return; + } + + if (has_auto_finalize && !auto_finalize) { +@@ -2536,18 +2409,14 @@ void qmp_block_stream(const char *job_id, const char *device, + filter_node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + trace_qmp_block_stream(bs); +- +-out: +- aio_context_release(aio_context); + return; + + out_rdlock: + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + } + + void qmp_block_commit(const char *job_id, const char *device, +@@ -2606,10 +2475,9 @@ void qmp_block_commit(const char *job_id, const char *device, + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) { +- goto out; ++ return; + } + + /* default top_bs is the active layer */ +@@ -2617,16 +2485,16 @@ void qmp_block_commit(const char *job_id, const char *device, + + if (top_node && top) { + error_setg(errp, "'top-node' and 'top' are mutually exclusive"); +- goto out; ++ return; + } else if (top_node) { + top_bs = bdrv_lookup_bs(NULL, top_node, errp); + if (top_bs == NULL) { +- goto out; ++ return; + } + if (!bdrv_chain_contains(bs, top_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + top_node); +- goto out; ++ return; + } + } else if (top) { + /* This strcmp() is just a shortcut, there is no need to +@@ -2640,35 +2508,35 @@ void qmp_block_commit(const char *job_id, const char *device, + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? 
top : "NULL"); +- goto out; ++ return; + } + + assert(bdrv_get_aio_context(top_bs) == aio_context); + + if (base_node && base) { + error_setg(errp, "'base-node' and 'base' are mutually exclusive"); +- goto out; ++ return; + } else if (base_node) { + base_bs = bdrv_lookup_bs(NULL, base_node, errp); + if (base_bs == NULL) { +- goto out; ++ return; + } + if (!bdrv_chain_contains(top_bs, base_bs)) { + error_setg(errp, "'%s' is not in this backing file chain", + base_node); +- goto out; ++ return; + } + } else if (base) { + base_bs = bdrv_find_backing_image(top_bs, base); + if (base_bs == NULL) { + error_setg(errp, "Can't find '%s' in the backing chain", base); +- goto out; ++ return; + } + } else { + base_bs = bdrv_find_base(top_bs); + if (base_bs == NULL) { + error_setg(errp, "There is no backimg image"); +- goto out; ++ return; + } + } + +@@ -2678,14 +2546,14 @@ void qmp_block_commit(const char *job_id, const char *device, + iter = bdrv_filter_or_cow_bs(iter)) + { + if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { +- goto out; ++ return; + } + } + + /* Do not allow attempts to commit an image into itself */ + if (top_bs == base_bs) { + error_setg(errp, "cannot commit an image into itself"); +- goto out; ++ return; + } + + /* +@@ -2708,7 +2576,7 @@ void qmp_block_commit(const char *job_id, const char *device, + error_setg(errp, "'backing-file' specified, but 'top' has a " + "writer on it"); + } +- goto out; ++ return; + } + if (!job_id) { + /* +@@ -2724,7 +2592,7 @@ void qmp_block_commit(const char *job_id, const char *device, + } else { + BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); + if (bdrv_op_is_blocked(overlay_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { +- goto out; ++ return; + } + commit_start(job_id, bs, base_bs, top_bs, job_flags, + speed, on_error, backing_file, +@@ -2732,11 +2600,8 @@ void qmp_block_commit(const char *job_id, const char *device, + } + if (local_err != NULL) { + error_propagate(errp, local_err); +- goto out; 
++ return; + } +- +-out: +- aio_context_release(aio_context); + } + + /* Common QMP interface for drive-backup and blockdev-backup */ +@@ -2985,8 +2850,6 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + + if (replaces) { + BlockDriverState *to_replace_bs; +- AioContext *aio_context; +- AioContext *replace_aio_context; + int64_t bs_size, replace_size; + + bs_size = bdrv_getlength(bs); +@@ -3000,19 +2863,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- replace_aio_context = bdrv_get_aio_context(to_replace_bs); +- /* +- * bdrv_getlength() is a co-wrapper and uses AIO_WAIT_WHILE. Be sure not +- * to acquire the same AioContext twice. +- */ +- if (replace_aio_context != aio_context) { +- aio_context_acquire(replace_aio_context); +- } + replace_size = bdrv_getlength(to_replace_bs); +- if (replace_aio_context != aio_context) { +- aio_context_release(replace_aio_context); +- } + + if (replace_size < 0) { + error_setg_errno(errp, -replace_size, +@@ -3041,7 +2892,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + BlockDriverState *bs; + BlockDriverState *target_backing_bs, *target_bs; + AioContext *aio_context; +- AioContext *old_context; + BlockMirrorBackingMode backing_mode; + Error *local_err = NULL; + QDict *options = NULL; +@@ -3064,7 +2914,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + } + + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (!arg->has_mode) { + arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; +@@ -3088,14 +2937,14 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); +- goto out; ++ return; + } + + if (arg->replaces) { + if (!arg->node_name) { + error_setg(errp, "a node-name must be provided when replacing a" + " named node of the graph"); +- goto out; ++ return; + } + 
} + +@@ -3143,7 +2992,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + + if (local_err) { + error_propagate(errp, local_err); +- goto out; ++ return; + } + + options = qdict_new(); +@@ -3153,15 +3002,11 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + if (format) { + qdict_put_str(options, "driver", format); + } +- aio_context_release(aio_context); + + /* Mirroring takes care of copy-on-write using the source's backing + * file. + */ +- aio_context_acquire(qemu_get_aio_context()); + target_bs = bdrv_open(arg->target, NULL, options, flags, errp); +- aio_context_release(qemu_get_aio_context()); +- + if (!target_bs) { + return; + } +@@ -3173,20 +3018,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + bdrv_graph_rdunlock_main_loop(); + + +- /* Honor bdrv_try_change_aio_context() context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); +- aio_context_acquire(old_context); +- + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); + if (ret < 0) { + bdrv_unref(target_bs); +- aio_context_release(old_context); + return; + } + +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + blockdev_mirror_common(arg->job_id, bs, target_bs, + arg->replaces, arg->sync, + backing_mode, zero_target, +@@ -3202,8 +3039,6 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp) + arg->has_auto_dismiss, arg->auto_dismiss, + errp); + bdrv_unref(target_bs); +-out: +- aio_context_release(aio_context); + } + + void qmp_blockdev_mirror(const char *job_id, +@@ -3226,7 +3061,6 @@ void qmp_blockdev_mirror(const char *job_id, + BlockDriverState *bs; + BlockDriverState *target_bs; + AioContext *aio_context; +- AioContext *old_context; + BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN; + bool zero_target; + int ret; +@@ -3243,18 +3077,11 @@ void qmp_blockdev_mirror(const char *job_id, + + zero_target = (sync == MIRROR_SYNC_MODE_FULL); + +- /* Honor bdrv_try_change_aio_context() 
context acquisition requirements. */ +- old_context = bdrv_get_aio_context(target_bs); + aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(old_context); + + ret = bdrv_try_change_aio_context(target_bs, aio_context, NULL, errp); +- +- aio_context_release(old_context); +- aio_context_acquire(aio_context); +- + if (ret < 0) { +- goto out; ++ return; + } + + blockdev_mirror_common(job_id, bs, target_bs, +@@ -3269,8 +3096,6 @@ void qmp_blockdev_mirror(const char *job_id, + has_auto_finalize, auto_finalize, + has_auto_dismiss, auto_dismiss, + errp); +-out: +- aio_context_release(aio_context); + } + + /* +@@ -3433,7 +3258,6 @@ void qmp_change_backing_file(const char *device, + Error **errp) + { + BlockDriverState *bs = NULL; +- AioContext *aio_context; + BlockDriverState *image_bs = NULL; + Error *local_err = NULL; + bool ro; +@@ -3444,9 +3268,6 @@ void qmp_change_backing_file(const char *device, + return; + } + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); +- + bdrv_graph_rdlock_main_loop(); + + image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err); +@@ -3485,7 +3306,7 @@ void qmp_change_backing_file(const char *device, + + if (ro) { + if (bdrv_reopen_set_read_only(image_bs, false, errp) != 0) { +- goto out; ++ return; + } + } + +@@ -3503,14 +3324,10 @@ void qmp_change_backing_file(const char *device, + if (ro) { + bdrv_reopen_set_read_only(image_bs, true, errp); + } +- +-out: +- aio_context_release(aio_context); + return; + + out_rdlock: + bdrv_graph_rdunlock_main_loop(); +- aio_context_release(aio_context); + } + + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) +@@ -3550,7 +3367,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + for (; reopen_list != NULL; reopen_list = reopen_list->next) { + BlockdevOptions *options = reopen_list->value; + BlockDriverState *bs; +- AioContext *ctx; + QObject *obj; + Visitor *v; + QDict *qdict; +@@ -3578,12 +3394,7 @@ void 
qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + qdict_flatten(qdict); + +- ctx = bdrv_get_aio_context(bs); +- aio_context_acquire(ctx); +- + queue = bdrv_reopen_queue(queue, bs, qdict, false); +- +- aio_context_release(ctx); + } + + /* Perform the reopen operation */ +@@ -3596,7 +3407,6 @@ fail: + + void qmp_blockdev_del(const char *node_name, Error **errp) + { +- AioContext *aio_context; + BlockDriverState *bs; + + GLOBAL_STATE_CODE(); +@@ -3611,30 +3421,25 @@ void qmp_blockdev_del(const char *node_name, Error **errp) + error_setg(errp, "Node %s is in use", node_name); + return; + } +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) { +- goto out; ++ return; + } + + if (!QTAILQ_IN_USE(bs, monitor_list)) { + error_setg(errp, "Node %s is not owned by the monitor", + bs->node_name); +- goto out; ++ return; + } + + if (bs->refcnt > 1) { + error_setg(errp, "Block device %s is in use", + bdrv_get_device_or_node_name(bs)); +- goto out; ++ return; + } + + QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list); + bdrv_unref(bs); +- +-out: +- aio_context_release(aio_context); + } + + static BdrvChild * GRAPH_RDLOCK +@@ -3724,7 +3529,6 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp) + void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, + bool has_force, bool force, Error **errp) + { +- AioContext *old_context; + AioContext *new_context; + BlockDriverState *bs; + +@@ -3756,12 +3560,7 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, + new_context = qemu_get_aio_context(); + } + +- old_context = bdrv_get_aio_context(bs); +- aio_context_acquire(old_context); +- + bdrv_try_change_aio_context(bs, new_context, NULL, errp); +- +- aio_context_release(old_context); + } + + QemuOptsList qemu_common_drive_opts = { +diff --git a/blockjob.c b/blockjob.c +index 7310412313..d5f29e14af 100644 +--- a/blockjob.c ++++ 
b/blockjob.c +@@ -198,9 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + * one to make sure that such a concurrent access does not attempt + * to process an already freed BdrvChild. + */ +- aio_context_release(job->job.aio_context); + bdrv_graph_wrlock(); +- aio_context_acquire(job->job.aio_context); + while (job->nodes) { + GSList *l = job->nodes; + BdrvChild *c = l->data; +@@ -234,28 +232,12 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, + uint64_t perm, uint64_t shared_perm, Error **errp) + { + BdrvChild *c; +- AioContext *ctx = bdrv_get_aio_context(bs); +- bool need_context_ops; + GLOBAL_STATE_CODE(); + + bdrv_ref(bs); + +- need_context_ops = ctx != job->job.aio_context; +- +- if (need_context_ops) { +- if (job->job.aio_context != qemu_get_aio_context()) { +- aio_context_release(job->job.aio_context); +- } +- aio_context_acquire(ctx); +- } + c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job, + errp); +- if (need_context_ops) { +- aio_context_release(ctx); +- if (job->job.aio_context != qemu_get_aio_context()) { +- aio_context_acquire(job->job.aio_context); +- } +- } + if (c == NULL) { + return -EPERM; + } +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index f83bb0f116..7bbbd981ad 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -124,7 +124,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + VirtIOBlockDataPlane *s = vblk->dataplane; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- AioContext *old_context; + unsigned i; + unsigned nvqs = s->conf->num_queues; + Error *local_err = NULL; +@@ -178,10 +177,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + trace_virtio_blk_data_plane_start(s); + +- old_context = blk_get_aio_context(s->conf->conf.blk); +- aio_context_acquire(old_context); + r = blk_set_aio_context(s->conf->conf.blk, s->ctx, 
&local_err); +- aio_context_release(old_context); + if (r < 0) { + error_report_err(local_err); + goto fail_aio_context; +@@ -208,13 +204,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + /* Get this show started by hooking up our callbacks */ + if (!blk_in_drain(s->conf->conf.blk)) { +- aio_context_acquire(s->ctx); + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); + + virtio_queue_aio_attach_host_notifier(vq, s->ctx); + } +- aio_context_release(s->ctx); + } + return 0; + +@@ -314,8 +308,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + */ + vblk->dataplane_started = false; + +- aio_context_acquire(s->ctx); +- + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf->conf.blk); + +@@ -325,8 +317,6 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + */ + blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); + +- aio_context_release(s->ctx); +- + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c +index c4bb28c66f..98501e6885 100644 +--- a/hw/block/dataplane/xen-block.c ++++ b/hw/block/dataplane/xen-block.c +@@ -260,8 +260,6 @@ static void xen_block_complete_aio(void *opaque, int ret) + XenBlockRequest *request = opaque; + XenBlockDataPlane *dataplane = request->dataplane; + +- aio_context_acquire(dataplane->ctx); +- + if (ret != 0) { + error_report("%s I/O error", + request->req.operation == BLKIF_OP_READ ? 
+@@ -273,10 +271,10 @@ static void xen_block_complete_aio(void *opaque, int ret) + if (request->presync) { + request->presync = 0; + xen_block_do_aio(request); +- goto done; ++ return; + } + if (request->aio_inflight > 0) { +- goto done; ++ return; + } + + switch (request->req.operation) { +@@ -318,9 +316,6 @@ static void xen_block_complete_aio(void *opaque, int ret) + if (dataplane->more_work) { + qemu_bh_schedule(dataplane->bh); + } +- +-done: +- aio_context_release(dataplane->ctx); + } + + static bool xen_block_split_discard(XenBlockRequest *request, +@@ -601,9 +596,7 @@ static void xen_block_dataplane_bh(void *opaque) + { + XenBlockDataPlane *dataplane = opaque; + +- aio_context_acquire(dataplane->ctx); + xen_block_handle_requests(dataplane); +- aio_context_release(dataplane->ctx); + } + + static bool xen_block_dataplane_event(void *opaque) +@@ -703,10 +696,8 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane) + xen_block_dataplane_detach(dataplane); + } + +- aio_context_acquire(dataplane->ctx); + /* Xen doesn't have multiple users for nodes, so this can't fail */ + blk_set_aio_context(dataplane->blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(dataplane->ctx); + + /* + * Now that the context has been moved onto the main thread, cancel +@@ -752,7 +743,6 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + { + ERRP_GUARD(); + XenDevice *xendev = dataplane->xendev; +- AioContext *old_context; + unsigned int ring_size; + unsigned int i; + +@@ -836,11 +826,8 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane, + goto stop; + } + +- old_context = blk_get_aio_context(dataplane->blk); +- aio_context_acquire(old_context); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(dataplane->blk, dataplane->ctx, NULL); +- aio_context_release(old_context); + + if (!blk_in_drain(dataplane->blk)) { + xen_block_dataplane_attach(dataplane); +diff --git a/hw/block/virtio-blk.c 
b/hw/block/virtio-blk.c +index e110f9718b..ec9ed09a6a 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1210,17 +1210,13 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, + static void virtio_blk_reset(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); +- AioContext *ctx; + VirtIOBlockReq *req; + + /* Dataplane has stopped... */ + assert(!s->dataplane_started); + + /* ...but requests may still be in flight. */ +- ctx = blk_get_aio_context(s->blk); +- aio_context_acquire(ctx); + blk_drain(s->blk); +- aio_context_release(ctx); + + /* We drop queued requests after blk_drain() because blk_drain() itself can + * produce them. */ +@@ -1250,10 +1246,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) + uint64_t capacity; + int64_t length; + int blk_size = conf->logical_block_size; +- AioContext *ctx; +- +- ctx = blk_get_aio_context(s->blk); +- aio_context_acquire(ctx); + + blk_get_geometry(s->blk, &capacity); + memset(&blkcfg, 0, sizeof(blkcfg)); +@@ -1277,7 +1269,6 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) + * per track (cylinder). + */ + length = blk_getlength(s->blk); +- aio_context_release(ctx); + if (length > 0 && length / conf->heads / conf->secs % blk_size) { + blkcfg.geometry.sectors = conf->secs & ~s->sector_mask; + } else { +@@ -1344,9 +1335,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const uint8_t *config) + + memcpy(&blkcfg, config, s->config_size); + +- aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, blkcfg.wce != 0); +- aio_context_release(blk_get_aio_context(s->blk)); + } + + static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, +@@ -1414,11 +1403,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + * s->blk would erroneously be placed in writethrough mode. 
+ */ + if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) { +- aio_context_acquire(blk_get_aio_context(s->blk)); + blk_set_enable_write_cache(s->blk, + virtio_vdev_has_feature(vdev, + VIRTIO_BLK_F_WCE)); +- aio_context_release(blk_get_aio_context(s->blk)); + } + } + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 1473ab3d5e..73cced4626 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -120,9 +120,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, + "node"); + } + +- aio_context_acquire(ctx); + blk_replace_bs(blk, bs, errp); +- aio_context_release(ctx); + return; + } + +@@ -148,10 +146,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, + 0, BLK_PERM_ALL); + blk_created = true; + +- aio_context_acquire(ctx); + ret = blk_insert_bs(blk, bs, errp); +- aio_context_release(ctx); +- + if (ret < 0) { + goto fail; + } +@@ -207,12 +202,8 @@ static void release_drive(Object *obj, const char *name, void *opaque) + BlockBackend **ptr = object_field_prop_ptr(obj, prop); + + if (*ptr) { +- AioContext *ctx = blk_get_aio_context(*ptr); +- +- aio_context_acquire(ctx); + blockdev_auto_del(*ptr); + blk_detach_dev(*ptr, dev); +- aio_context_release(ctx); + } + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 6b21fbc73f..0327f1c605 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -31,11 +31,10 @@ + /* + * Global state (GS) API. These functions run under the BQL. + * +- * If a function modifies the graph, it also uses drain and/or +- * aio_context_acquire/release to be sure it has unique access. +- * aio_context locking is needed together with BQL because of +- * the thread-safe I/O API that concurrently runs and accesses +- * the graph without the BQL. 
++ * If a function modifies the graph, it also uses the graph lock to be sure it ++ * has unique access. The graph lock is needed together with BQL because of the ++ * thread-safe I/O API that concurrently runs and accesses the graph without ++ * the BQL. + * + * It is important to note that not all of these functions are + * necessarily limited to running under the BQL, but they would +diff --git a/include/block/block-io.h b/include/block/block-io.h +index f8729ccc55..8eb39a858b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -31,8 +31,7 @@ + + /* + * I/O API functions. These functions are thread-safe, and therefore +- * can run in any thread as long as the thread has called +- * aio_context_acquire/release(). ++ * can run in any thread. + * + * These functions can only call functions from I/O and Common categories, + * but can be invoked by GS, "I/O or GS" and I/O APIs. +diff --git a/include/block/snapshot.h b/include/block/snapshot.h +index d49c5599d9..304cc6ea61 100644 +--- a/include/block/snapshot.h ++++ b/include/block/snapshot.h +@@ -86,8 +86,6 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + + /* + * Group operations. All block drivers are involved. +- * These functions will properly handle dataplane (take aio_context_acquire +- * when appropriate for appropriate block drivers + */ + + bool bdrv_all_can_snapshot(bool has_devices, strList *devices, +diff --git a/job.c b/job.c +index 99a2e54b54..660ce22c56 100644 +--- a/job.c ++++ b/job.c +@@ -464,12 +464,8 @@ void job_unref_locked(Job *job) + assert(!job->txn); + + if (job->driver->free) { +- AioContext *aio_context = job->aio_context; + job_unlock(); +- /* FIXME: aiocontext lock is required because cb calls blk_unref */ +- aio_context_acquire(aio_context); + job->driver->free(job); +- aio_context_release(aio_context); + job_lock(); + } + +@@ -840,12 +836,10 @@ static void job_clean(Job *job) + + /* + * Called with job_mutex held, but releases it temporarily. 
+- * Takes AioContext lock internally to invoke a job->driver callback. + */ + static int job_finalize_single_locked(Job *job) + { + int job_ret; +- AioContext *ctx = job->aio_context; + + assert(job_is_completed_locked(job)); + +@@ -854,7 +848,6 @@ static int job_finalize_single_locked(Job *job) + + job_ret = job->ret; + job_unlock(); +- aio_context_acquire(ctx); + + if (!job_ret) { + job_commit(job); +@@ -867,7 +860,6 @@ static int job_finalize_single_locked(Job *job) + job->cb(job->opaque, job_ret); + } + +- aio_context_release(ctx); + job_lock(); + + /* Emit events only if we actually started */ +@@ -886,17 +878,13 @@ static int job_finalize_single_locked(Job *job) + + /* + * Called with job_mutex held, but releases it temporarily. +- * Takes AioContext lock internally to invoke a job->driver callback. + */ + static void job_cancel_async_locked(Job *job, bool force) + { +- AioContext *ctx = job->aio_context; + GLOBAL_STATE_CODE(); + if (job->driver->cancel) { + job_unlock(); +- aio_context_acquire(ctx); + force = job->driver->cancel(job, force); +- aio_context_release(ctx); + job_lock(); + } else { + /* No .cancel() means the job will behave as if force-cancelled */ +@@ -931,7 +919,6 @@ static void job_cancel_async_locked(Job *job, bool force) + + /* + * Called with job_mutex held, but releases it temporarily. +- * Takes AioContext lock internally to invoke a job->driver callback. 
+ */ + static void job_completed_txn_abort_locked(Job *job) + { +@@ -979,15 +966,12 @@ static void job_completed_txn_abort_locked(Job *job) + static int job_prepare_locked(Job *job) + { + int ret; +- AioContext *ctx = job->aio_context; + + GLOBAL_STATE_CODE(); + + if (job->ret == 0 && job->driver->prepare) { + job_unlock(); +- aio_context_acquire(ctx); + ret = job->driver->prepare(job); +- aio_context_release(ctx); + job_lock(); + job->ret = ret; + job_update_rc_locked(job); +diff --git a/migration/block.c b/migration/block.c +index a15f9bddcb..6ec6a1d6e6 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -66,7 +66,7 @@ typedef struct BlkMigDevState { + /* Protected by block migration lock. */ + int64_t completed_sectors; + +- /* During migration this is protected by iothread lock / AioContext. ++ /* During migration this is protected by bdrv_dirty_bitmap_lock(). + * Allocation and free happen during setup and cleanup respectively. + */ + BdrvDirtyBitmap *dirty_bitmap; +@@ -101,7 +101,7 @@ typedef struct BlkMigState { + int prev_progress; + int bulk_completed; + +- /* Lock must be taken _inside_ the iothread lock and any AioContexts. */ ++ /* Lock must be taken _inside_ the iothread lock. 
*/ + QemuMutex lock; + } BlkMigState; + +@@ -270,7 +270,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + + if (bmds->shared_base) { + qemu_mutex_lock_iothread(); +- aio_context_acquire(blk_get_aio_context(bb)); + /* Skip unallocated sectors; intentionally treats failure or + * partial sector as an allocated sector */ + while (cur_sector < total_sectors && +@@ -281,7 +280,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + } + cur_sector += count >> BDRV_SECTOR_BITS; + } +- aio_context_release(blk_get_aio_context(bb)); + qemu_mutex_unlock_iothread(); + } + +@@ -313,21 +311,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) + block_mig_state.submitted++; + blk_mig_unlock(); + +- /* We do not know if bs is under the main thread (and thus does +- * not acquire the AioContext when doing AIO) or rather under +- * dataplane. Thus acquire both the iothread mutex and the +- * AioContext. +- * +- * This is ugly and will disappear when we make bdrv_* thread-safe, +- * without the need to acquire the AioContext. ++ /* ++ * The migration thread does not have an AioContext. Lock the BQL so that ++ * I/O runs in the main loop AioContext (see ++ * qemu_get_current_aio_context()). + */ + qemu_mutex_lock_iothread(); +- aio_context_acquire(blk_get_aio_context(bmds->blk)); + bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, + nr_sectors * BDRV_SECTOR_SIZE); + blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, + 0, blk_mig_read_cb, blk); +- aio_context_release(blk_get_aio_context(bmds->blk)); + qemu_mutex_unlock_iothread(); + + bmds->cur_sector = cur_sector + nr_sectors; +@@ -512,7 +505,7 @@ static void blk_mig_reset_dirty_cursor(void) + } + } + +-/* Called with iothread lock and AioContext taken. */ ++/* Called with iothread lock taken. 
*/ + + static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, + int is_async) +@@ -606,9 +599,7 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) + int ret = 1; + + QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { +- aio_context_acquire(blk_get_aio_context(bmds->blk)); + ret = mig_save_device_dirty(f, bmds, is_async); +- aio_context_release(blk_get_aio_context(bmds->blk)); + if (ret <= 0) { + break; + } +@@ -666,9 +657,9 @@ static int64_t get_remaining_dirty(void) + int64_t dirty = 0; + + QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { +- aio_context_acquire(blk_get_aio_context(bmds->blk)); ++ bdrv_dirty_bitmap_lock(bmds->dirty_bitmap); + dirty += bdrv_get_dirty_count(bmds->dirty_bitmap); +- aio_context_release(blk_get_aio_context(bmds->blk)); ++ bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); + } + + return dirty; +@@ -681,7 +672,6 @@ static void block_migration_cleanup_bmds(void) + { + BlkMigDevState *bmds; + BlockDriverState *bs; +- AioContext *ctx; + + unset_dirty_tracking(); + +@@ -693,13 +683,7 @@ static void block_migration_cleanup_bmds(void) + bdrv_op_unblock_all(bs, bmds->blocker); + } + error_free(bmds->blocker); +- +- /* Save ctx, because bmds->blk can disappear during blk_unref. 
*/ +- ctx = blk_get_aio_context(bmds->blk); +- aio_context_acquire(ctx); + blk_unref(bmds->blk); +- aio_context_release(ctx); +- + g_free(bmds->blk_name); + g_free(bmds->aio_bitmap); + g_free(bmds); +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 86ae832176..99710c8ffb 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -852,14 +852,11 @@ static void vm_completion(ReadLineState *rs, const char *str) + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + SnapshotInfoList *snapshots, *snapshot; +- AioContext *ctx = bdrv_get_aio_context(bs); + bool ok = false; + +- aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + ok = bdrv_query_snapshot_info_list(bs, &snapshots, NULL) == 0; + } +- aio_context_release(ctx); + if (!ok) { + continue; + } +diff --git a/migration/savevm.c b/migration/savevm.c +index eec5503a42..1b9ab7b8ee 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -3049,7 +3049,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + int saved_vm_running; + uint64_t vm_state_size; + g_autoptr(GDateTime) now = g_date_time_new_now_local(); +- AioContext *aio_context; + + GLOBAL_STATE_CODE(); + +@@ -3092,7 +3091,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + if (bs == NULL) { + return false; + } +- aio_context = bdrv_get_aio_context(bs); + + saved_vm_running = runstate_is_running(); + +@@ -3101,8 +3099,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + + bdrv_drain_all_begin(); + +- aio_context_acquire(aio_context); +- + memset(sn, 0, sizeof(*sn)); + + /* fill auxiliary fields */ +@@ -3139,14 +3135,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + goto the_end; + } + +- /* The bdrv_all_create_snapshot() call that follows acquires the AioContext +- * for itself. 
BDRV_POLL_WHILE() does not support nested locking because +- * it only releases the lock once. Therefore synchronous I/O will deadlock +- * unless we release the AioContext before bdrv_all_create_snapshot(). +- */ +- aio_context_release(aio_context); +- aio_context = NULL; +- + ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, + has_devices, devices, errp); + if (ret < 0) { +@@ -3157,10 +3145,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + ret = 0; + + the_end: +- if (aio_context) { +- aio_context_release(aio_context); +- } +- + bdrv_drain_all_end(); + + if (saved_vm_running) { +@@ -3258,7 +3242,6 @@ bool load_snapshot(const char *name, const char *vmstate, + QEMUSnapshotInfo sn; + QEMUFile *f; + int ret; +- AioContext *aio_context; + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!bdrv_all_can_snapshot(has_devices, devices, errp)) { +@@ -3278,12 +3261,9 @@ bool load_snapshot(const char *name, const char *vmstate, + if (!bs_vm_state) { + return false; + } +- aio_context = bdrv_get_aio_context(bs_vm_state); + + /* Don't even try to load empty VM states */ +- aio_context_acquire(aio_context); + ret = bdrv_snapshot_find(bs_vm_state, &sn, name); +- aio_context_release(aio_context); + if (ret < 0) { + return false; + } else if (sn.vm_state_size == 0) { +@@ -3320,10 +3300,8 @@ bool load_snapshot(const char *name, const char *vmstate, + ret = -EINVAL; + goto err_drain; + } +- aio_context_acquire(aio_context); + ret = qemu_loadvm_state(f); + migration_incoming_state_destroy(); +- aio_context_release(aio_context); + + bdrv_drain_all_end(); + +diff --git a/net/colo-compare.c b/net/colo-compare.c +index 7f9e6f89ce..f2dfc0ebdc 100644 +--- a/net/colo-compare.c ++++ b/net/colo-compare.c +@@ -1439,12 +1439,10 @@ static void colo_compare_finalize(Object *obj) + qemu_bh_delete(s->event_bh); + + AioContext *ctx = iothread_get_aio_context(s->iothread); +- aio_context_acquire(ctx); + AIO_WAIT_WHILE(ctx, 
!s->out_sendco.done); + if (s->notify_dev) { + AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); + } +- aio_context_release(ctx); + + /* Release all unhandled packets after compare thead exited */ + g_queue_foreach(&s->conn_list, colo_flush_packets, s); +diff --git a/qemu-img.c b/qemu-img.c +index 5a77f67719..7668f86769 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -960,7 +960,6 @@ static int img_commit(int argc, char **argv) + Error *local_err = NULL; + CommonBlockJobCBInfo cbi; + bool image_opts = false; +- AioContext *aio_context; + int64_t rate_limit = 0; + + fmt = NULL; +@@ -1078,12 +1077,9 @@ static int img_commit(int argc, char **argv) + .bs = bs, + }; + +- aio_context = bdrv_get_aio_context(bs); +- aio_context_acquire(aio_context); + commit_active_start("commit", bs, base_bs, JOB_DEFAULT, rate_limit, + BLOCKDEV_ON_ERROR_REPORT, NULL, common_block_job_cb, + &cbi, false, &local_err); +- aio_context_release(aio_context); + if (local_err) { + goto done; + } +diff --git a/qemu-io.c b/qemu-io.c +index 050c70835f..6cb1e00385 100644 +--- a/qemu-io.c ++++ b/qemu-io.c +@@ -414,15 +414,7 @@ static void prep_fetchline(void *opaque) + + static int do_qemuio_command(const char *cmd) + { +- int ret; +- AioContext *ctx = +- qemuio_blk ? 
blk_get_aio_context(qemuio_blk) : qemu_get_aio_context(); +- +- aio_context_acquire(ctx); +- ret = qemuio_command(qemuio_blk, cmd); +- aio_context_release(ctx); +- +- return ret; ++ return qemuio_command(qemuio_blk, cmd); + } + + static int command_loop(void) +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 186e6468b1..bac0b5e3ec 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1123,9 +1123,7 @@ int main(int argc, char **argv) + qdict_put_str(raw_opts, "file", bs->node_name); + qdict_put_int(raw_opts, "offset", dev_offset); + +- aio_context_acquire(qemu_get_aio_context()); + bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal); +- aio_context_release(qemu_get_aio_context()); + + blk_remove_bs(blk); + blk_insert_bs(blk, bs, &error_fatal); +diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c +index 3e60549a4a..82c66fff26 100644 +--- a/replay/replay-debugging.c ++++ b/replay/replay-debugging.c +@@ -144,7 +144,6 @@ static char *replay_find_nearest_snapshot(int64_t icount, + char *ret = NULL; + int rv; + int nb_sns, i; +- AioContext *aio_context; + + *snapshot_icount = -1; + +@@ -152,11 +151,8 @@ static char *replay_find_nearest_snapshot(int64_t icount, + if (!bs) { + goto fail; + } +- aio_context = bdrv_get_aio_context(bs); + +- aio_context_acquire(aio_context); + nb_sns = bdrv_snapshot_list(bs, &sn_tab); +- aio_context_release(aio_context); + + for (i = 0; i < nb_sns; i++) { + rv = bdrv_all_has_snapshot(sn_tab[i].name, false, NULL, NULL); +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index 38364fa557..c9c09fcacd 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -278,12 +278,9 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: + static void {name}_bh(void *opaque) + {{ + {struct_name} *s = opaque; +- AioContext *ctx = {func.gen_ctx('s->')}; + + {graph_lock} +- aio_context_acquire(ctx); + {func.get_result}{name}({ func.gen_list('s->{name}') }); +- aio_context_release(ctx); 
+ {graph_unlock} + + aio_co_wake(s->co); +diff --git a/tests/tsan/suppressions.tsan b/tests/tsan/suppressions.tsan +index d9a002a2ef..b3ef59c27c 100644 +--- a/tests/tsan/suppressions.tsan ++++ b/tests/tsan/suppressions.tsan +@@ -4,7 +4,6 @@ + + # TSan reports a double lock on RECURSIVE mutexes. + # Since the recursive lock is intentional, we choose to ignore it. +-mutex:aio_context_acquire + mutex:pthread_mutex_lock + + # TSan reports a race between pthread_mutex_init() and +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index d9754dfebc..17830a69c1 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -179,13 +179,7 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + + static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs) + { +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + do_drain_begin(drain_type, bs); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(bdrv_get_aio_context(bs)); +- } + } + + static BlockBackend * no_coroutine_fn test_setup(void) +@@ -209,13 +203,7 @@ static BlockBackend * no_coroutine_fn test_setup(void) + + static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs) + { +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + do_drain_end(drain_type, bs); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(bdrv_get_aio_context(bs)); +- } + } + + /* +@@ -520,12 +508,8 @@ static void test_iothread_main_thread_bh(void *opaque) + { + struct test_iothread_data *data = opaque; + +- /* Test that the AioContext is not yet locked in a random BH that is +- * executed during drain, otherwise this would deadlock. 
*/ +- aio_context_acquire(bdrv_get_aio_context(data->bs)); + bdrv_flush(data->bs); + bdrv_dec_in_flight(data->bs); /* incremented by test_iothread_common() */ +- aio_context_release(bdrv_get_aio_context(data->bs)); + } + + /* +@@ -567,7 +551,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + blk_set_disable_request_queuing(blk, true); + + blk_set_aio_context(blk, ctx_a, &error_abort); +- aio_context_acquire(ctx_a); + + s->bh_indirection_ctx = ctx_b; + +@@ -582,8 +565,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + g_assert(acb != NULL); + g_assert_cmpint(aio_ret, ==, -EINPROGRESS); + +- aio_context_release(ctx_a); +- + data = (struct test_iothread_data) { + .bs = bs, + .drain_type = drain_type, +@@ -592,10 +573,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + + switch (drain_thread) { + case 0: +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(ctx_a); +- } +- + /* + * Increment in_flight so that do_drain_begin() waits for + * test_iothread_main_thread_bh(). 
This prevents the race between +@@ -613,20 +590,10 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + do_drain_begin(drain_type, bs); + g_assert_cmpint(bs->in_flight, ==, 0); + +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(ctx_a); +- } + qemu_event_wait(&done_event); +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_acquire(ctx_a); +- } + + g_assert_cmpint(aio_ret, ==, 0); + do_drain_end(drain_type, bs); +- +- if (drain_type != BDRV_DRAIN_ALL) { +- aio_context_release(ctx_a); +- } + break; + case 1: + co = qemu_coroutine_create(test_iothread_drain_co_entry, &data); +@@ -637,9 +604,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) + g_assert_not_reached(); + } + +- aio_context_acquire(ctx_a); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx_a); + + bdrv_unref(bs); + blk_unref(blk); +@@ -757,7 +722,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + BlockJob *job; + TestBlockJob *tjob; + IOThread *iothread = NULL; +- AioContext *ctx; + int ret; + + src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR, +@@ -787,11 +751,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + } + + if (use_iothread) { ++ AioContext *ctx; ++ + iothread = iothread_new(); + ctx = iothread_get_aio_context(iothread); + blk_set_aio_context(blk_src, ctx, &error_abort); +- } else { +- ctx = qemu_get_aio_context(); + } + + target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, +@@ -800,7 +764,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + blk_insert_bs(blk_target, target, &error_abort); + blk_set_allow_aio_context_change(blk_target, true); + +- aio_context_acquire(ctx); + tjob = block_job_create("job0", &test_job_driver, NULL, src, + 0, BLK_PERM_ALL, + 0, 0, NULL, NULL, &error_abort); +@@ -821,7 +784,6 @@ static void test_blockjob_common_drain_node(enum drain_type 
drain_type, + tjob->prepare_ret = -EIO; + break; + } +- aio_context_release(ctx); + + job_start(&job->job); + +@@ -912,12 +874,10 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + } + g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); + +- aio_context_acquire(ctx); + if (use_iothread) { + blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); + assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); + } +- aio_context_release(ctx); + + blk_unref(blk_src); + blk_unref(blk_target); +@@ -1401,9 +1361,7 @@ static void test_append_to_drained(void) + g_assert_cmpint(base_s->drain_count, ==, 1); + g_assert_cmpint(base->in_flight, ==, 0); + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(overlay, base, &error_abort); +- aio_context_release(qemu_get_aio_context()); + + g_assert_cmpint(base->in_flight, ==, 0); + g_assert_cmpint(overlay->in_flight, ==, 0); +@@ -1438,16 +1396,11 @@ static void test_set_aio_context(void) + + bdrv_drained_begin(bs); + bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); +- +- aio_context_acquire(ctx_a); + bdrv_drained_end(bs); + + bdrv_drained_begin(bs); + bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); +- aio_context_release(ctx_a); +- aio_context_acquire(ctx_b); + bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); +- aio_context_release(ctx_b); + bdrv_drained_end(bs); + + bdrv_unref(bs); +diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c +index 8ee6ef38d8..cafc023db4 100644 +--- a/tests/unit/test-bdrv-graph-mod.c ++++ b/tests/unit/test-bdrv-graph-mod.c +@@ -142,10 +142,8 @@ static void test_update_perm_tree(void) + BDRV_CHILD_DATA, &error_abort); + bdrv_graph_wrunlock(); + +- aio_context_acquire(qemu_get_aio_context()); + ret = bdrv_append(filter, bs, NULL); + g_assert_cmpint(ret, <, 0); +- aio_context_release(qemu_get_aio_context()); + + bdrv_unref(filter); + blk_unref(root); +@@ -211,9 
+209,7 @@ static void test_should_update_child(void) + bdrv_attach_child(filter, target, "target", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + bdrv_graph_wrunlock(); +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(filter, bs, &error_abort); +- aio_context_release(qemu_get_aio_context()); + + bdrv_graph_rdlock_main_loop(); + g_assert(target->backing->bs == bs); +@@ -440,9 +436,7 @@ static void test_append_greedy_filter(void) + &error_abort); + bdrv_graph_wrunlock(); + +- aio_context_acquire(qemu_get_aio_context()); + bdrv_append(fl, base, &error_abort); +- aio_context_release(qemu_get_aio_context()); + bdrv_unref(fl); + bdrv_unref(top); + } +diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c +index 9b15d2768c..3766d5de6b 100644 +--- a/tests/unit/test-block-iothread.c ++++ b/tests/unit/test-block-iothread.c +@@ -483,7 +483,6 @@ static void test_sync_op(const void *opaque) + bdrv_graph_rdunlock_main_loop(); + + blk_set_aio_context(blk, ctx, &error_abort); +- aio_context_acquire(ctx); + if (t->fn) { + t->fn(c); + } +@@ -491,7 +490,6 @@ static void test_sync_op(const void *opaque) + t->blkfn(blk); + } + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +@@ -576,9 +574,7 @@ static void test_attach_blockjob(void) + aio_poll(qemu_get_aio_context(), false); + } + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + tjob->n = 0; + while (tjob->n == 0) { +@@ -595,9 +591,7 @@ static void test_attach_blockjob(void) + WITH_JOB_LOCK_GUARD() { + job_complete_sync_locked(&tjob->common.job, &error_abort); + } +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + + bdrv_unref(bs); + blk_unref(blk); +@@ -654,9 +648,7 @@ static void test_propagate_basic(void) + + /* Switch the AioContext back */ + main_ctx = 
qemu_get_aio_context(); +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs_a) == main_ctx); + g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); +@@ -732,9 +724,7 @@ static void test_propagate_diamond(void) + + /* Switch the AioContext back */ + main_ctx = qemu_get_aio_context(); +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs_verify) == main_ctx); + g_assert(bdrv_get_aio_context(bs_a) == main_ctx); +@@ -764,13 +754,11 @@ static void test_propagate_mirror(void) + &error_abort); + + /* Start a mirror job */ +- aio_context_acquire(main_ctx); + mirror_start("job0", src, target, NULL, JOB_DEFAULT, 0, 0, 0, + MIRROR_SYNC_MODE_NONE, MIRROR_OPEN_BACKING_CHAIN, false, + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT, + false, "filter_node", MIRROR_COPY_MODE_BACKGROUND, + &error_abort); +- aio_context_release(main_ctx); + + WITH_JOB_LOCK_GUARD() { + job = job_get_locked("job0"); +@@ -785,9 +773,7 @@ static void test_propagate_mirror(void) + g_assert(job->aio_context == ctx); + + /* Change the AioContext of target */ +- aio_context_acquire(ctx); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); +- aio_context_release(ctx); + g_assert(bdrv_get_aio_context(src) == main_ctx); + g_assert(bdrv_get_aio_context(target) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); +@@ -805,10 +791,8 @@ static void test_propagate_mirror(void) + g_assert(bdrv_get_aio_context(filter) == main_ctx); + + /* ...unless we explicitly allow it */ +- aio_context_acquire(ctx); + blk_set_allow_aio_context_change(blk, true); + bdrv_try_change_aio_context(target, ctx, NULL, &error_abort); +- aio_context_release(ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + 
g_assert(bdrv_get_aio_context(src) == ctx); +@@ -817,10 +801,8 @@ static void test_propagate_mirror(void) + + job_cancel_sync_all(); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); + bdrv_try_change_aio_context(target, main_ctx, NULL, &error_abort); +- aio_context_release(ctx); + + blk_unref(blk); + bdrv_unref(src); +@@ -836,7 +818,6 @@ static void test_attach_second_node(void) + BlockDriverState *bs, *filter; + QDict *options; + +- aio_context_acquire(main_ctx); + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + blk_insert_bs(blk, bs, &error_abort); +@@ -846,15 +827,12 @@ static void test_attach_second_node(void) + qdict_put_str(options, "file", "base"); + + filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort); +- aio_context_release(main_ctx); + + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + g_assert(bdrv_get_aio_context(filter) == ctx); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, main_ctx, &error_abort); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == main_ctx); + g_assert(bdrv_get_aio_context(bs) == main_ctx); + g_assert(bdrv_get_aio_context(filter) == main_ctx); +@@ -868,11 +846,9 @@ static void test_attach_preserve_blk_ctx(void) + { + IOThread *iothread = iothread_new(); + AioContext *ctx = iothread_get_aio_context(iothread); +- AioContext *main_ctx = qemu_get_aio_context(); + BlockBackend *blk; + BlockDriverState *bs; + +- aio_context_acquire(main_ctx); + blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL); + bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); + bs->total_sectors = 65536 / BDRV_SECTOR_SIZE; +@@ -881,25 +857,18 @@ static void test_attach_preserve_blk_ctx(void) + blk_insert_bs(blk, bs, &error_abort); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); +- aio_context_release(main_ctx); + + 
/* Remove the node again */ +- aio_context_acquire(ctx); + blk_remove_bs(blk); +- aio_context_release(ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context()); + + /* Re-attach the node */ +- aio_context_acquire(main_ctx); + blk_insert_bs(blk, bs, &error_abort); +- aio_context_release(main_ctx); + g_assert(blk_get_aio_context(blk) == ctx); + g_assert(bdrv_get_aio_context(bs) == ctx); + +- aio_context_acquire(ctx); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); +- aio_context_release(ctx); + bdrv_unref(bs); + blk_unref(blk); + } +diff --git a/tests/unit/test-blockjob.c b/tests/unit/test-blockjob.c +index a130f6fefb..fe3e0d2d38 100644 +--- a/tests/unit/test-blockjob.c ++++ b/tests/unit/test-blockjob.c +@@ -228,7 +228,6 @@ static void cancel_common(CancelJob *s) + BlockJob *job = &s->common; + BlockBackend *blk = s->blk; + JobStatus sts = job->job.status; +- AioContext *ctx = job->job.aio_context; + + job_cancel_sync(&job->job, true); + WITH_JOB_LOCK_GUARD() { +@@ -240,9 +239,7 @@ static void cancel_common(CancelJob *s) + job_unref_locked(&job->job); + } + +- aio_context_acquire(ctx); + destroy_blk(blk); +- aio_context_release(ctx); + + } + +@@ -391,132 +388,6 @@ static void test_cancel_concluded(void) + cancel_common(s); + } + +-/* (See test_yielding_driver for the job description) */ +-typedef struct YieldingJob { +- BlockJob common; +- bool should_complete; +-} YieldingJob; +- +-static void yielding_job_complete(Job *job, Error **errp) +-{ +- YieldingJob *s = container_of(job, YieldingJob, common.job); +- s->should_complete = true; +- job_enter(job); +-} +- +-static int coroutine_fn yielding_job_run(Job *job, Error **errp) +-{ +- YieldingJob *s = container_of(job, YieldingJob, common.job); +- +- job_transition_to_ready(job); +- +- while (!s->should_complete) { +- job_yield(job); +- } +- +- return 0; +-} +- +-/* +- * This job transitions immediately to the READY state, and then +- * yields 
until it is to complete. +- */ +-static const BlockJobDriver test_yielding_driver = { +- .job_driver = { +- .instance_size = sizeof(YieldingJob), +- .free = block_job_free, +- .user_resume = block_job_user_resume, +- .run = yielding_job_run, +- .complete = yielding_job_complete, +- }, +-}; +- +-/* +- * Test that job_complete_locked() works even on jobs that are in a paused +- * state (i.e., STANDBY). +- * +- * To do this, run YieldingJob in an IO thread, get it into the READY +- * state, then have a drained section. Before ending the section, +- * acquire the context so the job will not be entered and will thus +- * remain on STANDBY. +- * +- * job_complete_locked() should still work without error. +- * +- * Note that on the QMP interface, it is impossible to lock an IO +- * thread before a drained section ends. In practice, the +- * bdrv_drain_all_end() and the aio_context_acquire() will be +- * reversed. However, that makes for worse reproducibility here: +- * Sometimes, the job would no longer be in STANDBY then but already +- * be started. We cannot prevent that, because the IO thread runs +- * concurrently. We can only prevent it by taking the lock before +- * ending the drained section, so we do that. +- * +- * (You can reverse the order of operations and most of the time the +- * test will pass, but sometimes the assert(status == STANDBY) will +- * fail.) 
+- */ +-static void test_complete_in_standby(void) +-{ +- BlockBackend *blk; +- IOThread *iothread; +- AioContext *ctx; +- Job *job; +- BlockJob *bjob; +- +- /* Create a test drive, move it to an IO thread */ +- blk = create_blk(NULL); +- iothread = iothread_new(); +- +- ctx = iothread_get_aio_context(iothread); +- blk_set_aio_context(blk, ctx, &error_abort); +- +- /* Create our test job */ +- bjob = mk_job(blk, "job", &test_yielding_driver, true, +- JOB_MANUAL_FINALIZE | JOB_MANUAL_DISMISS); +- job = &bjob->job; +- assert_job_status_is(job, JOB_STATUS_CREATED); +- +- /* Wait for the job to become READY */ +- job_start(job); +- /* +- * Here we are waiting for the status to change, so don't bother +- * protecting the read every time. +- */ +- AIO_WAIT_WHILE_UNLOCKED(ctx, job->status != JOB_STATUS_READY); +- +- /* Begin the drained section, pausing the job */ +- bdrv_drain_all_begin(); +- assert_job_status_is(job, JOB_STATUS_STANDBY); +- +- /* Lock the IO thread to prevent the job from being run */ +- aio_context_acquire(ctx); +- /* This will schedule the job to resume it */ +- bdrv_drain_all_end(); +- aio_context_release(ctx); +- +- WITH_JOB_LOCK_GUARD() { +- /* But the job cannot run, so it will remain on standby */ +- assert(job->status == JOB_STATUS_STANDBY); +- +- /* Even though the job is on standby, this should work */ +- job_complete_locked(job, &error_abort); +- +- /* The test is done now, clean up. 
*/ +- job_finish_sync_locked(job, NULL, &error_abort); +- assert(job->status == JOB_STATUS_PENDING); +- +- job_finalize_locked(job, &error_abort); +- assert(job->status == JOB_STATUS_CONCLUDED); +- +- job_dismiss_locked(&job, &error_abort); +- } +- +- aio_context_acquire(ctx); +- destroy_blk(blk); +- aio_context_release(ctx); +- iothread_join(iothread); +-} +- + int main(int argc, char **argv) + { + qemu_init_main_loop(&error_abort); +@@ -531,13 +402,5 @@ int main(int argc, char **argv) + g_test_add_func("/blockjob/cancel/standby", test_cancel_standby); + g_test_add_func("/blockjob/cancel/pending", test_cancel_pending); + g_test_add_func("/blockjob/cancel/concluded", test_cancel_concluded); +- +- /* +- * This test is flaky and sometimes fails in CI and otherwise: +- * don't run unless user opts in via environment variable. +- */ +- if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- g_test_add_func("/blockjob/complete_in_standby", test_complete_in_standby); +- } + return g_test_run(); + } +diff --git a/tests/unit/test-replication.c b/tests/unit/test-replication.c +index afff908d77..5d2003b8ce 100644 +--- a/tests/unit/test-replication.c ++++ b/tests/unit/test-replication.c +@@ -199,17 +199,13 @@ static BlockBackend *start_primary(void) + static void teardown_primary(void) + { + BlockBackend *blk; +- AioContext *ctx; + + /* remove P_ID */ + blk = blk_by_name(P_ID); + assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + } + + static void test_primary_read(void) +@@ -345,27 +341,20 @@ static void teardown_secondary(void) + { + /* only need to destroy two BBs */ + BlockBackend *blk; +- AioContext *ctx; + + /* remove S_LOCAL_DISK_ID */ + blk = blk_by_name(S_LOCAL_DISK_ID); + assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + + /* remove S_ID */ + blk = blk_by_name(S_ID); + 
assert(blk); + +- ctx = blk_get_aio_context(blk); +- aio_context_acquire(ctx); + monitor_remove_blk(blk); + blk_unref(blk); +- aio_context_release(ctx); + } + + static void test_secondary_read(void) +diff --git a/util/async.c b/util/async.c +index 04ee83d220..dfd44ef612 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -562,12 +562,10 @@ static void co_schedule_bh_cb(void *opaque) + Coroutine *co = QSLIST_FIRST(&straight); + QSLIST_REMOVE_HEAD(&straight, co_scheduled_next); + trace_aio_co_schedule_bh_cb(ctx, co); +- aio_context_acquire(ctx); + + /* Protected by write barrier in qemu_aio_coroutine_enter */ + qatomic_set(&co->scheduled, NULL); + qemu_aio_coroutine_enter(ctx, co); +- aio_context_release(ctx); + } + } + +@@ -707,9 +705,7 @@ void aio_co_enter(AioContext *ctx, Coroutine *co) + assert(self != co); + QSIMPLEQ_INSERT_TAIL(&self->co_queue_wakeup, co, co_queue_next); + } else { +- aio_context_acquire(ctx); + qemu_aio_coroutine_enter(ctx, co); +- aio_context_release(ctx); + } + } + +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index a9a48fffb8..3bfb1ad3ec 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -360,10 +360,7 @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc, + + qio_channel_set_follow_coroutine_ctx(server->ioc, true); + +- /* Attaching the AioContext starts the vu_client_trip coroutine */ +- aio_context_acquire(server->ctx); + vhost_user_server_attach_aio_context(server, server->ctx); +- aio_context_release(server->ctx); + } + + /* server->ctx acquired by caller */ +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-bdrv_co_lock.patch b/SOURCES/kvm-block-remove-bdrv_co_lock.patch new file mode 100644 index 0000000..b219c1c --- /dev/null +++ b/SOURCES/kvm-block-remove-bdrv_co_lock.patch @@ -0,0 +1,97 @@ +From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:04 -0500 +Subject: [PATCH 087/101] block: remove 
bdrv_co_lock() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm) + +The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops. +Remove them. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231205182011.1976568-8-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + block.c | 10 ---------- + blockdev.c | 5 ----- + include/block/block-global-state.h | 14 -------------- + 3 files changed, 29 deletions(-) + +diff --git a/block.c b/block.c +index 91ace5d2d5..434b7f4d72 100644 +--- a/block.c ++++ b/block.c +@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx) + bdrv_dec_in_flight(bs); + } + +-void coroutine_fn bdrv_co_lock(BlockDriverState *bs) +-{ +- /* TODO removed in next patch */ +-} +- +-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs) +-{ +- /* TODO removed in next patch */ +-} +- + static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban) + { + GLOBAL_STATE_CODE(); +diff --git a/blockdev.c b/blockdev.c +index 5d8b3a23eb..3a5e7222ec 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name, + return; + } + +- bdrv_co_lock(bs); + bdrv_drained_begin(bs); +- bdrv_co_unlock(bs); + + old_ctx = bdrv_co_enter(bs); + blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp); + bdrv_co_leave(bs, old_ctx); + +- bdrv_co_lock(bs); + bdrv_drained_end(bs); +- bdrv_co_unlock(bs); +- + blk_co_unref(blk); + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index 0327f1c605..4ec0b217f0 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char 
*tag); + int bdrv_debug_resume(BlockDriverState *bs, const char *tag); + bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +-/** +- * Locks the AioContext of @bs if it's not the current AioContext. This avoids +- * double locking which could lead to deadlocks: This is a coroutine_fn, so we +- * know we already own the lock of the current AioContext. +- * +- * May only be called in the main thread. +- */ +-void coroutine_fn bdrv_co_lock(BlockDriverState *bs); +- +-/** +- * Unlocks the AioContext of @bs if it's not the current AioContext. +- */ +-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs); +- + bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +-- +2.39.3 + diff --git a/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch new file mode 100644 index 0000000..d6670c1 --- /dev/null +++ b/SOURCES/kvm-block-remove-outdated-AioContext-locking-comments.patch @@ -0,0 +1,411 @@ +From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:11 -0500 +Subject: [PATCH 094/101] block: remove outdated AioContext locking comments + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer exists. + +There is one noteworthy change: + + - * More specifically, these functions use BDRV_POLL_WHILE(bs), which + - * requires the caller to be either in the main thread and hold + - * the BlockdriverState (bs) AioContext lock, or directly in the + - * home thread that runs the bs AioContext. Calling them from + - * another thread in another AioContext would cause deadlocks. 
+ + * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires + + * the caller to be either in the main thread or directly in the home thread + + * that runs the bs AioContext. Calling them from another thread in another + + * AioContext would cause deadlocks. + +I am not sure whether deadlocks are still possible. Maybe they have just +moved to the fine-grained locks that have replaced the AioContext. Since +I am not sure if the deadlocks are gone, I have kept the substance +unchanged and just removed mention of the AioContext. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-15-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + block.c | 73 ++++++---------------------- + block/block-backend.c | 8 --- + block/export/vhost-user-blk-server.c | 4 -- + include/block/block-common.h | 3 -- + include/block/block-io.h | 9 ++-- + include/block/block_int-common.h | 2 - + tests/qemu-iotests/202 | 2 +- + tests/qemu-iotests/203 | 3 +- + 8 files changed, 22 insertions(+), 82 deletions(-) + +diff --git a/block.c b/block.c +index 434b7f4d72..a097772238 100644 +--- a/block.c ++++ b/block.c +@@ -1616,11 +1616,6 @@ out: + g_free(gen_node_name); + } + +-/* +- * The caller must always hold @bs AioContext lock, because this function calls +- * bdrv_refresh_total_sectors() which polls when called from non-coroutine +- * context. +- */ + static int no_coroutine_fn GRAPH_UNLOCKED + bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + QDict *options, int open_flags, Error **errp) +@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) + * Replaces the node that a BdrvChild points to without updating permissions. + * + * If @new_bs is non-NULL, the parent of @child must already be drained through +- * @child and the caller must hold the AioContext lock for @new_bs. ++ * @child. 
+ */ + static void GRAPH_WRLOCK + bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs) +@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { + * + * Returns new created child. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + */ + static BdrvChild * GRAPH_WRLOCK + bdrv_attach_child_common(BlockDriverState *child_bs, +@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, + /* + * Function doesn't update permissions, caller is responsible for this. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. +@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. +- * +- * The caller must hold the AioContext lock @child_bs, but not that of @ctx +- * (unless @child_bs is already in @ctx). + */ + BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, +@@ -3226,9 +3216,6 @@ out: + * + * On failure NULL is returned, errp is set and the reference to + * child_bs is also dropped. +- * +- * If @parent_bs and @child_bs are in different AioContexts, the caller must +- * hold the AioContext lock for @child_bs, but not for @parent_bs. 
+ */ + BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, +@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) + * + * Function doesn't update permissions, caller is responsible for this. + * +- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and +- * @child_bs can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @parent_bs and @child_bs can move to a different AioContext in this ++ * function. + * + * After calling this function, the transaction @tran may only be completed + * while holding a writer lock for the graph. +@@ -3513,9 +3499,8 @@ out: + } + + /* +- * The caller must hold the AioContext lock for @backing_hd. Both @bs and +- * @backing_hd can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * Both @bs and @backing_hd can move to a different AioContext in this ++ * function. + * + * If a backing child is already present (i.e. we're detaching a node), that + * child node must be drained. +@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * +- * The caller must hold the main AioContext lock. +- * + * TODO Can this be unified with bdrv_open_image()? + */ + int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, +@@ -3745,9 +3728,7 @@ done: + * + * The BlockdevRef will be removed from the options QDict. + * +- * The caller must hold the lock of the main AioContext and no other AioContext. +- * @parent can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * @parent can move to a different AioContext in this function. 
+ */ + BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, +@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename, + /* + * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. + * +- * The caller must hold the lock of the main AioContext and no other AioContext. +- * @parent can move to a different AioContext in this function. Callers must +- * make sure that their AioContext locking is still correct after this. ++ * @parent can move to a different AioContext in this function. + */ + int bdrv_open_file_child(const char *filename, + QDict *options, const char *bdref_key, +@@ -3923,8 +3902,6 @@ out: + * The reference parameter may be used to specify an existing block device which + * should be opened. If specified, neither options nor a filename may be given, + * nor can an existing BDS be reused (that is, *pbs has to be NULL). +- * +- * The caller must always hold the main AioContext lock. + */ + static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, +@@ -4217,7 +4194,6 @@ close_and_fail: + return NULL; + } + +-/* The caller must always hold the main AioContext lock. */ + BlockDriverState *bdrv_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) + { +@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, + * + * Return 0 on success, otherwise return < 0 and set @errp. + * +- * The caller must hold the AioContext lock of @reopen_state->bs. + * @reopen_state->bs can move to a different AioContext in this function. +- * Callers must make sure that their AioContext locking is still correct after +- * this. 
+ */ + static int GRAPH_UNLOCKED + bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, +@@ -4801,8 +4774,6 @@ out_rdlock: + * It is the responsibility of the caller to then call the abort() or + * commit() for any other BDS that have been left in a prepare() state + * +- * The caller must hold the AioContext lock of @reopen_state->bs. +- * + * After calling this function, the transaction @change_child_tran may only be + * completed while holding a writer lock for the graph. + */ +@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + * child. + * + * This function does not create any image files. +- * +- * The caller must hold the AioContext lock for @bs_top. + */ + int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + Error **errp) +@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs) + * after the call (even on failure), so if the caller intends to reuse the + * dictionary, it needs to use qobject_ref() before calling bdrv_open. + * +- * The caller holds the AioContext lock for @bs. It must make sure that @bs +- * stays in the same AioContext, i.e. @options must not refer to nodes in a +- * different AioContext. ++ * The caller must make sure that @bs stays in the same AioContext, i.e. ++ * @options must not refer to nodes in a different AioContext. + */ + BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = { + * + * Must be called from the main AioContext. + * +- * The caller must own the AioContext lock for the old AioContext of bs, but it +- * must not own the AioContext lock for new_context (unless new_context is the +- * same as the current context of bs). +- * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. 
+ */ +@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + * + * If ignore_child is not NULL, that child (and its subgraph) will not + * be touched. +- * +- * This function still requires the caller to take the bs current +- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE +- * assumes the lock is always held if bs is in another AioContext. +- * For the same reason, it temporarily also holds the new AioContext, since +- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too. +- * Therefore the new AioContext lock must not be taken by the caller. + */ + int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + + /* + * Linear phase: go through all callbacks collected in the transaction. +- * Run all callbacks collected in the recursion to switch all nodes +- * AioContext lock (transaction commit), or undo all changes done in the ++ * Run all callbacks collected in the recursion to switch every node's ++ * AioContext (transaction commit), or undo all changes done in the + * recursion (transaction abort). + */ + +diff --git a/block/block-backend.c b/block/block-backend.c +index f412bed274..209eb07528 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) + * Both sets of permissions can be changed later using blk_set_perm(). + * + * Return the new BlockBackend on success, null on failure. +- * +- * Callers must hold the AioContext lock of @bs. 
+ */ + BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + uint64_t shared_perm, Error **errp) +@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm, + * Just as with bdrv_open(), after having called this function the reference to + * @options belongs to the block layer (even on failure). + * +- * Called without holding an AioContext lock. +- * + * TODO: Remove @filename and @flags; it should be possible to specify a whole + * BDS tree just by specifying the @options QDict (or @reference, + * alternatively). At the time of adding this function, this is not possible, +@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + + /* + * Disassociates the currently associated BlockDriverState from @blk. +- * +- * The caller must hold the AioContext lock for the BlockBackend. + */ + void blk_remove_bs(BlockBackend *blk) + { +@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk) + + /* + * Associates a new BlockDriverState with @blk. +- * +- * Callers must hold the AioContext lock of @bs. 
+ */ + int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) + { +diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c +index 16f48388d3..50c358e8cd 100644 +--- a/block/export/vhost-user-blk-server.c ++++ b/block/export/vhost-user-blk-server.c +@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque) + vu_config_change_msg(&vexp->vu_server.vu_dev); + } + +-/* Called with vexp->export.ctx acquired */ + static void vu_blk_drained_begin(void *opaque) + { + VuBlkExport *vexp = opaque; +@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque) + vhost_user_server_detach_aio_context(&vexp->vu_server); + } + +-/* Called with vexp->export.blk AioContext acquired */ + static void vu_blk_drained_end(void *opaque) + { + VuBlkExport *vexp = opaque; +@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque) + * Ensures that bdrv_drained_begin() waits until in-flight requests complete + * and the server->co_trip coroutine has terminated. It will be restarted in + * vhost_user_server_attach_aio_context(). +- * +- * Called with vexp->export.ctx acquired. + */ + static bool vu_blk_drained_poll(void *opaque) + { +diff --git a/include/block/block-common.h b/include/block/block-common.h +index d7599564db..a846023a09 100644 +--- a/include/block/block-common.h ++++ b/include/block/block-common.h +@@ -70,9 +70,6 @@ + * automatically takes the graph rdlock when calling the wrapped function. In + * the same way, no_co_wrapper_bdrv_wrlock functions automatically take the + * graph wrlock. +- * +- * If the first parameter of the function is a BlockDriverState, BdrvChild or +- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper. 
+ */ + #define no_co_wrapper + #define no_co_wrapper_bdrv_rdlock +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 8eb39a858b..b49e0537dd 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. + * +- * More specifically, these functions use BDRV_POLL_WHILE(bs), which +- * requires the caller to be either in the main thread and hold +- * the BlockdriverState (bs) AioContext lock, or directly in the +- * home thread that runs the bs AioContext. Calling them from +- * another thread in another AioContext would cause deadlocks. ++ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires ++ * the caller to be either in the main thread or directly in the home thread ++ * that runs the bs AioContext. Calling them from another thread in another ++ * AioContext would cause deadlocks. + * + * Therefore, these functions are not proper I/O, because they + * can't run in *any* iothreads, but only in a specific one. +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 4e31d161c5..151279d481 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -1192,8 +1192,6 @@ struct BlockDriverState { + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + +- /* Protected by AioContext lock */ +- + /* + * If we are reading a disk image, give its size in sectors. + * Generally read-only; it is written to by load_snapshot and +diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202 +index b784dcd791..13304242e5 100755 +--- a/tests/qemu-iotests/202 ++++ b/tests/qemu-iotests/202 +@@ -21,7 +21,7 @@ + # Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a + # single IOThread completes successfully. 
This particular command triggered a + # hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect +-# against regressions. ++# against regressions even though the AioContext lock no longer exists. + + import iotests + +diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203 +index ab80fd0e44..1ba878522b 100755 +--- a/tests/qemu-iotests/203 ++++ b/tests/qemu-iotests/203 +@@ -21,7 +21,8 @@ + # Check that QMP 'migrate' with multiple drives on a single IOThread completes + # successfully. This particular command triggered a hang in the source QEMU + # process due to recursive AioContext locking in bdrv_invalidate_all() and +-# BDRV_POLL_WHILE(). ++# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext ++# lock no longer exists. + + import iotests + +-- +2.39.3 + diff --git a/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch b/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch new file mode 100644 index 0000000..34c4e8f --- /dev/null +++ b/SOURCES/kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch @@ -0,0 +1,105 @@ +From 95b2ffc5f01dc4309c2e747ed883d22cd1d26347 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Sat, 2 Mar 2024 17:00:23 +0100 +Subject: [PATCH 2/2] chardev/char-socket: Fix TLS io channels sending too much + data to the backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 227: Fix TLS io channels sending too much data to the backend +RH-Jira: RHEL-24614 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Daniel P. 
Berrangé +RH-Commit: [1/1] fce871914e0ce52e16a6edae0e007513f9fec1ae (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-24614 + +commit 462945cd22d2bcd233401ed3aa167d83a8e35b05 +Author: Thomas Huth +Date: Thu Feb 29 11:43:37 2024 +0100 + + chardev/char-socket: Fix TLS io channels sending too much data to the backend + + Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") + changed the behavior of the TLS io channels to schedule a second reading + attempt if there is still incoming data pending. This caused a regression + with backends like the sclpconsole that check in their read function that + the sender does not try to write more bytes to it than the device can + currently handle. + + The problem can be reproduced like this: + + 1) In one terminal, do this: + + mkdir qemu-pki + cd qemu-pki + openssl genrsa 2048 > ca-key.pem + openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem + # enter some dummy value for the cert + openssl genrsa 2048 > server-key.pem + openssl req -new -x509 -nodes -days 365000 -key server-key.pem \ + -out server-cert.pem + # enter some other dummy values for the cert + + gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \ + --x509certfile server-cert.pem -p 8338 + + 2) In another terminal, do this: + + wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2 + + qemu-system-s390x -nographic -nodefaults \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \ + -object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \ + -chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \ + -device sclpconsole,chardev=tls_chardev,id=tls_serial + + QEMU then aborts after a second or two with: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. 
+ Aborted (core dumped) + + It looks like the second read does not trigger the chr_can_read() function + to be called before the second read, which should normally always be done + before sending bytes to a character device to see how much it can handle, + so the s->max_size in tcp_chr_read() still contains the old value from the + previous read. Let's make sure that we use the up-to-date value by calling + tcp_chr_read_poll() again here. + + Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") + Buglink: https://issues.redhat.com/browse/RHEL-24614 + Reviewed-by: "Daniel P. Berrangé" + Message-ID: <20240229104339.42574-1-thuth@redhat.com> + Reviewed-by: Antoine Damhet + Tested-by: Antoine Damhet + Reviewed-by: Marc-André Lureau + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 73947da188..034840593d 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = sizeof(buf); +- if (len > s->max_size) { +- len = s->max_size; ++ len = tcp_chr_read_poll(opaque); ++ if (len > sizeof(buf)) { ++ len = sizeof(buf); + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.39.3 + diff --git a/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch b/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch new file mode 100644 index 0000000..30e055f --- /dev/null +++ b/SOURCES/kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch @@ -0,0 +1,78 @@ +From 4d4102f6e2f9afd6182888787ae8b570347df87d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 18 Mar 2024 18:06:59 +0000 +Subject: [PATCH 1/3] chardev: lower priority 
of the HUP GSource in socket + chardev +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 233: Fix handling of TLS sessions in chardevs +RH-Jira: RHEL-24614 +RH-Acked-by: Thomas Huth +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/3] 842f54349191b0206e68f35a7a80155f5a584942 (berrange/centos-src-qemu) + +The socket chardev often has 2 GSource object registered against the +same FD. One is registered all the time and is just intended to handle +POLLHUP events, while the other gets registered & unregistered on the +fly as the frontend is ready to receive more data or not. + +It is very common for poll() to signal a POLLHUP event at the same time +as there is pending incoming data from the disconnected client. It is +therefore essential to process incoming data prior to processing HUP. +The problem with having 2 GSource on the same FD is that there is no +guaranteed ordering of execution between them, so the chardev code may +process HUP first and thus discard data. + +This failure scenario is non-deterministic but can be seen fairly +reliably by reverting a7077b8e354d90fec26c2921aa2dea85b90dff90, and +then running 'tests/unit/test-char', which will sometimes fail with +missing data. + +Ideally QEMU would only have 1 GSource, but that's a complex code +refactoring job. The next best solution is to try to ensure ordering +between the 2 GSource objects. This can be achieved by lowering the +priority of the HUP GSource, so that it is never dispatched if the +main GSource is also ready to dispatch. Counter-intuitively, lowering +the priority of a GSource is done by raising its priority number. + +Reviewed-by: Marc-André Lureau +Reviewed-by: Thomas Huth +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 8bd8b04adc9f18904f323dff085f8b4ec77915c6) +--- + chardev/char-socket.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..f48d341ebc 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -597,6 +597,22 @@ static void update_ioc_handlers(SocketChardev *s) + + remove_hup_source(s); + s->hup_source = qio_channel_create_watch(s->ioc, G_IO_HUP); ++ /* ++ * poll() is liable to return POLLHUP even when there is ++ * still incoming data available to read on the FD. If ++ * we have the hup_source at the same priority as the ++ * main io_add_watch_poll GSource, then we might end up ++ * processing the POLLHUP event first, closing the FD, ++ * and as a result silently discard data we should have ++ * read. ++ * ++ * By setting the hup_source to G_PRIORITY_DEFAULT + 1, ++ * we ensure that io_add_watch_poll GSource will always ++ * be dispatched first, thus guaranteeing we will be ++ * able to process all incoming data before closing the ++ * FD ++ */ ++ g_source_set_priority(s->hup_source, G_PRIORITY_DEFAULT + 1); + g_source_set_callback(s->hup_source, (GSourceFunc)tcp_chr_hup, + chr, NULL); + g_source_attach(s->hup_source, chr->gcontext); +-- +2.39.3 + diff --git a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch b/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch deleted file mode 100644 index 4173648..0000000 --- a/SOURCES/kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 961bc392ee60743344236ddd247ab646a0eec914 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 07/21] checkpatch: add qemu_bh_new/aio_bh_new checks - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/13] e0473487f0e3186c42559a5c36a8650f27ab26ae 
(jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit ef56ffbdd6b0605dc1e305611287b948c970e236 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:08 2023 -0400 - - checkpatch: add qemu_bh_new/aio_bh_new checks - - Advise authors to use the _guarded versions of the APIs, instead. - - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-4-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - scripts/checkpatch.pl | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl -index d768171dcf..eeaec436eb 100755 ---- a/scripts/checkpatch.pl -+++ b/scripts/checkpatch.pl -@@ -2865,6 +2865,14 @@ sub process { - if ($line =~ /\bsignal\s*\(/ && !($line =~ /SIG_(?:IGN|DFL)/)) { - ERROR("use sigaction to establish signal handlers; signal is not portable\n" . $herecurr); - } -+# recommend qemu_bh_new_guarded instead of qemu_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\bqemu_bh_new\s*\(/) { -+ ERROR("use qemu_bh_new_guarded() instead of qemu_bh_new() to avoid reentrancy problems\n" . $herecurr); -+ } -+# recommend aio_bh_new_guarded instead of aio_bh_new -+ if ($realfile =~ /.*\/hw\/.*/ && $line =~ /\baio_bh_new\s*\(/) { -+ ERROR("use aio_bh_new_guarded() instead of aio_bh_new() to avoid reentrancy problems\n" . $herecurr); -+ } - # check for module_init(), use category-specific init macros explicitly please - if ($line =~ /^module_init\s*\(/) { - ERROR("please use block_init(), type_init() etc. instead of module_init()\n" . 
$herecurr); --- -2.39.3 - diff --git a/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch b/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch new file mode 100644 index 0000000..6fffc23 --- /dev/null +++ b/SOURCES/kvm-coroutine-cap-per-thread-local-pool-size.patch @@ -0,0 +1,412 @@ +From e99c56752a1c4021a93c92b7be78856ebefaa1b3 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 18 Mar 2024 14:34:29 -0400 +Subject: [PATCH 1/2] coroutine: cap per-thread local pool size + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 234: coroutine: cap per-thread local pool size +RH-Jira: RHEL-28947 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/2] 5971de1c1e238457925bfb9c4bfc932de857b28d (stefanha/centos-stream-qemu-kvm) + +The coroutine pool implementation can hit the Linux vm.max_map_count +limit, causing QEMU to abort with "failed to allocate memory for stack" +or "failed to set up stack guard page" during coroutine creation. + +This happens because per-thread pools can grow to tens of thousands of +coroutines. Each coroutine causes 2 virtual memory areas to be created. +Eventually vm.max_map_count is reached and memory-related syscalls fail. +The per-thread pool sizes are non-uniform and depend on past coroutine +usage in each thread, so it's possible for one thread to have a large +pool while another thread's pool is empty. + +Switch to a new coroutine pool implementation with a global pool that +grows to a maximum number of coroutines and per-thread local pools that +are capped at hardcoded small number of coroutines. + +This approach does not leave large numbers of coroutines pooled in a +thread that may not use them again. In order to perform well it +amortizes the cost of global pool accesses by working in batches of +coroutines instead of individual coroutines. + +The global pool is a list. 
Threads donate batches of coroutines to it when +they have too many and take batches from it when they have too few: + +.-----------------------------------. +| Batch 1 | Batch 2 | Batch 3 | ... | global_pool +`-----------------------------------' + +Each thread has up to 2 batches of coroutines: + +.-------------------. +| Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) +`-------------------' + +The goal of this change is to reduce the excessive number of pooled +coroutines that cause QEMU to abort when vm.max_map_count is reached +without losing the performance of an adequately sized coroutine pool. + +Here are virtio-blk disk I/O benchmark results: + + RW BLKSIZE IODEPTH OLD NEW CHANGE +randread 4k 1 113725 117451 +3.3% +randread 4k 8 192968 198510 +2.9% +randread 4k 16 207138 209429 +1.1% +randread 4k 32 212399 215145 +1.3% +randread 4k 64 218319 221277 +1.4% +randread 128k 1 17587 17535 -0.3% +randread 128k 8 17614 17616 +0.0% +randread 128k 16 17608 17609 +0.0% +randread 128k 32 17552 17553 +0.0% +randread 128k 64 17484 17484 +0.0% + +See files/{fio.sh,test.xml.j2} for the benchmark configuration: +https://gitlab.com/stefanha/virt-playbooks/-/tree/coroutine-pool-fix-sizing + +Buglink: https://issues.redhat.com/browse/RHEL-28947 +Reported-by: Sanjay Rao +Reported-by: Boaz Ben Shabat +Reported-by: Joe Mario +Reviewed-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240318183429.1039340-1-stefanha@redhat.com> +(cherry picked from commit 86a637e48104ae74d8be53bed6441ce32be33433) +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 282 +++++++++++++++++++++++++++++++++--------- + 1 file changed, 223 insertions(+), 59 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 5fd2dbaf8b..2790959eaf 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,39 +18,200 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine_int.h" + #include "qemu/coroutine-tls.h" ++#include "qemu/cutils.h" + 
#include "block/aio.h" + +-/** +- * The minimal batch size is always 64, coroutines from the release_pool are +- * reused as soon as there are 64 coroutines in it. The maximum pool size starts +- * with 64 and is increased on demand so that coroutines are not deleted even if +- * they are not immediately reused. +- */ + enum { +- POOL_MIN_BATCH_SIZE = 64, +- POOL_INITIAL_MAX_SIZE = 64, ++ COROUTINE_POOL_BATCH_MAX_SIZE = 128, + }; + +-/** Free list to speed up creation */ +-static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; +-static unsigned int release_pool_size; ++/* ++ * Coroutine creation and deletion is expensive so a pool of unused coroutines ++ * is kept as a cache. When the pool has coroutines available, they are ++ * recycled instead of creating new ones from scratch. Coroutines are added to ++ * the pool upon termination. ++ * ++ * The pool is global but each thread maintains a small local pool to avoid ++ * global pool contention. Threads fetch and return batches of coroutines from ++ * the global pool to maintain their local pool. The local pool holds up to two ++ * batches whereas the maximum size of the global pool is controlled by the ++ * qemu_coroutine_inc_pool_size() API. ++ * ++ * .-----------------------------------. ++ * | Batch 1 | Batch 2 | Batch 3 | ... | global_pool ++ * `-----------------------------------' ++ * ++ * .-------------------. 
++ * | Batch 1 | Batch 2 | per-thread local_pool (maximum 2 batches) ++ * `-------------------' ++ */ ++typedef struct CoroutinePoolBatch { ++ /* Batches are kept in a list */ ++ QSLIST_ENTRY(CoroutinePoolBatch) next; ++ ++ /* This batch holds up to @COROUTINE_POOL_BATCH_MAX_SIZE coroutines */ ++ QSLIST_HEAD(, Coroutine) list; ++ unsigned int size; ++} CoroutinePoolBatch; ++ ++typedef QSLIST_HEAD(, CoroutinePoolBatch) CoroutinePool; ++ ++/* Host operating system limit on number of pooled coroutines */ ++static unsigned int global_pool_hard_max_size; ++ ++static QemuMutex global_pool_lock; /* protects the following variables */ ++static CoroutinePool global_pool = QSLIST_HEAD_INITIALIZER(global_pool); ++static unsigned int global_pool_size; ++static unsigned int global_pool_max_size = COROUTINE_POOL_BATCH_MAX_SIZE; ++ ++QEMU_DEFINE_STATIC_CO_TLS(CoroutinePool, local_pool); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, local_pool_cleanup_notifier); + +-typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; +-QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); +-QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); +-QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); ++static CoroutinePoolBatch *coroutine_pool_batch_new(void) ++{ ++ CoroutinePoolBatch *batch = g_new(CoroutinePoolBatch, 1); ++ ++ QSLIST_INIT(&batch->list); ++ batch->size = 0; ++ return batch; ++} + +-static void coroutine_pool_cleanup(Notifier *n, void *value) ++static void coroutine_pool_batch_delete(CoroutinePoolBatch *batch) + { + Coroutine *co; + Coroutine *tmp; +- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, &batch->list, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(&batch->list, pool_next); + qemu_coroutine_delete(co); + } ++ g_free(batch); ++} ++ ++static void local_pool_cleanup(Notifier *n, void *value) ++{ ++ CoroutinePool *local_pool = 
get_ptr_local_pool(); ++ CoroutinePoolBatch *batch; ++ CoroutinePoolBatch *tmp; ++ ++ QSLIST_FOREACH_SAFE(batch, local_pool, next, tmp) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_batch_delete(batch); ++ } ++} ++ ++/* Ensure the atexit notifier is registered */ ++static void local_pool_cleanup_init_once(void) ++{ ++ Notifier *notifier = get_ptr_local_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = local_pool_cleanup; ++ qemu_thread_atexit_add(notifier); ++ } ++} ++ ++/* Helper to get the next unused coroutine from the local pool */ ++static Coroutine *coroutine_pool_get_local(void) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); ++ Coroutine *co; ++ ++ if (unlikely(!batch)) { ++ return NULL; ++ } ++ ++ co = QSLIST_FIRST(&batch->list); ++ QSLIST_REMOVE_HEAD(&batch->list, pool_next); ++ batch->size--; ++ ++ if (batch->size == 0) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_batch_delete(batch); ++ } ++ return co; ++} ++ ++/* Get the next batch from the global pool */ ++static void coroutine_pool_refill_local(void) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch; ++ ++ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { ++ batch = QSLIST_FIRST(&global_pool); ++ ++ if (batch) { ++ QSLIST_REMOVE_HEAD(&global_pool, next); ++ global_pool_size -= batch->size; ++ } ++ } ++ ++ if (batch) { ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ local_pool_cleanup_init_once(); ++ } ++} ++ ++/* Add a batch of coroutines to the global pool */ ++static void coroutine_pool_put_global(CoroutinePoolBatch *batch) ++{ ++ WITH_QEMU_LOCK_GUARD(&global_pool_lock) { ++ unsigned int max = MIN(global_pool_max_size, ++ global_pool_hard_max_size); ++ ++ if (global_pool_size < max) { ++ QSLIST_INSERT_HEAD(&global_pool, batch, next); ++ ++ /* Overshooting the max pool size is allowed */ ++ global_pool_size += batch->size; ++ return; ++ } ++ } ++ ++ 
/* The global pool was full, so throw away this batch */ ++ coroutine_pool_batch_delete(batch); ++} ++ ++/* Get the next unused coroutine from the pool or return NULL */ ++static Coroutine *coroutine_pool_get(void) ++{ ++ Coroutine *co; ++ ++ co = coroutine_pool_get_local(); ++ if (!co) { ++ coroutine_pool_refill_local(); ++ co = coroutine_pool_get_local(); ++ } ++ return co; ++} ++ ++static void coroutine_pool_put(Coroutine *co) ++{ ++ CoroutinePool *local_pool = get_ptr_local_pool(); ++ CoroutinePoolBatch *batch = QSLIST_FIRST(local_pool); ++ ++ if (unlikely(!batch)) { ++ batch = coroutine_pool_batch_new(); ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ local_pool_cleanup_init_once(); ++ } ++ ++ if (unlikely(batch->size >= COROUTINE_POOL_BATCH_MAX_SIZE)) { ++ CoroutinePoolBatch *next = QSLIST_NEXT(batch, next); ++ ++ /* Is the local pool full? */ ++ if (next) { ++ QSLIST_REMOVE_HEAD(local_pool, next); ++ coroutine_pool_put_global(batch); ++ } ++ ++ batch = coroutine_pool_batch_new(); ++ QSLIST_INSERT_HEAD(local_pool, batch, next); ++ } ++ ++ QSLIST_INSERT_HEAD(&batch->list, co, pool_next); ++ batch->size++; + } + + Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) +@@ -58,31 +219,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { +- CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); +- +- co = QSLIST_FIRST(alloc_pool); +- if (!co) { +- if (release_pool_size > POOL_MIN_BATCH_SIZE) { +- /* Slow path; a good place to register the destructor, too. */ +- Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); +- if (!notifier->notify) { +- notifier->notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(notifier); +- } +- +- /* This is not exact; there could be a little skew between +- * release_pool_size and the actual size of release_pool. But +- * it is just a heuristic, it does not need to be perfect. 
+- */ +- set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); +- QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); +- co = QSLIST_FIRST(alloc_pool); +- } +- } +- if (co) { +- QSLIST_REMOVE_HEAD(alloc_pool, pool_next); +- set_alloc_pool_size(get_alloc_pool_size() - 1); +- } ++ co = coroutine_pool_get(); + } + + if (!co) { +@@ -100,19 +237,10 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { +- if (release_pool_size < qatomic_read(&pool_max_size) * 2) { +- QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); +- qatomic_inc(&release_pool_size); +- return; +- } +- if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { +- QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); +- set_alloc_pool_size(get_alloc_pool_size() + 1); +- return; +- } ++ coroutine_pool_put(co); ++ } else { ++ qemu_coroutine_delete(co); + } +- +- qemu_coroutine_delete(co); + } + + void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co) +@@ -223,10 +351,46 @@ AioContext *qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_max_size, additional_pool_size); ++ QEMU_LOCK_GUARD(&global_pool_lock); ++ global_pool_max_size += additional_pool_size; + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_max_size, removing_pool_size); ++ QEMU_LOCK_GUARD(&global_pool_lock); ++ global_pool_max_size -= removing_pool_size; ++} ++ ++static unsigned int get_global_pool_hard_max_size(void) ++{ ++#ifdef __linux__ ++ g_autofree char *contents = NULL; ++ int max_map_count; ++ ++ /* ++ * Linux processes can have up to max_map_count virtual memory areas ++ * (VMAs). mmap(2), mprotect(2), etc fail with ENOMEM beyond this limit. We ++ * must limit the coroutine pool to a safe size to avoid running out of ++ * VMAs. 
++ */ ++ if (g_file_get_contents("/proc/sys/vm/max_map_count", &contents, NULL, ++ NULL) && ++ qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { ++ /* ++ * This is a conservative upper bound that avoids exceeding ++ * max_map_count. Leave half for non-coroutine users like library ++ * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so ++ * halve the amount again. ++ */ ++ return max_map_count / 4; ++ } ++#endif ++ ++ return UINT_MAX; ++} ++ ++static void __attribute__((constructor)) qemu_coroutine_init(void) ++{ ++ qemu_mutex_init(&global_pool_lock); ++ global_pool_hard_max_size = get_global_pool_hard_max_size(); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch b/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch new file mode 100644 index 0000000..cdbb666 --- /dev/null +++ b/SOURCES/kvm-coroutine-reserve-5-000-mappings.patch @@ -0,0 +1,61 @@ +From 0aa65dc3acba481f7064df936ab49e3bceb1d5bd Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Mar 2024 14:12:32 -0400 +Subject: [PATCH 2/2] coroutine: reserve 5,000 mappings +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 234: coroutine: cap per-thread local pool size +RH-Jira: RHEL-28947 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [2/2] 78560c2b947471111cc16c313d6f38db42860a1c (stefanha/centos-stream-qemu-kvm) + +Daniel P. Berrangé pointed out that the coroutine +pool size heuristic is very conservative. Instead of halving +max_map_count, he suggested reserving 5,000 mappings for non-coroutine +users based on observations of guests he has access to. + +Fixes: 86a637e48104 ("coroutine: cap per-thread local pool size") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Daniel P. 
Berrangé +Message-id: 20240320181232.1464819-1-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9352f80cd926fe2dde7c89b93ee33bb0356ff40e) +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 2790959eaf..eb4eebefdf 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -377,12 +377,17 @@ static unsigned int get_global_pool_hard_max_size(void) + NULL) && + qemu_strtoi(contents, NULL, 10, &max_map_count) == 0) { + /* +- * This is a conservative upper bound that avoids exceeding +- * max_map_count. Leave half for non-coroutine users like library +- * dependencies, vhost-user, etc. Each coroutine takes up 2 VMAs so +- * halve the amount again. ++ * This is an upper bound that avoids exceeding max_map_count. Leave a ++ * fixed amount for non-coroutine users like library dependencies, ++ * vhost-user, etc. Each coroutine takes up 2 VMAs so halve the ++ * remaining amount. 
+ */ +- return max_map_count / 4; ++ if (max_map_count > 5000) { ++ return (max_map_count - 5000) / 2; ++ } else { ++ /* Disable the global pool but threads still have local pools */ ++ return 0; ++ } + } + #endif + +-- +2.39.3 + diff --git a/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch new file mode 100644 index 0000000..735f2a3 --- /dev/null +++ b/SOURCES/kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch @@ -0,0 +1,75 @@ +From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:59 -0500 +Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm) + +Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs +dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb() +to avoid a race with scsi_device_purge_requests() running in the main +loop thread. + +The SCSI code no longer calls dma_aio_cancel() from the main loop thread +while I/O is running in the IOThread AioContext. Therefore it is no +longer necessary to take this lock to protect DMAAIOCB fields. The +->cb() function also does not require the lock because blk_aio_*() and +friends do not need the AioContext lock. + +Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't +rely on it taking the AioContext lock, so this change is safe. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231204164259.1515217-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + system/dma-helpers.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/system/dma-helpers.c b/system/dma-helpers.c +index 36211acc7e..528117f256 100644 +--- a/system/dma-helpers.c ++++ b/system/dma-helpers.c +@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret) + + trace_dma_blk_cb(dbs, ret); + +- aio_context_acquire(ctx); + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + + if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) { + dma_complete(dbs, ret); +- goto out; ++ return; + } + dma_blk_unmap(dbs); + +@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret) + trace_dma_map_wait(dbs); + dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs); + cpu_register_map_client(dbs->bh); +- goto out; ++ return; + } + + if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) { +@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret) + dbs->acb = dbs->io_func(dbs->offset, &dbs->iov, + dma_blk_cb, dbs, dbs->io_func_opaque); + assert(dbs->acb); +-out: +- aio_context_release(ctx); + } + + static void dma_aio_cancel(BlockAIOCB *acb) +-- +2.39.3 + diff --git a/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch new file mode 100644 index 0000000..dbe48d7 --- /dev/null +++ b/SOURCES/kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch @@ -0,0 +1,228 @@ +From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:26 +0800 +Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: 
Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm) + +Suggested-by: Cédric Le Goater +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 1 + + docs/devel/index-internals.rst | 1 + + docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++ + 3 files changed, 168 insertions(+) + create mode 100644 docs/devel/vfio-iommufd.rst + +diff --git a/MAINTAINERS b/MAINTAINERS +index ca70bb4e64..0ddb20a35f 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2176,6 +2176,7 @@ F: backends/iommufd.c + F: include/sysemu/iommufd.h + F: include/qemu/chardev_open.h + F: util/chardev_open.c ++F: docs/devel/vfio-iommufd.rst + + vhost + M: Michael S. Tsirkin +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index 6f81df92bc..3def4a138b 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them. + s390-dasd-ipl + tracing + vfio-migration ++ vfio-iommufd + writing-monitor-commands + virtio-backends +diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst +new file mode 100644 +index 0000000000..3d1c11f175 +--- /dev/null ++++ b/docs/devel/vfio-iommufd.rst +@@ -0,0 +1,166 @@ ++=============================== ++IOMMUFD BACKEND usage with VFIO ++=============================== ++ ++(Same meaning for backend/container/BE) ++ ++With the introduction of iommufd, the Linux kernel provides a generic ++interface for user space drivers to propagate their DMA mappings to kernel ++for assigned devices. While the legacy kernel interface is group-centric, ++the new iommufd interface is device-centric, relying on device fd and iommufd. 
++ ++To support both interfaces in the QEMU VFIO device, introduce a base container ++to abstract the common part of VFIO legacy and iommufd container. So that the ++generic VFIO code can use either container. ++ ++The base container implements generic functions such as memory_listener and ++address space management whereas the derived container implements callbacks ++specific to either legacy or iommufd. Each container has its own way to setup ++secure context and dma management interface. The below diagram shows how it ++looks like with both containers. ++ ++:: ++ ++ VFIO AddressSpace/Memory ++ +-------+ +----------+ +-----+ +-----+ ++ | pci | | platform | | ap | | ccw | ++ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+ ++ | | | | | AddressSpace | ++ | | | | +------------+---------+ ++ +---V-----------V-----------V--------V----+ / ++ | VFIOAddressSpace | <------------+ ++ | | | MemoryListener ++ | VFIOContainerBase list | ++ +-------+----------------------------+----+ ++ | | ++ | | ++ +-------V------+ +--------V----------+ ++ | iommufd | | vfio legacy | ++ | container | | container | ++ +-------+------+ +--------+----------+ ++ | | ++ | /dev/iommu | /dev/vfio/vfio ++ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id ++ Userspace | | ++ ============+============================+=========================== ++ Kernel | device fd | ++ +---------------+ | group/container fd ++ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU) ++ | ATTACH_IOAS) | | device fd ++ | | | ++ | +-------V------------V-----------------+ ++ iommufd | | vfio | ++ (map/unmap | +---------+--------------------+-------+ ++ ioas_copy) | | | map/unmap ++ | | | ++ +------V------+ +-----V------+ +------V--------+ ++ | iommfd core | | device | | vfio iommu | ++ +-------------+ +------------+ +---------------+ ++ ++* Secure Context setup ++ ++ - iommufd BE: uses device fd and iommufd to setup secure context ++ (bind_iommufd, attach_ioas) ++ - vfio legacy BE: uses group fd and container fd to 
setup secure context ++ (set_container, set_iommu) ++ ++* Device access ++ ++ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX`` ++ - vfio legacy BE: device fd is retrieved from group fd ioctl ++ ++* DMA Mapping flow ++ ++ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener ++ 2. VFIO populates DMA map/unmap via the container BEs ++ * iommufd BE: uses iommufd ++ * vfio legacy BE: uses container fd ++ ++Example configuration ++===================== ++ ++Step 1: configure the host device ++--------------------------------- ++ ++This is exactly the same as for a VFIO device with the legacy VFIO container. ++ ++Step 2: configure QEMU ++---------------------- ++ ++Interactions with the ``/dev/iommu`` are abstracted by a new iommufd ++object (compiled in with the ``CONFIG_IOMMUFD`` option). ++ ++Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must ++be linked with an iommufd object. It gets a new optional property ++named ``iommufd`` through which an iommufd object is passed. Take the ++``vfio-pci`` device for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 ++ ++Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a ++management layer. In such a case the fd is passed in; the ``fd`` property ++accepts either a string naming the fd or a number, for example: ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0,fd=22 ++ -device vfio-pci,iommufd=iommufd0,fd=23 ++ ++If the ``fd`` property is not passed, the fd is opened by QEMU. ++ ++If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd ++is not used and the user gets the behavior based on the legacy VFIO ++container: ++ ++.. code-block:: bash ++ ++ -device vfio-pci,host=0000:02:00.0 ++ ++Supported platform ++================== ++ ++Supports x86, ARM and s390x currently. 
++ ++Caveats ++======= ++ ++Dirty page sync ++--------------- ++ ++Dirty page sync with the iommufd backend is not supported yet, so live migration is ++disabled by default. It can be force-enabled as shown below, although this is ++inefficient. ++ ++.. code-block:: bash ++ ++ -object iommufd,id=iommufd0 ++ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on ++ ++P2P DMA ++------- ++ ++PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI ++BAR regions yet. The warning below is shown for an assigned PCI device; it's not a bug. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR? ++ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address) ++ ++FD passing with mdev ++-------------------- ++ ++The ``vfio-pci`` device checks the ``sysfsdev`` property to decide if the backend is an mdev. ++If FD passing is used, there is no way to know that and the mdev is treated ++like a real PCI device. An error like the one below is reported if the user tries to enable ++RAM discarding for the mdev. ++ ++.. code-block:: none ++ ++ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices ++ ++``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue as their backend ++devices are always mdev and RAM discarding is force-enabled.
+-- +2.39.3 + diff --git a/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch new file mode 100644 index 0000000..80adc69 --- /dev/null +++ b/SOURCES/kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch @@ -0,0 +1,98 @@ +From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:08 -0500 +Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm) + +Encourage the use of locking primitives and stop mentioning the +AioContext lock since it is being removed. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-12-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + docs/devel/multiple-iothreads.txt | 47 +++++++++++-------------------- + 1 file changed, 16 insertions(+), 31 deletions(-) + +diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt +index a3e949f6b3..4865196bde 100644 +--- a/docs/devel/multiple-iothreads.txt ++++ b/docs/devel/multiple-iothreads.txt +@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in. + + How to synchronize with an IOThread + ----------------------------------- +-AioContext is not thread-safe so some rules must be followed when using file +-descriptors, event notifiers, timers, or BHs across threads: ++Variables that can be accessed by multiple threads require some form of ++synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc. + +-1. AioContext functions can always be called safely. They handle their +-own locking internally. +- +-2. 
Other threads wishing to access the AioContext must use +-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the +-context is acquired no other thread can access it or run event loop iterations +-in this AioContext. +- +-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls. +-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro +-used in the block layer and can lead to hangs. +- +-There is currently no lock ordering rule if a thread needs to acquire multiple +-AioContexts simultaneously. Therefore, it is only safe for code holding the +-QEMU global mutex to acquire other AioContexts. ++AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(), ++aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger ++activity in an IOThread. + + Side note: the best way to schedule a function call across threads is to call +-aio_bh_schedule_oneshot(). No acquire/release or locking is needed. ++aio_bh_schedule_oneshot(). ++ ++The main loop thread can wait synchronously for a condition using ++AIO_WAIT_WHILE(). + + AioContext and the block layer + ------------------------------ +@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using + old APIs that implicitly use the main loop. See the "How to program for + IOThreads" above for information on how to do that. + +-If main loop code such as a QMP function wishes to access a BlockDriverState +-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure +-that callbacks in the IOThread do not run in parallel. +- + Code running in the monitor typically needs to ensure that past + requests from the guest are completed. When a block device is running + in an IOThread, the IOThread can also process requests from the guest + (via ioeventfd). To achieve both objects, wrap the code between + bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained +-section". 
The functions must be called between aio_context_acquire() +-and aio_context_release(). You can freely release and re-acquire the +-AioContext within a drained section. +- +-Long-running jobs (usually in the form of coroutines) are best scheduled in +-the BlockDriverState's AioContext to avoid the need to acquire/release around +-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier, +-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends, +-can be used to get a notification whenever bdrv_try_change_aio_context() moves a ++section". ++ ++Long-running jobs (usually in the form of coroutines) are often scheduled in ++the BlockDriverState's AioContext. The functions ++bdrv_add/remove_aio_context_notifier, or alternatively ++blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to ++get a notification whenever bdrv_try_change_aio_context() moves a + BlockDriverState to a different AioContext. +-- +2.39.3 + diff --git a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch b/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch deleted file mode 100644 index 77086e5..0000000 --- a/SOURCES/kvm-graph-lock-Disable-locking-for-now.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 516bf44de08a13d97c08e210137078e642ce8e88 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:32 +0200 -Subject: [PATCH 02/21] graph-lock: Disable locking for now - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/4] 39d42fb527aad0491a018743289de7b762108317 (kmwolf/centos-qemu-kvm) - -In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They -come from callers that hold an AioContext lock, which is not allowed -during polling. 
In theory, we could temporarily release the lock, but -callers are inconsistent about whether they hold a lock, and if they do, -some are also confused about which one they hold. While all of this is -fixable, it's not trivial, and the best course of action for 8.0.1 is -probably just disabling the graph locking code temporarily. - -We don't currently rely on graph locking yet. It is supposed to replace -the AioContext lock eventually to enable multiqueue support, but as long -as we still have the AioContext lock, it is sufficient without the graph -lock. Once the AioContext lock goes away, the deadlock doesn't exist any -more either and this commit can be reverted. (Of course, it can also be -reverted while the AioContext lock still exists if the callers have been -fixed.) - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-2-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 80fc5d260002432628710f8b0c7cfc7d9b97bb9d) -Signed-off-by: Kevin Wolf ---- - block/graph-lock.c | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/block/graph-lock.c b/block/graph-lock.c -index 259a7a0bde..2490926c90 100644 ---- a/block/graph-lock.c -+++ b/block/graph-lock.c -@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock; - /* Protects the list of aiocontext and orphaned_reader_count */ - static QemuMutex aio_context_list_lock; - -+#if 0 - /* Written and read with atomic operations. */ - static int has_writer; -+#endif - - /* - * A reader coroutine could move from an AioContext to another. 
-@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx) - g_free(ctx->bdrv_graph); - } - -+#if 0 - static uint32_t reader_count(void) - { - BdrvGraphRWlock *brdv_graph; -@@ -105,10 +108,17 @@ static uint32_t reader_count(void) - assert((int32_t)rd >= 0); - return rd; - } -+#endif - - void bdrv_graph_wrlock(void) - { - GLOBAL_STATE_CODE(); -+ /* -+ * TODO Some callers hold an AioContext lock when this is called, which -+ * causes deadlocks. Reenable once the AioContext locking is cleaned up (or -+ * AioContext locks are gone). -+ */ -+#if 0 - assert(!qatomic_read(&has_writer)); - - /* Make sure that constantly arriving new I/O doesn't cause starvation */ -@@ -139,11 +149,13 @@ void bdrv_graph_wrlock(void) - } while (reader_count() >= 1); - - bdrv_drain_all_end(); -+#endif - } - - void bdrv_graph_wrunlock(void) - { - GLOBAL_STATE_CODE(); -+#if 0 - QEMU_LOCK_GUARD(&aio_context_list_lock); - assert(qatomic_read(&has_writer)); - -@@ -155,10 +167,13 @@ void bdrv_graph_wrunlock(void) - - /* Wake up all coroutine that are waiting to read the graph */ - qemu_co_enter_all(&reader_queue, &aio_context_list_lock); -+#endif - } - - void coroutine_fn bdrv_graph_co_rdlock(void) - { -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -223,10 +238,12 @@ void coroutine_fn bdrv_graph_co_rdlock(void) - qemu_co_queue_wait(&reader_queue, &aio_context_list_lock); - } - } -+#endif - } - - void coroutine_fn bdrv_graph_co_rdunlock(void) - { -+#if 0 - BdrvGraphRWlock *bdrv_graph; - bdrv_graph = qemu_get_current_aio_context()->bdrv_graph; - -@@ -249,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void) - if (qatomic_read(&has_writer)) { - aio_wait_kick(); - } -+#endif - } - - void bdrv_graph_rdlock_main_loop(void) -@@ -266,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void) - void assert_bdrv_graph_readable(void) - { - /* reader_count() is slow due to aio_context_list_lock lock 
contention */ -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - #ifdef CONFIG_DEBUG_GRAPH_LOCK - assert(qemu_in_main_thread() || reader_count()); - #endif -+#endif - } - - void assert_bdrv_graph_writable(void) - { - assert(qemu_in_main_thread()); -+ /* TODO Reenable when wrlock is reenabled */ -+#if 0 - assert(qatomic_read(&has_writer)); -+#endif - } --- -2.39.3 - diff --git a/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch new file mode 100644 index 0000000..2fff9ba --- /dev/null +++ b/SOURCES/kvm-graph-lock-remove-AioContext-locking.patch @@ -0,0 +1,1190 @@ +From 57d96b5774fab588c6bb6812ef8ef281ffe018d7 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:02 -0500 +Subject: [PATCH 085/101] graph-lock: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [16/26] 9575a8b834aaaa03abaf869e96f0808172e87824 (kmwolf/centos-qemu-kvm) + +Stop acquiring/releasing the AioContext lock in +bdrv_graph_wrlock()/bdrv_graph_unlock() since the lock no longer has any +effect. + +The distinction between bdrv_graph_wrunlock() and +bdrv_graph_wrunlock_ctx() becomes meaningless and they can be collapsed +into one function. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-6-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + block.c | 50 +++++++++++++++--------------- + block/backup.c | 4 +-- + block/blklogwrites.c | 8 ++--- + block/blkverify.c | 4 +-- + block/block-backend.c | 11 +++---- + block/commit.c | 16 +++++----- + block/graph-lock.c | 44 ++------------------------ + block/mirror.c | 22 ++++++------- + block/qcow2.c | 4 +-- + block/quorum.c | 8 ++--- + block/replication.c | 14 ++++----- + block/snapshot.c | 4 +-- + block/stream.c | 12 +++---- + block/vmdk.c | 20 ++++++------ + blockdev.c | 8 ++--- + blockjob.c | 12 +++---- + include/block/graph-lock.h | 21 ++----------- + scripts/block-coroutine-wrapper.py | 4 +-- + tests/unit/test-bdrv-drain.c | 40 ++++++++++++------------ + tests/unit/test-bdrv-graph-mod.c | 20 ++++++------ + 20 files changed, 133 insertions(+), 193 deletions(-) + +diff --git a/block.c b/block.c +index bfb0861ec6..25e1ebc606 100644 +--- a/block.c ++++ b/block.c +@@ -1708,12 +1708,12 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, + open_failed: + bs->drv = NULL; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + if (bs->file != NULL) { + bdrv_unref_child(bs, bs->file); + assert(!bs->file); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_free(bs->opaque); + bs->opaque = NULL; +@@ -3575,9 +3575,9 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + + bdrv_ref(drain_bs); + bdrv_drained_begin(drain_bs); +- bdrv_graph_wrlock(backing_hd); ++ bdrv_graph_wrlock(); + ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); +- bdrv_graph_wrunlock(backing_hd); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(drain_bs); + bdrv_unref(drain_bs); + +@@ -3790,13 +3790,13 @@ BdrvChild *bdrv_open_child(const char *filename, + return NULL; + } + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + ctx = 
bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, + errp); + aio_context_release(ctx); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + return child; + } +@@ -4650,9 +4650,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + aio_context_release(ctx); + } + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tran_commit(tran); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) { + BlockDriverState *bs = bs_entry->state.bs; +@@ -4669,9 +4669,9 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp) + goto cleanup; + + abort: +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tran_abort(tran); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { + if (bs_entry->prepared) { +@@ -4852,12 +4852,12 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, + } + + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(new_child_bs); ++ bdrv_graph_wrlock(); + + ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing, + tran, errp); + +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + + if (old_ctx != ctx) { + aio_context_release(ctx); +@@ -5209,14 +5209,14 @@ static void bdrv_close(BlockDriverState *bs) + bs->drv = NULL; + } + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + bdrv_unref_child(bs, child); + } + + assert(!bs->backing); + assert(!bs->file); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + g_free(bs->opaque); + bs->opaque = NULL; +@@ -5509,9 +5509,9 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp) + bdrv_graph_rdunlock_main_loop(); + + bdrv_drained_begin(child_bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + ret = bdrv_replace_node_common(bs, child_bs, true, true, errp); +- bdrv_graph_wrunlock(bs); 
++ bdrv_graph_wrunlock(); + bdrv_drained_end(child_bs); + + return ret; +@@ -5561,7 +5561,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, + aio_context_acquire(old_context); + new_context = NULL; + +- bdrv_graph_wrlock(bs_top); ++ bdrv_graph_wrlock(); + + child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", + &child_of_bds, bdrv_backing_role(bs_new), +@@ -5593,7 +5593,7 @@ out: + tran_finalize(tran, ret); + + bdrv_refresh_limits(bs_top, NULL, NULL); +- bdrv_graph_wrunlock(bs_top); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(bs_top); + bdrv_drained_end(bs_new); +@@ -5620,7 +5620,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + bdrv_ref(old_bs); + bdrv_drained_begin(old_bs); + bdrv_drained_begin(new_bs); +- bdrv_graph_wrlock(new_bs); ++ bdrv_graph_wrlock(); + + bdrv_replace_child_tran(child, new_bs, tran); + +@@ -5631,7 +5631,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs, + + tran_finalize(tran, ret); + +- bdrv_graph_wrunlock(new_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(old_bs); + bdrv_drained_end(new_bs); + bdrv_unref(old_bs); +@@ -5718,9 +5718,9 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options, + bdrv_ref(bs); + bdrv_drained_begin(bs); + bdrv_drained_begin(new_node_bs); +- bdrv_graph_wrlock(new_node_bs); ++ bdrv_graph_wrlock(); + ret = bdrv_replace_node(bs, new_node_bs, errp); +- bdrv_graph_wrunlock(new_node_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(new_node_bs); + bdrv_drained_end(bs); + bdrv_unref(bs); +@@ -5975,7 +5975,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + + bdrv_ref(top); + bdrv_drained_begin(base); +- bdrv_graph_wrlock(base); ++ bdrv_graph_wrlock(); + + if (!top->drv || !base->drv) { + goto exit_wrlock; +@@ -6015,7 +6015,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + * That's a FIXME. 
+ */ + bdrv_replace_node_common(top, base, false, false, &local_err); +- bdrv_graph_wrunlock(base); ++ bdrv_graph_wrunlock(); + + if (local_err) { + error_report_err(local_err); +@@ -6052,7 +6052,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + goto exit; + + exit_wrlock: +- bdrv_graph_wrunlock(base); ++ bdrv_graph_wrunlock(); + exit: + bdrv_drained_end(base); + bdrv_unref(top); +diff --git a/block/backup.c b/block/backup.c +index 8aae5836d7..ec29d6b810 100644 +--- a/block/backup.c ++++ b/block/backup.c +@@ -496,10 +496,10 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, + block_copy_set_speed(bcs, speed); + + /* Required permissions are taken by copy-before-write filter target */ +- bdrv_graph_wrlock(target); ++ bdrv_graph_wrlock(); + block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, + &error_abort); +- bdrv_graph_wrunlock(target); ++ bdrv_graph_wrunlock(); + + return &job->common; + +diff --git a/block/blklogwrites.c b/block/blklogwrites.c +index 3678f6cf42..7207b2e757 100644 +--- a/block/blklogwrites.c ++++ b/block/blklogwrites.c +@@ -251,9 +251,9 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + ret = 0; + fail_log: + if (ret < 0) { +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->log_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + s->log_file = NULL; + } + fail: +@@ -265,10 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs) + { + BDRVBlkLogWritesState *s = bs->opaque; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int64_t coroutine_fn GRAPH_RDLOCK +diff --git a/block/blkverify.c b/block/blkverify.c +index 9b17c46644..ec45d8335e 100644 +--- a/block/blkverify.c ++++ b/block/blkverify.c +@@ -151,10 +151,10 @@ static void blkverify_close(BlockDriverState *bs) + { 
+ BDRVBlkverifyState *s = bs->opaque; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->test_file); + s->test_file = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int64_t coroutine_fn GRAPH_RDLOCK +diff --git a/block/block-backend.c b/block/block-backend.c +index ec21148806..abac4e0235 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -889,7 +889,6 @@ void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + BdrvChild *root; +- AioContext *ctx; + + GLOBAL_STATE_CODE(); + +@@ -919,10 +918,9 @@ void blk_remove_bs(BlockBackend *blk) + root = blk->root; + blk->root = NULL; + +- ctx = bdrv_get_aio_context(root->bs); +- bdrv_graph_wrlock(root->bs); ++ bdrv_graph_wrlock(); + bdrv_root_unref_child(root); +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + } + + /* +@@ -933,16 +931,15 @@ void blk_remove_bs(BlockBackend *blk) + int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- AioContext *ctx = bdrv_get_aio_context(bs); + + GLOBAL_STATE_CODE(); + bdrv_ref(bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->perm, blk->shared_perm, + blk, errp); +- bdrv_graph_wrunlock_ctx(ctx); ++ bdrv_graph_wrunlock(); + if (blk->root == NULL) { + return -EPERM; + } +diff --git a/block/commit.c b/block/commit.c +index 69cc75be0c..1dd7a65ffb 100644 +--- a/block/commit.c ++++ b/block/commit.c +@@ -100,9 +100,9 @@ static void commit_abort(Job *job) + bdrv_graph_rdunlock_main_loop(); + + bdrv_drained_begin(commit_top_backing_bs); +- bdrv_graph_wrlock(commit_top_backing_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort); +- bdrv_graph_wrunlock(commit_top_backing_bs); ++ bdrv_graph_wrunlock(); + 
bdrv_drained_end(commit_top_backing_bs); + + bdrv_unref(s->commit_top_bs); +@@ -339,7 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, + * this is the responsibility of the interface (i.e. whoever calls + * commit_start()). + */ +- bdrv_graph_wrlock(top); ++ bdrv_graph_wrlock(); + s->base_overlay = bdrv_find_overlay(top, base); + assert(s->base_overlay); + +@@ -370,19 +370,19 @@ void commit_start(const char *job_id, BlockDriverState *bs, + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + goto fail; + } + } + + if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + goto fail; + } + s->chain_frozen = true; + + ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + + if (ret < 0) { + goto fail; +@@ -434,9 +434,9 @@ fail: + * otherwise this would fail because of lack of permissions. */ + if (commit_top_bs) { + bdrv_drained_begin(top); +- bdrv_graph_wrlock(top); ++ bdrv_graph_wrlock(); + bdrv_replace_node(commit_top_bs, top, &error_abort); +- bdrv_graph_wrunlock(top); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(top); + } + } +diff --git a/block/graph-lock.c b/block/graph-lock.c +index 079e878d9b..c81162b147 100644 +--- a/block/graph-lock.c ++++ b/block/graph-lock.c +@@ -106,27 +106,12 @@ static uint32_t reader_count(void) + return rd; + } + +-void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) ++void no_coroutine_fn bdrv_graph_wrlock(void) + { +- AioContext *ctx = NULL; +- + GLOBAL_STATE_CODE(); + assert(!qatomic_read(&has_writer)); + assert(!qemu_in_coroutine()); + +- /* +- * Release only non-mainloop AioContext. The mainloop often relies on the +- * BQL and doesn't lock the main AioContext before doing things. 
+- */ +- if (bs) { +- ctx = bdrv_get_aio_context(bs); +- if (ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } else { +- ctx = NULL; +- } +- } +- + /* Make sure that constantly arriving new I/O doesn't cause starvation */ + bdrv_drain_all_begin_nopoll(); + +@@ -155,27 +140,13 @@ void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs) + } while (reader_count() >= 1); + + bdrv_drain_all_end(); +- +- if (ctx) { +- aio_context_acquire(bdrv_get_aio_context(bs)); +- } + } + +-void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) ++void no_coroutine_fn bdrv_graph_wrunlock(void) + { + GLOBAL_STATE_CODE(); + assert(qatomic_read(&has_writer)); + +- /* +- * Release only non-mainloop AioContext. The mainloop often relies on the +- * BQL and doesn't lock the main AioContext before doing things. +- */ +- if (ctx && ctx != qemu_get_aio_context()) { +- aio_context_release(ctx); +- } else { +- ctx = NULL; +- } +- + WITH_QEMU_LOCK_GUARD(&aio_context_list_lock) { + /* + * No need for memory barriers, this works in pair with +@@ -197,17 +168,6 @@ void no_coroutine_fn bdrv_graph_wrunlock_ctx(AioContext *ctx) + * progress. + */ + aio_bh_poll(qemu_get_aio_context()); +- +- if (ctx) { +- aio_context_acquire(ctx); +- } +-} +- +-void no_coroutine_fn bdrv_graph_wrunlock(BlockDriverState *bs) +-{ +- AioContext *ctx = bs ? bdrv_get_aio_context(bs) : NULL; +- +- bdrv_graph_wrunlock_ctx(ctx); + } + + void coroutine_fn bdrv_graph_co_rdlock(void) +diff --git a/block/mirror.c b/block/mirror.c +index cd9d3ad4a8..51f9e2f17c 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -764,7 +764,7 @@ static int mirror_exit_common(Job *job) + * check for an op blocker on @to_replace, and we have our own + * there. 
+ */ +- bdrv_graph_wrlock(target_bs); ++ bdrv_graph_wrlock(); + if (bdrv_recurse_can_replace(src, to_replace)) { + bdrv_replace_node(to_replace, target_bs, &local_err); + } else { +@@ -773,7 +773,7 @@ static int mirror_exit_common(Job *job) + "would not lead to an abrupt change of visible data", + to_replace->node_name, target_bs->node_name); + } +- bdrv_graph_wrunlock(target_bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(to_replace); + if (local_err) { + error_report_err(local_err); +@@ -796,9 +796,9 @@ static int mirror_exit_common(Job *job) + * valid. + */ + block_job_remove_all_bdrv(bjob); +- bdrv_graph_wrlock(mirror_top_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort); +- bdrv_graph_wrunlock(mirror_top_bs); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(target_bs); + bdrv_unref(target_bs); +@@ -1914,13 +1914,13 @@ static BlockJob *mirror_start_job( + */ + bdrv_disable_dirty_bitmap(s->dirty_bitmap); + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + ret = block_job_add_bdrv(&s->common, "source", bs, 0, + BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | + BLK_PERM_CONSISTENT_READ, + errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + +@@ -1965,17 +1965,17 @@ static BlockJob *mirror_start_job( + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + iter_shared_perms, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } + + if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + QTAILQ_INIT(&s->ops_in_flight); + +@@ -2001,12 +2001,12 @@ fail: + + bs_opaque->stop = true; + bdrv_drained_begin(bs); +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + assert(mirror_top_bs->backing->bs == bs); + bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, + 
&error_abort); + bdrv_replace_node(mirror_top_bs, bs, &error_abort); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(bs); + + bdrv_unref(mirror_top_bs); +diff --git a/block/qcow2.c b/block/qcow2.c +index 7968735346..d91b7b91d3 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2813,9 +2813,9 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) + if (close_data_file && has_data_file(bs)) { + GLOBAL_STATE_CODE(); + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->data_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + s->data_file = NULL; + bdrv_graph_rdlock_main_loop(); + } +diff --git a/block/quorum.c b/block/quorum.c +index 505b8b3e18..db8fe891c4 100644 +--- a/block/quorum.c ++++ b/block/quorum.c +@@ -1037,14 +1037,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, + + close_exit: + /* cleanup on error */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_children; i++) { + if (!opened[i]) { + continue; + } + bdrv_unref_child(bs, s->children[i]); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + g_free(s->children); + g_free(opened); + exit: +@@ -1057,11 +1057,11 @@ static void quorum_close(BlockDriverState *bs) + BDRVQuorumState *s = bs->opaque; + int i; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_children; i++) { + bdrv_unref_child(bs, s->children[i]); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_free(s->children); + } +diff --git a/block/replication.c b/block/replication.c +index 5ded5f1ca9..424b537ff7 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -560,7 +560,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + return; + } + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + + bdrv_ref(hidden_disk->bs); + s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk", +@@ 
-568,7 +568,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + &local_err); + if (local_err) { + error_propagate(errp, local_err); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + aio_context_release(aio_context); + return; + } +@@ -579,7 +579,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + BDRV_CHILD_DATA, &local_err); + if (local_err) { + error_propagate(errp, local_err); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + aio_context_release(aio_context); + return; + } +@@ -592,7 +592,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + if (!top_bs || !bdrv_is_root_node(top_bs) || + !check_top_bs(top_bs, bs)) { + error_setg(errp, "No top_bs or it is invalid"); +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + reopen_backing_file(bs, false, NULL); + aio_context_release(aio_context); + return; +@@ -600,7 +600,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, + bdrv_op_block_all(top_bs, s->blocker); + bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker); + +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + s->backup_job = backup_job_create( + NULL, s->secondary_disk->bs, s->hidden_disk->bs, +@@ -691,12 +691,12 @@ static void replication_done(void *opaque, int ret) + if (ret == 0) { + s->stage = BLOCK_REPLICATION_DONE; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, s->secondary_disk); + s->secondary_disk = NULL; + bdrv_unref_child(bs, s->hidden_disk); + s->hidden_disk = NULL; +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + s->error = 0; + } else { +diff --git a/block/snapshot.c b/block/snapshot.c +index ec8cf4810b..e486d3e205 100644 +--- a/block/snapshot.c ++++ b/block/snapshot.c +@@ -290,9 +290,9 @@ int bdrv_snapshot_goto(BlockDriverState *bs, + } + + /* .bdrv_open() will re-attach it */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, fallback); +- 
bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); + open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); +diff --git a/block/stream.c b/block/stream.c +index 01fe7c0f16..048c2d282f 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -99,9 +99,9 @@ static int stream_prepare(Job *job) + } + } + +- bdrv_graph_wrlock(s->target_bs); ++ bdrv_graph_wrlock(); + bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); +- bdrv_graph_wrunlock(s->target_bs); ++ bdrv_graph_wrunlock(); + + /* + * This call will do I/O, so the graph can change again from here on. +@@ -366,10 +366,10 @@ void stream_start(const char *job_id, BlockDriverState *bs, + * already have our own plans. Also don't allow resize as the image size is + * queried only at the job start and then cached. + */ +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + if (block_job_add_bdrv(&s->common, "active node", bs, 0, + basic_flags | BLK_PERM_WRITE, errp)) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + +@@ -389,11 +389,11 @@ void stream_start(const char *job_id, BlockDriverState *bs, + ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, + basic_flags, errp); + if (ret < 0) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + goto fail; + } + } +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + + s->base_overlay = base_overlay; + s->above_base = above_base; +diff --git a/block/vmdk.c b/block/vmdk.c +index d6971c7067..bf78e12383 100644 +--- a/block/vmdk.c ++++ b/block/vmdk.c +@@ -272,7 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs) + BDRVVmdkState *s = bs->opaque; + VmdkExtent *e; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < s->num_extents; i++) { + e = &s->extents[i]; + g_free(e->l1_table); +@@ -283,7 +283,7 @@ static void vmdk_free_extents(BlockDriverState *bs) + bdrv_unref_child(bs, e->file); + } + } +- bdrv_graph_wrunlock(NULL); ++ 
bdrv_graph_wrunlock(); + + g_free(s->extents); + } +@@ -1247,9 +1247,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + 0, 0, 0, 0, 0, &extent, errp); + if (ret < 0) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1266,9 +1266,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + g_free(buf); + if (ret) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1277,9 +1277,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); + if (ret) { + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + goto out; + } +@@ -1287,9 +1287,9 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, + } else { + error_setg(errp, "Unsupported extent type '%s'", type); + bdrv_graph_rdunlock_main_loop(); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(bs, extent_file); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_graph_rdlock_main_loop(); + ret = -ENOTSUP; + goto out; +diff --git a/blockdev.c b/blockdev.c +index c91f49e7b6..9e1381169d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -1611,9 +1611,9 @@ static void external_snapshot_abort(void *opaque) + } + + bdrv_drained_begin(state->new_bs); +- bdrv_graph_wrlock(state->old_bs); ++ bdrv_graph_wrlock(); + bdrv_replace_node(state->new_bs, state->old_bs, &error_abort); +- bdrv_graph_wrunlock(state->old_bs); ++ 
bdrv_graph_wrunlock(); + bdrv_drained_end(state->new_bs); + + bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */ +@@ -3657,7 +3657,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, + BlockDriverState *parent_bs, *new_bs = NULL; + BdrvChild *p_child; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + + parent_bs = bdrv_lookup_bs(parent, parent, errp); + if (!parent_bs) { +@@ -3693,7 +3693,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, + } + + out: +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + BlockJobInfoList *qmp_query_block_jobs(Error **errp) +diff --git a/blockjob.c b/blockjob.c +index b7a29052b9..7310412313 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -199,7 +199,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + * to process an already freed BdrvChild. + */ + aio_context_release(job->job.aio_context); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + aio_context_acquire(job->job.aio_context); + while (job->nodes) { + GSList *l = job->nodes; +@@ -212,7 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) + + g_slist_free_1(l); + } +- bdrv_graph_wrunlock_ctx(job->job.aio_context); ++ bdrv_graph_wrunlock(); + } + + bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) +@@ -514,7 +514,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + int ret; + GLOBAL_STATE_CODE(); + +- bdrv_graph_wrlock(bs); ++ bdrv_graph_wrlock(); + + if (job_id == NULL && !(flags & JOB_INTERNAL)) { + job_id = bdrv_get_device_name(bs); +@@ -523,7 +523,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + job = job_create(job_id, &driver->job_driver, txn, bdrv_get_aio_context(bs), + flags, cb, opaque, errp); + if (job == NULL) { +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + return NULL; + } + +@@ -563,11 +563,11 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, + goto fail; + } + +- bdrv_graph_wrunlock(bs); 
++ bdrv_graph_wrunlock(); + return job; + + fail: +- bdrv_graph_wrunlock(bs); ++ bdrv_graph_wrunlock(); + job_early_fail(&job->job); + return NULL; + } +diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h +index 22b5db1ed9..d7545e82d0 100644 +--- a/include/block/graph-lock.h ++++ b/include/block/graph-lock.h +@@ -110,34 +110,17 @@ void unregister_aiocontext(AioContext *ctx); + * + * The wrlock can only be taken from the main loop, with BQL held, as only the + * main loop is allowed to modify the graph. +- * +- * If @bs is non-NULL, its AioContext is temporarily released. +- * +- * This function polls. Callers must not hold the lock of any AioContext other +- * than the current one and the one of @bs. + */ + void no_coroutine_fn TSA_ACQUIRE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrlock(BlockDriverState *bs); ++bdrv_graph_wrlock(void); + + /* + * bdrv_graph_wrunlock: + * Write finished, reset global has_writer to 0 and restart + * all readers that are waiting. +- * +- * If @bs is non-NULL, its AioContext is temporarily released. +- */ +-void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrunlock(BlockDriverState *bs); +- +-/* +- * bdrv_graph_wrunlock_ctx: +- * Write finished, reset global has_writer to 0 and restart +- * all readers that are waiting. +- * +- * If @ctx is non-NULL, its lock is temporarily released. 
+ */ + void no_coroutine_fn TSA_RELEASE(graph_lock) TSA_NO_TSA +-bdrv_graph_wrunlock_ctx(AioContext *ctx); ++bdrv_graph_wrunlock(void); + + /* + * bdrv_graph_co_rdlock: +diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py +index a38e5833fb..38364fa557 100644 +--- a/scripts/block-coroutine-wrapper.py ++++ b/scripts/block-coroutine-wrapper.py +@@ -261,8 +261,8 @@ def gen_no_co_wrapper(func: FuncDecl) -> str: + graph_lock=' bdrv_graph_rdlock_main_loop();' + graph_unlock=' bdrv_graph_rdunlock_main_loop();' + elif func.graph_wrlock: +- graph_lock=' bdrv_graph_wrlock(NULL);' +- graph_unlock=' bdrv_graph_wrunlock(NULL);' ++ graph_lock=' bdrv_graph_wrlock();' ++ graph_unlock=' bdrv_graph_wrunlock();' + + return f"""\ + /* +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 704d1a3f36..d9754dfebc 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -807,9 +807,9 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + tjob->bs = src; + job = &tjob->common; + +- bdrv_graph_wrlock(target); ++ bdrv_graph_wrlock(); + block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); +- bdrv_graph_wrunlock(target); ++ bdrv_graph_wrunlock(); + + switch (result) { + case TEST_JOB_SUCCESS: +@@ -991,11 +991,11 @@ static void bdrv_test_top_close(BlockDriverState *bs) + { + BdrvChild *c, *next_c; + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { + bdrv_unref_child(bs, c); + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static int coroutine_fn GRAPH_RDLOCK +@@ -1085,10 +1085,10 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- 
bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* This child will be the one to pass to requests through to, and + * it will stall until a drain occurs */ +@@ -1096,21 +1096,21 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + &error_abort); + child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; + /* Takes our reference to child_bs */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", + &child_of_bds, + BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* This child is just there to be deleted + * (for detach_instead_of_delete == true) */ + null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, + &error_abort); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); + blk_insert_bs(blk, bs, &error_abort); +@@ -1193,14 +1193,14 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) + + bdrv_dec_in_flight(data->child_b->bs); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_unref_child(data->parent_b, data->child_b); + + bdrv_ref(data->c); + data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", + &child_of_bds, BDRV_CHILD_DATA, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + } + + static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) +@@ -1298,7 +1298,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) + /* Set child relationships */ + bdrv_ref(b); + bdrv_ref(a); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); + child_a = bdrv_attach_child(parent_b, a, "PB-A", 
&child_of_bds, +@@ -1308,7 +1308,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) + bdrv_attach_child(parent_a, a, "PA-A", + by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + g_assert_cmpint(parent_a->refcnt, ==, 1); + g_assert_cmpint(parent_b->refcnt, ==, 1); +@@ -1727,7 +1727,7 @@ static void test_drop_intermediate_poll(void) + * Establish the chain last, so the chain links are the first + * elements in the BDS.parents lists + */ +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + for (i = 0; i < 3; i++) { + if (i) { + /* Takes the reference to chain[i - 1] */ +@@ -1735,7 +1735,7 @@ static void test_drop_intermediate_poll(void) + &chain_child_class, BDRV_CHILD_COW, &error_abort); + } + } +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + job = block_job_create("job", &test_simple_job_driver, NULL, job_node, + 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); +@@ -1982,10 +1982,10 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + new_child_bs->total_sectors = 1; + + bdrv_ref(old_child_bs); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + parent_s->setup_completed = true; + + for (i = 0; i < old_drain_count; i++) { +@@ -2016,9 +2016,9 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + g_assert(parent_bs->quiesce_counter == old_drain_count); + bdrv_drained_begin(old_child_bs); + bdrv_drained_begin(new_child_bs); +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + bdrv_drained_end(new_child_bs); + bdrv_drained_end(old_child_bs); + g_assert(parent_bs->quiesce_counter == new_drain_count); +diff --git 
a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c +index 074adcbb93..8ee6ef38d8 100644 +--- a/tests/unit/test-bdrv-graph-mod.c ++++ b/tests/unit/test-bdrv-graph-mod.c +@@ -137,10 +137,10 @@ static void test_update_perm_tree(void) + + blk_insert_bs(root, bs, &error_abort); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(filter, bs, "child", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + aio_context_acquire(qemu_get_aio_context()); + ret = bdrv_append(filter, bs, NULL); +@@ -206,11 +206,11 @@ static void test_should_update_child(void) + + bdrv_set_backing_hd(target, bs, &error_abort); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + g_assert(target->backing->bs == bs); + bdrv_attach_child(filter, target, "target", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + aio_context_acquire(qemu_get_aio_context()); + bdrv_append(filter, bs, &error_abort); + aio_context_release(qemu_get_aio_context()); +@@ -248,7 +248,7 @@ static void test_parallel_exclusive_write(void) + bdrv_ref(base); + bdrv_ref(fl1); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, fl1, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +@@ -260,7 +260,7 @@ static void test_parallel_exclusive_write(void) + &error_abort); + + bdrv_replace_node(fl1, fl2, &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + bdrv_drained_end(fl2); + bdrv_drained_end(fl1); +@@ -367,7 +367,7 @@ static void test_parallel_perm_update(void) + */ + bdrv_ref(base); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, + &error_abort); + c_fl1 = bdrv_attach_child(ws, fl1, "first", &child_of_bds, +@@ -380,7 +380,7 @@ static void test_parallel_perm_update(void) + bdrv_attach_child(fl2, base, "backing", 
&child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + /* Select fl1 as first child to be active */ + s->selected = c_fl1; +@@ -434,11 +434,11 @@ static void test_append_greedy_filter(void) + BlockDriverState *base = no_perm_node("base"); + BlockDriverState *fl = exclusive_writer_node("fl1"); + +- bdrv_graph_wrlock(NULL); ++ bdrv_graph_wrlock(); + bdrv_attach_child(top, base, "backing", &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + &error_abort); +- bdrv_graph_wrunlock(NULL); ++ bdrv_graph_wrunlock(); + + aio_context_acquire(qemu_get_aio_context()); + bdrv_append(fl, base, &error_abort); +-- +2.39.3 + diff --git a/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch new file mode 100644 index 0000000..4fb4844 --- /dev/null +++ b/SOURCES/kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch @@ -0,0 +1,94 @@ +From a5b4eec5f456b1ca3fe753e1d76f96cf3f8914ef Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 17 Jan 2024 14:55:53 +0100 +Subject: [PATCH 01/22] hv-balloon: use get_min_alignment() to express 32 GiB + alignment + +RH-Author: David Hildenbrand +RH-MergeRequest: 221: memory-device: reintroduce memory region size check +RH-Jira: RHEL-20341 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Igor Mammedov +RH-Commit: [1/2] cbe092fe549552928270892253b31cd8fe199825 + +https://issues.redhat.com/browse/RHEL-20341 + +Let's implement the get_min_alignment() callback for memory devices, and +copy for the device memory region the alignment of the host memory +region. This mimics what virtio-mem does, and allows for re-introducing +proper alignment checks for the memory region size (where we don't care +about additional device requirements) in memory device core. + +Message-ID: <20240117135554.787344-2-david@redhat.com> +Reviewed-by: Maciej S. 
Szmigiero +Signed-off-by: David Hildenbrand +(cherry picked from commit f77c5f38f49c71bc14cf1019ac92b0b95f572414) +Signed-off-by: David Hildenbrand +--- + hw/hyperv/hv-balloon.c | 37 +++++++++++++++++++++---------------- + 1 file changed, 21 insertions(+), 16 deletions(-) + +diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c +index 66f297c1d7..0829c495b0 100644 +--- a/hw/hyperv/hv-balloon.c ++++ b/hw/hyperv/hv-balloon.c +@@ -1476,22 +1476,7 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon) + balloon->mr = g_new0(MemoryRegion, 1); + memory_region_init(balloon->mr, OBJECT(balloon), TYPE_HV_BALLOON, + memory_region_size(hostmem_mr)); +- +- /* +- * The VM can indicate an alignment up to 32 GiB. Memory device core can +- * usually only handle/guarantee 1 GiB alignment. The user will have to +- * specify a larger maxmem eventually. +- * +- * The memory device core will warn the user in case maxmem might have to be +- * increased and will fail plugging the device if there is not sufficient +- * space after alignment. +- * +- * TODO: we could do the alignment ourselves in a slightly bigger region. +- * But this feels better, although the warning might be annoying. Maybe +- * we can optimize that in the future (e.g., with such a device on the +- * cmdline place/size the device memory region differently. +- */ +- balloon->mr->align = MAX(32 * GiB, memory_region_get_alignment(hostmem_mr)); ++ balloon->mr->align = memory_region_get_alignment(hostmem_mr); + } + + static void hv_balloon_free_mr(HvBalloon *balloon) +@@ -1653,6 +1638,25 @@ static MemoryRegion *hv_balloon_md_get_memory_region(MemoryDeviceState *md, + return balloon->mr; + } + ++static uint64_t hv_balloon_md_get_min_alignment(const MemoryDeviceState *md) ++{ ++ /* ++ * The VM can indicate an alignment up to 32 GiB. Memory device core can ++ * usually only handle/guarantee 1 GiB alignment. The user will have to ++ * specify a larger maxmem eventually. 
++ * ++ * The memory device core will warn the user in case maxmem might have to be ++ * increased and will fail plugging the device if there is not sufficient ++ * space after alignment. ++ * ++ * TODO: we could do the alignment ourselves in a slightly bigger region. ++ * But this feels better, although the warning might be annoying. Maybe ++ * we can optimize that in the future (e.g., with such a device on the ++ * cmdline place/size the device memory region differently. ++ */ ++ return 32 * GiB; ++} ++ + static void hv_balloon_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) + { +@@ -1765,5 +1769,6 @@ static void hv_balloon_class_init(ObjectClass *klass, void *data) + mdc->get_memory_region = hv_balloon_md_get_memory_region; + mdc->decide_memslots = hv_balloon_decide_memslots; + mdc->get_memslots = hv_balloon_get_memslots; ++ mdc->get_min_alignment = hv_balloon_md_get_min_alignment; + mdc->fill_device_info = hv_balloon_md_fill_device_info; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch b/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch deleted file mode 100644 index 67e702c..0000000 --- a/SOURCES/kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch +++ /dev/null @@ -1,40 +0,0 @@ -From b4645e7682aa1bde6f89df0eff2a9de83720eecc Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Tue, 2 May 2023 15:51:53 +0530 -Subject: [PATCH 3/3] hw/acpi: Mark acpi blobs as resizable on RHEL pc machines - version 7.6 and above - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [2/2] 95d443af6e75c569d89d04d028012c3c56c0c3a4 (anisinha/centos-qemu-kvm) - -Please look at QEMU upstream commit -1af507756bae7 ("hw/acpi: limit warning on acpi table size to pc machines older than version 2.3") 
-This patch adapts the above change so that it applies to RHEL pc machines of -version 7.6 and newer. These are the machine types that are currently supported -in RHEL. Q35 machines are not affected. - -Signed-off-by: Ani Sinha ---- - hw/i386/pc_piix.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index 4d5880e249..6c7be628e1 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -914,6 +914,7 @@ static void pc_machine_rhel7_options(MachineClass *m) - m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; - pcmc->default_nic_model = "e1000"; - pcmc->pci_root_uid = 0; -+ pcmc->resizable_acpi_blob = true; - m->default_display = "std"; - m->no_parallel = 1; - m->numa_mem_supported = true; --- -2.39.1 - diff --git a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch b/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch deleted file mode 100644 index e06113a..0000000 --- a/SOURCES/kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 3f70da88788c398877b8ded0b27689530385302b Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Wed, 29 Mar 2023 10:27:26 +0530 -Subject: [PATCH 2/3] hw/acpi: limit warning on acpi table size to pc machines - older than version 2.3 - -RH-Author: Ani Sinha -RH-MergeRequest: 160: hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 -RH-Bugzilla: 1934134 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: MST -RH-Commit: [1/2] 96c3b6d51e16734eb4e8de52635e0ca036964090 (anisinha/centos-qemu-kvm) - -i440fx machine versions 2.3 and newer supports dynamic ram -resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") . 
-Currently supported all q35 machine types (versions 2.4 and newer) supports -resizable RAM/ROM blocks.Therefore the warning generated when the ACPI table -size exceeds a pre-defined value does not apply to those machine versions. -Add a check limiting the warning message to only those machines that does not -support expandable ram blocks (that is, i440fx machines with version 2.2 -and older). - -Signed-off-by: Ani Sinha -Message-Id: <20230329045726.14028-1-anisinha@redhat.com> -Reviewed-by: Igor Mammedov -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 1af507756bae775028c27d30e602e2b9c72cd074) ---- - hw/i386/acpi-build.c | 6 ++++-- - hw/i386/pc.c | 1 + - hw/i386/pc_piix.c | 1 + - include/hw/i386/pc.h | 3 +++ - 4 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c -index ec857a117e..9bc4d8a981 100644 ---- a/hw/i386/acpi-build.c -+++ b/hw/i386/acpi-build.c -@@ -2695,7 +2695,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - int legacy_table_size = - ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, - ACPI_BUILD_ALIGN_SIZE); -- if (tables_blob->len > legacy_table_size) { -+ if ((tables_blob->len > legacy_table_size) && -+ !pcmc->resizable_acpi_blob) { - /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -@@ -2706,7 +2707,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) - g_array_set_size(tables_blob, legacy_table_size); - } else { - /* Make sure we have a buffer in case we need to resize the tables. */ -- if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { -+ if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && -+ !pcmc->resizable_acpi_blob) { - /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots. 
*/ - warn_report("ACPI table size %u exceeds %d bytes," - " migration may not work", -diff --git a/hw/i386/pc.c b/hw/i386/pc.c -index f216922cee..7db5a2348f 100644 ---- a/hw/i386/pc.c -+++ b/hw/i386/pc.c -@@ -2092,6 +2092,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) - pcmc->acpi_data_size = 0x20000 + 0x8000; - pcmc->pvh_enabled = true; - pcmc->kvmclock_create_always = true; -+ pcmc->resizable_acpi_blob = true; - assert(!mc->get_hotplug_handler); - mc->async_pf_vmexit_disable = false; - mc->get_hotplug_handler = pc_get_hotplug_handler; -diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c -index fc704d783f..4d5880e249 100644 ---- a/hw/i386/pc_piix.c -+++ b/hw/i386/pc_piix.c -@@ -750,6 +750,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) - compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); - compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); - pcmc->rsdp_in_ram = false; -+ pcmc->resizable_acpi_blob = false; - } - - DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, -diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h -index d218ad1628..2f514d13d8 100644 ---- a/include/hw/i386/pc.h -+++ b/include/hw/i386/pc.h -@@ -130,6 +130,9 @@ struct PCMachineClass { - - /* create kvmclock device even when KVM PV features are not exposed */ - bool kvmclock_create_always; -+ -+ /* resizable acpi blob compat */ -+ bool resizable_acpi_blob; - }; - - #define TYPE_PC_MACHINE "generic-pc-machine" --- -2.39.1 - diff --git a/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch new file mode 100644 index 0000000..84f6108 --- /dev/null +++ b/SOURCES/kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch @@ -0,0 +1,42 @@ +From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:18 +0800 +Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt 
machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e) +Signed-off-by: Eric Auger +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335a24..660f49db49 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -8,6 +8,7 @@ config ARM_VIRT + imply TPM_TIS_SYSBUS + imply TPM_TIS_I2C + imply NVDIMM ++ imply IOMMUFD + select ARM_GIC + select ACPI + select ARM_SMMUV3 +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch b/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch deleted file mode 100644 index e96bb10..0000000 --- a/SOURCES/kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -There are two ARM machines where NUMA is aware: 'virt' and 'sbsa-ref'. -Both of them are required to follow cluster-NUMA-node boundary. 
To -enable the validation to warn about the irregular configuration where -multiple CPUs in one cluster have been associated with different NUMA -nodes. - -Signed-off-by: Gavin Shan -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-3-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb) -Signed-off-by: Gavin Shan ---- - hw/arm/sbsa-ref.c | 2 ++ - hw/arm/virt.c | 2 ++ - 2 files changed, 4 insertions(+) - -diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c -index 0b93558dde..efb380e7c8 100644 ---- a/hw/arm/sbsa-ref.c -+++ b/hw/arm/sbsa-ref.c -@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data) - mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids; - mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props; - mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - } - - static const TypeInfo sbsa_ref_info = { -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index 9be53e9355..df6a0231bc 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch b/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch deleted file mode 100644 index 3bbe93f..0000000 --- a/SOURCES/kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch +++ /dev/null @@ -1,166 +0,0 @@ -From a3412036477e8c91e0b71fcd91de4e24a9904077 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Tue, 25 Jul 2023 10:56:51 
+0100 -Subject: [PATCH 09/14] hw/arm/smmu: Handle big-endian hosts correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [3/3] df9c8d228b25273e0c4927a10b21e66fb4bef5f0 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -The implementation of the SMMUv3 has multiple places where it reads a -data structure from the guest and directly operates on it without -doing a guest-to-host endianness conversion. Since all SMMU data -structures are little-endian, this means that the SMMU doesn't work -on a big-endian host. In particular, this causes the Avocado test - machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max -to fail on an s390x host. - -Add appropriate byte-swapping on reads and writes of guest in-memory -data structures so that the device works correctly on big-endian -hosts. - -As part of this we constrain queue_read() to operate only on Cmd -structs and queue_write() on Evt structs, because in practice these -are the only data structures the two functions are used with, and we -need to know what the data structure is to be able to byte-swap its -parts correctly. 
- -Signed-off-by: Peter Maydell -Tested-by: Thomas Huth -Reviewed-by: Philippe Mathieu-Daudé -Reviewed-by: Eric Auger -Message-id: 20230717132641.764660-1-peter.maydell@linaro.org -Cc: qemu-stable@nongnu.org -(cherry picked from commit c6445544d4cea2628fbad3bad09f3d3a03c749d3) -Signed-off-by: Eric Auger ---- - hw/arm/smmu-common.c | 3 +-- - hw/arm/smmuv3.c | 39 +++++++++++++++++++++++++++++++-------- - 2 files changed, 32 insertions(+), 10 deletions(-) - -diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c -index e7f1c1f219..daa02ce798 100644 ---- a/hw/arm/smmu-common.c -+++ b/hw/arm/smmu-common.c -@@ -192,8 +192,7 @@ static int get_pte(dma_addr_t baseaddr, uint32_t index, uint64_t *pte, - dma_addr_t addr = baseaddr + index * sizeof(*pte); - - /* TODO: guarantee 64-bit single-copy atomicity */ -- ret = dma_memory_read(&address_space_memory, addr, pte, sizeof(*pte), -- MEMTXATTRS_UNSPECIFIED); -+ ret = ldq_le_dma(&address_space_memory, addr, pte, MEMTXATTRS_UNSPECIFIED); - - if (ret != MEMTX_OK) { - info->type = SMMU_PTW_ERR_WALK_EABT; -diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c -index 270c80b665..cfb56725a6 100644 ---- a/hw/arm/smmuv3.c -+++ b/hw/arm/smmuv3.c -@@ -98,20 +98,34 @@ static void smmuv3_write_gerrorn(SMMUv3State *s, uint32_t new_gerrorn) - trace_smmuv3_write_gerrorn(toggled & pending, s->gerrorn); - } - --static inline MemTxResult queue_read(SMMUQueue *q, void *data) -+static inline MemTxResult queue_read(SMMUQueue *q, Cmd *cmd) - { - dma_addr_t addr = Q_CONS_ENTRY(q); -+ MemTxResult ret; -+ int i; - -- return dma_memory_read(&address_space_memory, addr, data, q->entry_size, -- MEMTXATTRS_UNSPECIFIED); -+ ret = dma_memory_read(&address_space_memory, addr, cmd, sizeof(Cmd), -+ MEMTXATTRS_UNSPECIFIED); -+ if (ret != MEMTX_OK) { -+ return ret; -+ } -+ for (i = 0; i < ARRAY_SIZE(cmd->word); i++) { -+ le32_to_cpus(&cmd->word[i]); -+ } -+ return ret; - } - --static MemTxResult queue_write(SMMUQueue *q, void *data) -+static MemTxResult 
queue_write(SMMUQueue *q, Evt *evt_in) - { - dma_addr_t addr = Q_PROD_ENTRY(q); - MemTxResult ret; -+ Evt evt = *evt_in; -+ int i; - -- ret = dma_memory_write(&address_space_memory, addr, data, q->entry_size, -+ for (i = 0; i < ARRAY_SIZE(evt.word); i++) { -+ cpu_to_le32s(&evt.word[i]); -+ } -+ ret = dma_memory_write(&address_space_memory, addr, &evt, sizeof(Evt), - MEMTXATTRS_UNSPECIFIED); - if (ret != MEMTX_OK) { - return ret; -@@ -291,7 +305,7 @@ static void smmuv3_init_regs(SMMUv3State *s) - static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - SMMUEventInfo *event) - { -- int ret; -+ int ret, i; - - trace_smmuv3_get_ste(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -304,6 +318,9 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - - } -@@ -313,7 +330,7 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - CD *buf, SMMUEventInfo *event) - { - dma_addr_t addr = STE_CTXPTR(ste); -- int ret; -+ int ret, i; - - trace_smmuv3_get_cd(addr); - /* TODO: guarantee 64-bit single-copy atomicity */ -@@ -326,6 +343,9 @@ static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - event->u.f_ste_fetch.addr = addr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(buf->word); i++) { -+ le32_to_cpus(&buf->word[i]); -+ } - return 0; - } - -@@ -407,7 +427,7 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - return -EINVAL; - } - if (s->features & SMMU_FEATURE_2LVL_STE) { -- int l1_ste_offset, l2_ste_offset, max_l2_ste, span; -+ int l1_ste_offset, l2_ste_offset, max_l2_ste, span, i; - dma_addr_t l1ptr, l2ptr; - STEDesc l1std; - -@@ -431,6 +451,9 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, - event->u.f_ste_fetch.addr = l1ptr; - return -EINVAL; - } -+ for (i = 0; i < ARRAY_SIZE(l1std.word); i++) { -+ 
le32_to_cpus(&l1std.word[i]); -+ } - - span = L1STD_SPAN(&l1std); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch new file mode 100644 index 0000000..76ab341 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch @@ -0,0 +1,88 @@ +From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Tue, 9 Jan 2024 11:36:42 +1000 +Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory + regions + +RH-Author: Gavin Shan +RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions +RH-Jira: RHEL-19738 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4 + +Upstream: RHEL-only +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352 + +There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI +ECAM and PCI MMIO. Each of them has a property introduced by upstream +commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory +regions") so that the corresponding high memory region can be disabled. + +It's notable that another property ("compact-highmem") introduced by +upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property") +so that the compact high memory region layout during assignment can be +disabled, compatible to the old machine types. However, we don't have +the compatible issue since the compact high memory region layout is +always kept as disabled until RHEL9.2.0 machine type and onwards. + +Expose those 3 properties: "highmem-redists", "highmem-ecam" and +"highmem-mmio". The property "compact-highmem" is kept as hidden. 
+ +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5cab00b4cd..60f117f0d2 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp) + + vms->highmem_compact = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem_redists(Object *obj, Error **errp) + { +@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp) + + vms->highmem_mmio = value; + } +-#endif /* disabled for RHEL */ + + static bool virt_get_its(Object *obj, Error **errp) + { +@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable using " + "physical address space above 32 bits"); + ++ object_class_property_add_bool(oc, "highmem-redists", ++ virt_get_highmem_redists, ++ virt_set_highmem_redists); ++ object_class_property_set_description(oc, "highmem-redists", ++ "Set on/off to enable/disable high " ++ "memory region for GICv3 or GICv4 " ++ "redistributor"); ++ ++ object_class_property_add_bool(oc, "highmem-ecam", ++ virt_get_highmem_ecam, ++ virt_set_highmem_ecam); ++ object_class_property_set_description(oc, "highmem-ecam", ++ "Set on/off to enable/disable high " ++ "memory region for PCI ECAM"); ++ ++ object_class_property_add_bool(oc, "highmem-mmio", ++ virt_get_highmem_mmio, ++ virt_set_highmem_mmio); ++ object_class_property_set_description(oc, "highmem-mmio", ++ "Set on/off to enable/disable high " ++ "memory region for PCI MMIO"); ++ + object_class_property_add_str(oc, "gic-version", virt_get_gic_version, + virt_set_gic_version); + object_class_property_set_description(oc, "gic-version", +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Fix-compats.patch b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch new file mode 100644 index 0000000..7e3af18 --- /dev/null +++ 
b/SOURCES/kvm-hw-arm-virt-Fix-compats.patch @@ -0,0 +1,132 @@ +From 3f58194f8642a71c47d91d3c00a34faf44ea2c11 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 3 Jan 2024 05:57:38 -0500 +Subject: [PATCH] hw/arm/virt: Fix compats + +RH-Author: Eric Auger +RH-MergeRequest: 209: hw/arm/virt: Fix compats +RH-Jira: RHEL-17168 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [1/1] bcdf6493bbd6d7b52b0b88ff44441d22aeddfde2 (eauger1/centos-qemu-kvm) + +arm_rhel_compat is not added for virt-rhel9.4.0 machine causing +the efi-virtio.rom to be looked for when instantiating a virtio-net-pci +device and it won't be found since not shipped on ARM. This is a +regression compared to 9.2. + +Actually we do not need any rom file for any virtio-net-pci variant +because edk2 already brings the functionality. So for 9.4 onwards, we +want to set romfiles to "" for all of them. + +However at the moment we apply arm_rhel_compat from the latest +rhel*_virt_options(). This is not aligned with the generic compat +usage which sets compats for a given machine type to accomodate for +changes that occured after its advent. Here we are somehow abusing +the compat infra to set general driver options that should apply for +all machines. On top of that this is really error prone and we have +forgotten to add arm_rhel_compat several times in the past. + +So let's introduce set_arm_rhel_compat() being called before any +*virt_options in the non abstract machine class. That way the setting +will apply to any machine type without any need to add it in any +future machine types. + +For < 9.4 machines we don't really care keeping non void romfiles +for transitional and non transitional devices because anyway this was +not working. So let's keep things simple and apply the new defaults for +all RHEL9 machine types. + +Finally, to follow the generic pattern we should set hw_compat_rhel_9_0 +in 9.0 machine as it is done on x86 or ccw. 
This has no consequence on +aarch64 because it only contains x86 stuff but that helps understanding +the consistency. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 43 +++++++++++++++++++++++++++++-------------- + 1 file changed, 29 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0b17c94ad7..5cab00b4cd 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -111,11 +111,39 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) + #endif /* disabled for RHEL */ + ++/* ++ * This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. They may be overriden by older machine compats. ++ * ++ * virtio-net-pci variant romfiles are not needed because edk2 does ++ * fully support the pxe boot. Besides virtio romfiles are not shipped ++ * on rhel/aarch64. ++ */ ++GlobalProperty arm_rhel_compat[] = { ++ {"virtio-net-pci", "romfile", "" }, ++ {"virtio-net-pci-transitional", "romfile", "" }, ++ {"virtio-net-pci-non-transitional", "romfile", "" }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); ++ ++/* ++ * This cannot be called from the rhel_virt_class_init() because ++ * TYPE_RHEL_MACHINE is abstract and mc->compat_props g_ptr_array_new() ++ * only is called on virt-rhelm.n.s non abstract class init. ++ */ ++static void arm_rhel_compat_set(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, ++ arm_rhel_compat_len); ++} ++ + #define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ + static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ ++ arm_rhel_compat_set(mc); \ + rhel##m##n##s##_virt_options(mc); \ + mc->desc = "RHEL " # m "." # n "." 
# s " ARM Virtual Machine"; \ + if (latest) { \ +@@ -139,19 +167,6 @@ + #define DEFINE_RHEL_MACHINE(major, minor, subminor) \ + DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) + +-/* This variable is for changes to properties that are RHEL specific, +- * different to the current upstream and to be applied to the latest +- * machine type. +- */ +-GlobalProperty arm_rhel_compat[] = { +- { +- .driver = "virtio-net-pci", +- .property = "romfile", +- .value = "", +- }, +-}; +-const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); +- + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 + +@@ -3639,7 +3654,6 @@ static void rhel920_virt_options(MachineClass *mc) + { + rhel940_virt_options(mc); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); +@@ -3653,6 +3667,7 @@ static void rhel900_virt_options(MachineClass *mc) + rhel920_virt_options(mc); + + compat_props_add(mc->compat_props, hw_compat_rhel_9_1, hw_compat_rhel_9_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + + /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ + vmc->no_tcg_lpa2 = true; +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch b/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch deleted file mode 100644 index 42ec705..0000000 --- a/SOURCES/kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for - 
RHEL machines - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 -Upstream Status: RHEL only - -Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of -CPU cluster and NUMA node will be validated for 'virt-rhel*' machines. -A warning message will be printed if the boundary is broken. - -Signed-off-by: Gavin Shan ---- - hw/arm/virt.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/arm/virt.c b/hw/arm/virt.c -index df6a0231bc..faf68488d5 100644 ---- a/hw/arm/virt.c -+++ b/hw/arm/virt.c -@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) - mc->smp_props.clusters_supported = true; - mc->auto_enable_numa_with_memhp = true; - mc->auto_enable_numa_with_memdev = true; -+ /* platform instead of architectural choice */ -+ mc->cpu_cluster_has_numa_boundary = true; - mc->default_ram_id = "mach-virt.ram"; - - object_class_property_add(oc, "acpi", "OnOffAuto", --- -2.39.3 - diff --git a/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch new file mode 100644 index 0000000..4770a58 --- /dev/null +++ b/SOURCES/kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch @@ -0,0 +1,41 @@ +From 4c1d07995a7afb6fae68a7e7a8b6b6c94fa0a7bb Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Mon, 12 Feb 2024 10:37:54 +0100 +Subject: [PATCH 5/6] hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types + +RH-Author: Cornelia Huck +RH-MergeRequest: 225: hw/arm/virt: deprecate virt-rhel9.{0,2}.0 machine types +RH-Jira: RHEL-24988 +RH-Acked-by: Sebastian Ott +RH-Acked-by: Eric Auger +RH-Commit: [1/1] f15579db44808fa8a2d7bc01b3915aa59c064411 (cohuck/qemu-kvm-c9s) + +Jira: 
https://issues.redhat.com/browse/RHEL-24988 +Upstream: RHEL only + +We do not plan to support any machine types prior to 9.4.0; leave them +in, but mark as deprecated. + +Signed-off-by: Cornelia Huck +--- + hw/arm/virt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 60f117f0d2..943c563391 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3679,6 +3679,10 @@ static void rhel920_virt_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_rhel_9_4, hw_compat_rhel_9_4_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_3, hw_compat_rhel_9_3_len); + compat_props_add(mc->compat_props, hw_compat_rhel_9_2, hw_compat_rhel_9_2_len); ++ ++ /* RHEL 9.4 is the first supported release */ ++ mc->deprecation_reason = ++ "machine types for versions prior to 9.4 are deprecated"; + } + DEFINE_RHEL_MACHINE(9, 2, 0) + +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch new file mode 100644 index 0000000..81c20e5 --- /dev/null +++ b/SOURCES/kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch @@ -0,0 +1,41 @@ +From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:20 +0800 +Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8) +Signed-off-by: Eric Auger +--- + 
hw/i386/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index 55850791df..a1846be6f7 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -95,6 +95,7 @@ config Q35 + imply E1000E_PCI_EXPRESS + imply VMPORT + imply VMMOUSE ++ imply IOMMUFD + select PC_PCI + select PC_ACPI + select PCI_EXPRESS_Q35 +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch b/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch new file mode 100644 index 0000000..5470bdf --- /dev/null +++ b/SOURCES/kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch @@ -0,0 +1,186 @@ +From ea2e2368dcf4140be47288472f2c2a094358e0c7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Thu, 8 Feb 2024 23:03:45 +0100 +Subject: [PATCH 03/20] hw/i386/pc: Defer smbios_set_defaults() to machine_done +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [1/18] 9d4c1d1a910fec7d310429d6fc0b10c798932db7 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit: a0204a5ed091dfe79aced7ec8f3ce1931fd25816 +Author: Bernhard Beschow + + Handling most of smbios data generation in the machine_done notifier is similar + to how the ARM virt machine handles it which also calls smbios_set_defaults() + there. The result is that all pc machines are freed from explicitly worrying + about smbios setup. 
+ + Signed-off-by: Bernhard Beschow + Reviewed-by: Philippe Mathieu-Daudé + Message-ID: <20240208220349.4948-6-shentey@gmail.com> + Signed-off-by: Philippe Mathieu-Daudé + +Conflicts: hw/i386/pc_q35.c, hw/i386/pc_piix.c + due to missing 4d3457fef9 (w/i386/pc: Merge pc_guest_info_init() into pc_machine_initfn()) + and different signature of smbios_set_defaults() downstream +Fixup: hw/i386/fw_cfg.c to account for downstream changes smbios_set_defaults() + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 14 +++++++++++++- + hw/i386/fw_cfg.h | 3 ++- + hw/i386/pc.c | 2 +- + hw/i386/pc_piix.c | 12 ------------ + hw/i386/pc_q35.c | 11 ----------- + include/hw/i386/pc.h | 1 - + 6 files changed, 16 insertions(+), 27 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 7362daa45a..6a5466faf0 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -48,15 +48,27 @@ const char *fw_cfg_arch_key_name(uint16_t key) + return NULL; + } + +-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg) ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + { + #ifdef CONFIG_SMBIOS + uint8_t *smbios_tables, *smbios_anchor; + size_t smbios_tables_len, smbios_anchor_len; + struct smbios_phys_mem_area *mem_array; + unsigned i, array_count; ++ MachineState *ms = MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ MachineClass *mc = MACHINE_GET_CLASS(pcms); + X86CPU *cpu = X86_CPU(ms->possible_cpus->cpus[0].cpu); + ++ if (pcmc->smbios_defaults) { ++ /* These values are guest ABI, do not change */ ++ smbios_set_defaults("QEMU", mc->desc, mc->name, ++ pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, ++ pcms->smbios_entry_point_type); ++ } ++ + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h +index 86ca7c1c0c..1e1de6b4a3 100644 +--- 
a/hw/i386/fw_cfg.h ++++ b/hw/i386/fw_cfg.h +@@ -10,6 +10,7 @@ + #define HW_I386_FW_CFG_H + + #include "hw/boards.h" ++#include "hw/i386/pc.h" + #include "hw/nvram/fw_cfg.h" + + #define FW_CFG_IO_BASE 0x510 +@@ -22,7 +23,7 @@ + FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit); +-void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); ++void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a1faa9e92c..16de2a59e8 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) + + acpi_setup(); + if (x86ms->fw_cfg) { +- fw_cfg_build_smbios(MACHINE(pcms), x86ms->fw_cfg); ++ fw_cfg_build_smbios(pcms, x86ms->fw_cfg); + fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); + /* update FW_CFG_NB_CPUS to account for -device added CPUs */ + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 09d02cc91f..7344b35cf1 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -36,7 +36,6 @@ + #include "hw/rtc/mc146818rtc.h" + #include "hw/southbridge/piix.h" + #include "hw/display/ramfb.h" +-#include "hw/firmware/smbios.h" + #include "hw/pci/pci.h" + #include "hw/pci/pci_ids.h" + #include "hw/usb.h" +@@ -233,17 +232,6 @@ static void pc_init1(MachineState *machine, + + pc_guest_info_init(pcms); + +- if (pcmc->smbios_defaults) { +- MachineClass *mc = MACHINE_GET_CLASS(machine); +- /* These values are guest ABI, do not change */ +- smbios_set_defaults("Red Hat", "KVM", +- mc->desc, pcmc->smbios_legacy_mode, +- pcmc->smbios_uuid_encoded, +- pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); +- } +- + /* allocate ram and load rom/bios */ + if 
(!xen_enabled()) { + pc_memory_init(pcms, system_memory, rom_memory, hole64_size); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c6967e1846..9a22ff5dd6 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -45,7 +45,6 @@ + #include "hw/i386/amd_iommu.h" + #include "hw/i386/intel_iommu.h" + #include "hw/display/ramfb.h" +-#include "hw/firmware/smbios.h" + #include "hw/ide/pci.h" + #include "hw/ide/ahci.h" + #include "hw/intc/ioapic.h" +@@ -201,16 +200,6 @@ static void pc_q35_init(MachineState *machine) + + pc_guest_info_init(pcms); + +- if (pcmc->smbios_defaults) { +- /* These values are guest ABI, do not change */ +- smbios_set_defaults("Red Hat", "KVM", +- mc->desc, pcmc->smbios_legacy_mode, +- pcmc->smbios_uuid_encoded, +- pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); +- } +- + /* create pci host bus */ + phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 37644ede7e..c286c10bc3 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -12,7 +12,6 @@ + #include "hw/hotplug.h" + #include "qom/object.h" + #include "hw/i386/sgx-epc.h" +-#include "hw/firmware/smbios.h" + #include "hw/cxl/cxl.h" + + #define HPET_INTCAP "hpet-intcap" +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch deleted file mode 100644 index fe9cd8c..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 25 Jul 2023 15:34:45 -0300 -Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type - <= pc-q35-rhel9.2.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 192: hw/pci: 
Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0 -RH-Bugzilla: 2223691 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm) - -This is a downstream-only patch to that sets off the property -x-pcie-err-unc-mask for machine types <= pc-q35-rhel9.2.0, allowing -live migrations to RHEL9.2 happen successfully. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 -Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine -type < 8.0") -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5ea52317b9..6f5117669d 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { - { "virtio-mem", "x-early-migration", "false" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "migration", "x-preempt-pre-7-2", "true" }, -+ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch b/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch deleted file mode 100644 index 164bea7..0000000 --- a/SOURCES/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch +++ /dev/null @@ -1,118 +0,0 @@ -From 3ac01bb90da12538898f95b2fb4e7f6bc1557eb3 Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Tue, 2 May 2023 21:27:02 -0300 -Subject: [PATCH 18/21] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine - type < 8.0 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 170: hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 -RH-Bugzilla: 2189423 -RH-Acked-by: Peter Xu 
-RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] ad62dd5a8567f386770577513c00a0bf36bd3df1 (LeoBras/centos-qemu-kvm) - -Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK -set for machine types < 8.0 will cause migration to fail if the target -QEMU version is < 8.0.0 : - -qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 -qemu-system-x86_64: Failed to load PCIDevice:config -qemu-system-x86_64: Failed to load e1000e:parent_obj -qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' -qemu-system-x86_64: load of migration failed: Invalid argument - -The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, -with this cmdline: - -./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] - -In order to fix this, property x-pcie-err-unc-mask was introduced to -control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by -default, but is disabled if machine type <= 7.2. - -Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") -Suggested-by: Michael S. Tsirkin -Signed-off-by: Leonardo Bras -Message-Id: <20230503002701.854329-1-leobras@redhat.com> -Reviewed-by: Jonathan Cameron -Reviewed-by: Peter Xu -Reviewed-by: Juan Quintela -Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 -Tested-by: Fiona Ebner -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -(cherry picked from commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f) -Signed-off-by: Leonardo Bras ---- - hw/core/machine.c | 1 + - hw/pci/pci.c | 2 ++ - hw/pci/pcie_aer.c | 11 +++++++---- - include/hw/pci/pci.h | 2 ++ - 4 files changed, 12 insertions(+), 4 deletions(-) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 0e0120b7f2..c28702b690 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -43,6 +43,7 @@ GlobalProperty hw_compat_7_2[] = { - { "e1000e", "migrate-timadj", "off" }, - { "virtio-mem", "x-early-migration", "false" }, - { "migration", "x-preempt-pre-7-2", "true" }, -+ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, - }; - const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); - -diff --git a/hw/pci/pci.c b/hw/pci/pci.c -index def5000e7b..8ad4349e96 100644 ---- a/hw/pci/pci.c -+++ b/hw/pci/pci.c -@@ -79,6 +79,8 @@ static Property pci_props[] = { - DEFINE_PROP_STRING("failover_pair_id", PCIDevice, - failover_pair_id), - DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), -+ DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, -+ QEMU_PCIE_ERR_UNC_MASK_BITNR, true), - DEFINE_PROP_END_OF_LIST() - }; - -diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c -index 103667c368..374d593ead 100644 ---- a/hw/pci/pcie_aer.c -+++ b/hw/pci/pcie_aer.c -@@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, - - pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, - PCI_ERR_UNC_SUPPORTED); -- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_MASK_DEFAULT); -- pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -- PCI_ERR_UNC_SUPPORTED); -+ -+ if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { -+ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_MASK_DEFAULT); -+ pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, -+ PCI_ERR_UNC_SUPPORTED); -+ } - - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, - 
PCI_ERR_UNC_SEVERITY_DEFAULT); -diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h -index d5a40cd058..6dc6742fc4 100644 ---- a/include/hw/pci/pci.h -+++ b/include/hw/pci/pci.h -@@ -207,6 +207,8 @@ enum { - QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), - #define QEMU_PCIE_CXL_BITNR 10 - QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), -+#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 -+ QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), - }; - - typedef struct PCIINTxRoute { --- -2.39.3 - diff --git a/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch new file mode 100644 index 0000000..f850765 --- /dev/null +++ b/SOURCES/kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch @@ -0,0 +1,116 @@ +From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 15:03:55 +0100 +Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm) + +When the legacy and iommufd backends were introduced, a set of common +vfio-pci routines were exported in pci.c for both backends to use : + + vfio_pci_pre_reset + vfio_pci_get_pci_hot_reset_info + vfio_pci_host_match + vfio_pci_post_reset + +This introduced a build failure on PPC when --without-default-devices +is use because VFIO is always selected in ppc/Kconfig but VFIO_PCI is +not. + +Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the +VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with +CONFIG_VFIO_PCI. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42) +Signed-off-by: Eric Auger +--- + hw/ppc/Kconfig | 2 +- + hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 37 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig +index 56f0475a8e..44263a58c4 100644 +--- a/hw/ppc/Kconfig ++++ b/hw/ppc/Kconfig +@@ -3,11 +3,11 @@ config PSERIES + imply PCI_DEVICES + imply TEST_DEVICES + imply VIRTIO_VGA ++ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c + select NVDIMM + select DIMM + select PCI + select SPAPR_VSCSI +- select VFIO if LINUX # needed by spapr_pci_vfio.c + select XICS + select XIVE + select MSI_NONBROKEN +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index d1d07bec46..76b2a3487b 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -26,10 +26,12 @@ + #include "hw/pci/pci_device.h" + #include "hw/vfio/vfio-common.h" + #include "qemu/error-report.h" ++#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */ + + /* + * Interfaces for IBM EEH (Enhanced Error Handling) + */ ++#ifdef CONFIG_VFIO_PCI + static bool vfio_eeh_container_ok(VFIOContainer *container) + { + /* +@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) + + return RTAS_OUT_SUCCESS; + } ++ ++#else ++ ++bool spapr_phb_eeh_available(SpaprPhbState *sphb) ++{ ++ return false; ++} ++ ++void spapr_phb_vfio_reset(DeviceState *qdev) ++{ ++} ++ ++int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, ++ unsigned int addr, int option) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) ++{ ++ return RTAS_OUT_NOT_SUPPORTED; ++} ++ ++int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) ++{ ++ return 
RTAS_OUT_NOT_SUPPORTED; ++} ++ ++#endif /* CONFIG_VFIO_PCI */ +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch b/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch deleted file mode 100644 index 08ee94f..0000000 --- a/SOURCES/kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch +++ /dev/null @@ -1,470 +0,0 @@ -From d1b7a9b25c0df9016cd8e93d40837314b1a81d70 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 08/21] hw: replace most qemu_bh_new calls with - qemu_bh_new_guarded - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/13] bcbc67dd0023aee2b3a342665237daa83b183c7b (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit f63192b0544af5d3e4d5edfd85ab520fcf671377 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:09 2023 -0400 - - hw: replace most qemu_bh_new calls with qemu_bh_new_guarded - - This protects devices from bh->mmio reentrancy issues. - - Thanks: Thomas Huth for diagnosing OS X test failure. - Signed-off-by: Alexander Bulekov - Reviewed-by: Darren Kenny - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Michael S. 
Tsirkin - Reviewed-by: Paul Durrant - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-5-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/9pfs/xen-9p-backend.c | 5 ++++- - hw/block/dataplane/virtio-blk.c | 3 ++- - hw/block/dataplane/xen-block.c | 5 +++-- - hw/char/virtio-serial-bus.c | 3 ++- - hw/display/qxl.c | 9 ++++++--- - hw/display/virtio-gpu.c | 6 ++++-- - hw/ide/ahci.c | 3 ++- - hw/ide/ahci_internal.h | 1 + - hw/ide/core.c | 4 +++- - hw/misc/imx_rngc.c | 6 ++++-- - hw/misc/macio/mac_dbdma.c | 2 +- - hw/net/virtio-net.c | 3 ++- - hw/nvme/ctrl.c | 6 ++++-- - hw/scsi/mptsas.c | 3 ++- - hw/scsi/scsi-bus.c | 3 ++- - hw/scsi/vmw_pvscsi.c | 3 ++- - hw/usb/dev-uas.c | 3 ++- - hw/usb/hcd-dwc2.c | 3 ++- - hw/usb/hcd-ehci.c | 3 ++- - hw/usb/hcd-uhci.c | 2 +- - hw/usb/host-libusb.c | 6 ++++-- - hw/usb/redirect.c | 6 ++++-- - hw/usb/xen-usb.c | 3 ++- - hw/virtio/virtio-balloon.c | 5 +++-- - hw/virtio/virtio-crypto.c | 3 ++- - 25 files changed, 66 insertions(+), 33 deletions(-) - -diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c -index 74f3a05f88..0e266c552b 100644 ---- a/hw/9pfs/xen-9p-backend.c -+++ b/hw/9pfs/xen-9p-backend.c -@@ -61,6 +61,7 @@ typedef struct Xen9pfsDev { - - int num_rings; - Xen9pfsRing *rings; -+ MemReentrancyGuard mem_reentrancy_guard; - } Xen9pfsDev; - - static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev); -@@ -443,7 +444,9 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev) - xen_9pdev->rings[i].ring.out = xen_9pdev->rings[i].data + - XEN_FLEX_RING_SIZE(ring_order); - -- xen_9pdev->rings[i].bh = qemu_bh_new(xen_9pfs_bh, &xen_9pdev->rings[i]); -+ xen_9pdev->rings[i].bh = qemu_bh_new_guarded(xen_9pfs_bh, -+ &xen_9pdev->rings[i], -+ &xen_9pdev->mem_reentrancy_guard); - xen_9pdev->rings[i].out_cons = 0; - xen_9pdev->rings[i].out_size = 0; - xen_9pdev->rings[i].inprogress = false; -diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c -index 
b28d81737e..a6202997ee 100644 ---- a/hw/block/dataplane/virtio-blk.c -+++ b/hw/block/dataplane/virtio-blk.c -@@ -127,7 +127,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, - } else { - s->ctx = qemu_get_aio_context(); - } -- s->bh = aio_bh_new(s->ctx, notify_guest_bh, s); -+ s->bh = aio_bh_new_guarded(s->ctx, notify_guest_bh, s, -+ &DEVICE(vdev)->mem_reentrancy_guard); - s->batch_notify_vqs = bitmap_new(conf->num_queues); - - *dataplane = s; -diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c -index 734da42ea7..d8bc39d359 100644 ---- a/hw/block/dataplane/xen-block.c -+++ b/hw/block/dataplane/xen-block.c -@@ -633,8 +633,9 @@ XenBlockDataPlane *xen_block_dataplane_create(XenDevice *xendev, - } else { - dataplane->ctx = qemu_get_aio_context(); - } -- dataplane->bh = aio_bh_new(dataplane->ctx, xen_block_dataplane_bh, -- dataplane); -+ dataplane->bh = aio_bh_new_guarded(dataplane->ctx, xen_block_dataplane_bh, -+ dataplane, -+ &DEVICE(xendev)->mem_reentrancy_guard); - - return dataplane; - } -diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c -index 7d4601cb5d..dd619f0731 100644 ---- a/hw/char/virtio-serial-bus.c -+++ b/hw/char/virtio-serial-bus.c -@@ -985,7 +985,8 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) - return; - } - -- port->bh = qemu_bh_new(flush_queued_data_bh, port); -+ port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, -+ &dev->mem_reentrancy_guard); - port->elem = NULL; - } - -diff --git a/hw/display/qxl.c b/hw/display/qxl.c -index 80ce1e9a93..f1c0eb7dfc 100644 ---- a/hw/display/qxl.c -+++ b/hw/display/qxl.c -@@ -2201,11 +2201,14 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error **errp) - - qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl); - -- qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl); -+ qxl->update_irq = qemu_bh_new_guarded(qxl_update_irq_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); - 
qxl_reset_state(qxl); - -- qxl->update_area_bh = qemu_bh_new(qxl_render_update_area_bh, qxl); -- qxl->ssd.cursor_bh = qemu_bh_new(qemu_spice_cursor_refresh_bh, &qxl->ssd); -+ qxl->update_area_bh = qemu_bh_new_guarded(qxl_render_update_area_bh, qxl, -+ &DEVICE(qxl)->mem_reentrancy_guard); -+ qxl->ssd.cursor_bh = qemu_bh_new_guarded(qemu_spice_cursor_refresh_bh, &qxl->ssd, -+ &DEVICE(qxl)->mem_reentrancy_guard); - } - - static void qxl_realize_primary(PCIDevice *dev, Error **errp) -diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c -index 5e15c79b94..66ac9b6cc5 100644 ---- a/hw/display/virtio-gpu.c -+++ b/hw/display/virtio-gpu.c -@@ -1339,8 +1339,10 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) - - g->ctrl_vq = virtio_get_queue(vdev, 0); - g->cursor_vq = virtio_get_queue(vdev, 1); -- g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g); -- g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g); -+ g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, -+ &qdev->mem_reentrancy_guard); -+ g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, -+ &qdev->mem_reentrancy_guard); - QTAILQ_INIT(&g->reslist); - QTAILQ_INIT(&g->cmdq); - QTAILQ_INIT(&g->fenceq); -diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c -index 55902e1df7..4e76d6b191 100644 ---- a/hw/ide/ahci.c -+++ b/hw/ide/ahci.c -@@ -1509,7 +1509,8 @@ static void ahci_cmd_done(const IDEDMA *dma) - ahci_write_fis_d2h(ad); - - if (ad->port_regs.cmd_issue && !ad->check_bh) { -- ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad); -+ ad->check_bh = qemu_bh_new_guarded(ahci_check_cmd_bh, ad, -+ &ad->mem_reentrancy_guard); - qemu_bh_schedule(ad->check_bh); - } - } -diff --git a/hw/ide/ahci_internal.h b/hw/ide/ahci_internal.h -index 303fcd7235..2480455372 100644 ---- a/hw/ide/ahci_internal.h -+++ b/hw/ide/ahci_internal.h -@@ -321,6 +321,7 @@ struct AHCIDevice { - bool init_d2h_sent; - AHCICmdHdr *cur_cmd; - NCQTransferState ncq_tfs[AHCI_MAX_CMDS]; -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - 
struct AHCIPCIState { -diff --git a/hw/ide/core.c b/hw/ide/core.c -index 45d14a25e9..de48ff9f86 100644 ---- a/hw/ide/core.c -+++ b/hw/ide/core.c -@@ -513,6 +513,7 @@ BlockAIOCB *ide_issue_trim( - BlockCompletionFunc *cb, void *cb_opaque, void *opaque) - { - IDEState *s = opaque; -+ IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master; - TrimAIOCB *iocb; - - /* Paired with a decrement in ide_trim_bh_cb() */ -@@ -520,7 +521,8 @@ BlockAIOCB *ide_issue_trim( - - iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); - iocb->s = s; -- iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); -+ iocb->bh = qemu_bh_new_guarded(ide_trim_bh_cb, iocb, -+ &DEVICE(dev)->mem_reentrancy_guard); - iocb->ret = 0; - iocb->qiov = qiov; - iocb->i = -1; -diff --git a/hw/misc/imx_rngc.c b/hw/misc/imx_rngc.c -index 632c03779c..082c6980ad 100644 ---- a/hw/misc/imx_rngc.c -+++ b/hw/misc/imx_rngc.c -@@ -228,8 +228,10 @@ static void imx_rngc_realize(DeviceState *dev, Error **errp) - sysbus_init_mmio(sbd, &s->iomem); - - sysbus_init_irq(sbd, &s->irq); -- s->self_test_bh = qemu_bh_new(imx_rngc_self_test, s); -- s->seed_bh = qemu_bh_new(imx_rngc_seed, s); -+ s->self_test_bh = qemu_bh_new_guarded(imx_rngc_self_test, s, -+ &dev->mem_reentrancy_guard); -+ s->seed_bh = qemu_bh_new_guarded(imx_rngc_seed, s, -+ &dev->mem_reentrancy_guard); - } - - static void imx_rngc_reset(DeviceState *dev) -diff --git a/hw/misc/macio/mac_dbdma.c b/hw/misc/macio/mac_dbdma.c -index 43bb1f56ba..80a789f32b 100644 ---- a/hw/misc/macio/mac_dbdma.c -+++ b/hw/misc/macio/mac_dbdma.c -@@ -914,7 +914,7 @@ static void mac_dbdma_realize(DeviceState *dev, Error **errp) - { - DBDMAState *s = MAC_DBDMA(dev); - -- s->bh = qemu_bh_new(DBDMA_run_bh, s); -+ s->bh = qemu_bh_new_guarded(DBDMA_run_bh, s, &dev->mem_reentrancy_guard); - } - - static void mac_dbdma_class_init(ObjectClass *oc, void *data) -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 53e1c32643..447f669921 100644 ---- a/hw/net/virtio-net.c -+++ 
b/hw/net/virtio-net.c -@@ -2917,7 +2917,8 @@ static void virtio_net_add_queue(VirtIONet *n, int index) - n->vqs[index].tx_vq = - virtio_add_queue(vdev, n->net_conf.tx_queue_size, - virtio_net_handle_tx_bh); -- n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]); -+ n->vqs[index].tx_bh = qemu_bh_new_guarded(virtio_net_tx_bh, &n->vqs[index], -+ &DEVICE(vdev)->mem_reentrancy_guard); - } - - n->vqs[index].tx_waiting = 0; -diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c -index ac24eeb5ed..e5a468975e 100644 ---- a/hw/nvme/ctrl.c -+++ b/hw/nvme/ctrl.c -@@ -4607,7 +4607,8 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr, - QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry); - } - -- sq->bh = qemu_bh_new(nvme_process_sq, sq); -+ sq->bh = qemu_bh_new_guarded(nvme_process_sq, sq, -+ &DEVICE(sq->ctrl)->mem_reentrancy_guard); - - if (n->dbbuf_enabled) { - sq->db_addr = n->dbbuf_dbs + (sqid << 3); -@@ -5253,7 +5254,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, - } - } - n->cq[cqid] = cq; -- cq->bh = qemu_bh_new(nvme_post_cqes, cq); -+ cq->bh = qemu_bh_new_guarded(nvme_post_cqes, cq, -+ &DEVICE(cq->ctrl)->mem_reentrancy_guard); - } - - static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeRequest *req) -diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c -index c485da792c..3de288b454 100644 ---- a/hw/scsi/mptsas.c -+++ b/hw/scsi/mptsas.c -@@ -1322,7 +1322,8 @@ static void mptsas_scsi_realize(PCIDevice *dev, Error **errp) - } - s->max_devices = MPTSAS_NUM_PORTS; - -- s->request_bh = qemu_bh_new(mptsas_fetch_requests, s); -+ s->request_bh = qemu_bh_new_guarded(mptsas_fetch_requests, s, -+ &DEVICE(dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), &dev->qdev, &mptsas_scsi_info); - } -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index c97176110c..3c20b47ad0 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -193,7 +193,8 @@ static void scsi_dma_restart_cb(void *opaque, bool 
running, RunState state) - AioContext *ctx = blk_get_aio_context(s->conf.blk); - /* The reference is dropped in scsi_dma_restart_bh.*/ - object_ref(OBJECT(s)); -- s->bh = aio_bh_new(ctx, scsi_dma_restart_bh, s); -+ s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - qemu_bh_schedule(s->bh); - } - } -diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c -index fa76696855..4de34536e9 100644 ---- a/hw/scsi/vmw_pvscsi.c -+++ b/hw/scsi/vmw_pvscsi.c -@@ -1184,7 +1184,8 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) - pcie_endpoint_cap_init(pci_dev, PVSCSI_EXP_EP_OFFSET); - } - -- s->completion_worker = qemu_bh_new(pvscsi_process_completion_queue, s); -+ s->completion_worker = qemu_bh_new_guarded(pvscsi_process_completion_queue, s, -+ &DEVICE(pci_dev)->mem_reentrancy_guard); - - scsi_bus_init(&s->bus, sizeof(s->bus), DEVICE(pci_dev), &pvscsi_scsi_info); - /* override default SCSI bus hotplug-handler, with pvscsi's one */ -diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c -index 88f99c05d5..f013ded91e 100644 ---- a/hw/usb/dev-uas.c -+++ b/hw/usb/dev-uas.c -@@ -937,7 +937,8 @@ static void usb_uas_realize(USBDevice *dev, Error **errp) - - QTAILQ_INIT(&uas->results); - QTAILQ_INIT(&uas->requests); -- uas->status_bh = qemu_bh_new(usb_uas_send_status_bh, uas); -+ uas->status_bh = qemu_bh_new_guarded(usb_uas_send_status_bh, uas, -+ &d->mem_reentrancy_guard); - - dev->flags |= (1 << USB_DEV_FLAG_IS_SCSI_STORAGE); - scsi_bus_init(&uas->bus, sizeof(uas->bus), DEVICE(dev), &usb_uas_scsi_info); -diff --git a/hw/usb/hcd-dwc2.c b/hw/usb/hcd-dwc2.c -index 8755e9cbb0..a0c4e782b2 100644 ---- a/hw/usb/hcd-dwc2.c -+++ b/hw/usb/hcd-dwc2.c -@@ -1364,7 +1364,8 @@ static void dwc2_realize(DeviceState *dev, Error **errp) - s->fi = USB_FRMINTVL - 1; - s->eof_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_frame_boundary, s); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, dwc2_work_timer, s); -- s->async_bh = qemu_bh_new(dwc2_work_bh, s); -+ 
s->async_bh = qemu_bh_new_guarded(dwc2_work_bh, s, -+ &dev->mem_reentrancy_guard); - - sysbus_init_irq(sbd, &s->irq); - } -diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c -index d4da8dcb8d..c930c60921 100644 ---- a/hw/usb/hcd-ehci.c -+++ b/hw/usb/hcd-ehci.c -@@ -2533,7 +2533,8 @@ void usb_ehci_realize(EHCIState *s, DeviceState *dev, Error **errp) - } - - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, ehci_work_timer, s); -- s->async_bh = qemu_bh_new(ehci_work_bh, s); -+ s->async_bh = qemu_bh_new_guarded(ehci_work_bh, s, -+ &dev->mem_reentrancy_guard); - s->device = dev; - - s->vmstate = qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s); -diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c -index 8ac1175ad2..77baaa7a6b 100644 ---- a/hw/usb/hcd-uhci.c -+++ b/hw/usb/hcd-uhci.c -@@ -1190,7 +1190,7 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) - USB_SPEED_MASK_LOW | USB_SPEED_MASK_FULL); - } - } -- s->bh = qemu_bh_new(uhci_bh, s); -+ s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); - s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); - s->num_ports_vmstate = NB_PORTS; - QTAILQ_INIT(&s->queues); -diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c -index 176868d345..f500db85ab 100644 ---- a/hw/usb/host-libusb.c -+++ b/hw/usb/host-libusb.c -@@ -1141,7 +1141,8 @@ static void usb_host_nodev_bh(void *opaque) - static void usb_host_nodev(USBHostDevice *s) - { - if (!s->bh_nodev) { -- s->bh_nodev = qemu_bh_new(usb_host_nodev_bh, s); -+ s->bh_nodev = qemu_bh_new_guarded(usb_host_nodev_bh, s, -+ &DEVICE(s)->mem_reentrancy_guard); - } - qemu_bh_schedule(s->bh_nodev); - } -@@ -1739,7 +1740,8 @@ static int usb_host_post_load(void *opaque, int version_id) - USBHostDevice *dev = opaque; - - if (!dev->bh_postld) { -- dev->bh_postld = qemu_bh_new(usb_host_post_load_bh, dev); -+ dev->bh_postld = qemu_bh_new_guarded(usb_host_post_load_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - } - 
qemu_bh_schedule(dev->bh_postld); - dev->bh_postld_pending = true; -diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c -index fd7df599bc..39fbaaab16 100644 ---- a/hw/usb/redirect.c -+++ b/hw/usb/redirect.c -@@ -1441,8 +1441,10 @@ static void usbredir_realize(USBDevice *udev, Error **errp) - } - } - -- dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev); -- dev->device_reject_bh = qemu_bh_new(usbredir_device_reject_bh, dev); -+ dev->chardev_close_bh = qemu_bh_new_guarded(usbredir_chardev_close_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); -+ dev->device_reject_bh = qemu_bh_new_guarded(usbredir_device_reject_bh, dev, -+ &DEVICE(dev)->mem_reentrancy_guard); - dev->attach_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, usbredir_do_attach, dev); - - packet_id_queue_init(&dev->cancelled, dev, "cancelled"); -diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c -index 66cb3f7c24..38ee660a30 100644 ---- a/hw/usb/xen-usb.c -+++ b/hw/usb/xen-usb.c -@@ -1032,7 +1032,8 @@ static void usbback_alloc(struct XenLegacyDevice *xendev) - - QTAILQ_INIT(&usbif->req_free_q); - QSIMPLEQ_INIT(&usbif->hotplug_q); -- usbif->bh = qemu_bh_new(usbback_bh, usbif); -+ usbif->bh = qemu_bh_new_guarded(usbback_bh, usbif, -+ &DEVICE(xendev)->mem_reentrancy_guard); - } - - static int usbback_free(struct XenLegacyDevice *xendev) -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 43092aa634..5186e831dd 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -909,8 +909,9 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) - precopy_add_notifier(&s->free_page_hint_notify); - - object_ref(OBJECT(s->iothread)); -- s->free_page_bh = aio_bh_new(iothread_get_aio_context(s->iothread), -- virtio_ballloon_get_free_page_hints, s); -+ s->free_page_bh = aio_bh_new_guarded(iothread_get_aio_context(s->iothread), -+ virtio_ballloon_get_free_page_hints, s, -+ &dev->mem_reentrancy_guard); - } - - if (virtio_has_feature(s->host_features, 
VIRTIO_BALLOON_F_REPORTING)) { -diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c -index 802e1b9659..2fe804510f 100644 ---- a/hw/virtio/virtio-crypto.c -+++ b/hw/virtio/virtio-crypto.c -@@ -1074,7 +1074,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) - vcrypto->vqs[i].dataq = - virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); - vcrypto->vqs[i].dataq_bh = -- qemu_bh_new(virtio_crypto_dataq_bh, &vcrypto->vqs[i]); -+ qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], -+ &dev->mem_reentrancy_guard); - vcrypto->vqs[i].vcrypto = vcrypto; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch b/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch deleted file mode 100644 index efa966e..0000000 --- a/SOURCES/kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 8075a9e05699ef0c4e078017eefc20db3186328f Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 17/21] hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI - controller (CVE-2023-0330) - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/13] 0b6fa742075ef2db3a354ee672dccca3747051cc (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit b987718bbb1d0eabf95499b976212dd5f0120d75 -Author: Thomas Huth -Date: Mon May 22 11:10:11 2023 +0200 - - hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330) - - We cannot use the generic reentrancy guard in the LSI code, so - we have to manually prevent endless reentrancy here. 
The problematic - lsi_execute_script() function has already a way to detect whether - too many instructions have been executed - we just have to slightly - change the logic here that it also takes into account if the function - has been called too often in a reentrant way. - - The code in fuzz-lsi53c895a-test.c has been taken from an earlier - patch by Mauro Matteo Cascella. - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563 - Message-Id: <20230522091011.1082574-1-thuth@redhat.com> - Reviewed-by: Stefan Hajnoczi - Reviewed-by: Alexander Bulekov - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 23 +++++++++++++++------ - tests/qtest/fuzz-lsi53c895a-test.c | 33 ++++++++++++++++++++++++++++++ - 2 files changed, 50 insertions(+), 6 deletions(-) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index 048436352b..f7d45b0b20 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -1134,15 +1134,24 @@ static void lsi_execute_script(LSIState *s) - uint32_t addr, addr_high; - int opcode; - int insn_processed = 0; -+ static int reentrancy_level; -+ -+ reentrancy_level++; - - s->istat1 |= LSI_ISTAT1_SRUN; - again: -- if (++insn_processed > LSI_MAX_INSN) { -- /* Some windows drivers make the device spin waiting for a memory -- location to change. If we have been executed a lot of code then -- assume this is the case and force an unexpected device disconnect. -- This is apparently sufficient to beat the drivers into submission. -- */ -+ /* -+ * Some windows drivers make the device spin waiting for a memory location -+ * to change. If we have executed more than LSI_MAX_INSN instructions then -+ * assume this is the case and force an unexpected device disconnect. This -+ * is apparently sufficient to beat the drivers into submission. -+ * -+ * Another issue (CVE-2023-0330) can occur if the script is programmed to -+ * trigger itself again and again. 
Avoid this problem by stopping after -+ * being called multiple times in a reentrant way (8 is an arbitrary value -+ * which should be enough for all valid use cases). -+ */ -+ if (++insn_processed > LSI_MAX_INSN || reentrancy_level > 8) { - if (!(s->sien0 & LSI_SIST0_UDC)) { - qemu_log_mask(LOG_GUEST_ERROR, - "lsi_scsi: inf. loop with UDC masked"); -@@ -1596,6 +1605,8 @@ again: - } - } - trace_lsi_execute_script_stop(); -+ -+ reentrancy_level--; - } - - static uint8_t lsi_reg_readb(LSIState *s, int offset) -diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c -index 2012bd54b7..1b55928b9f 100644 ---- a/tests/qtest/fuzz-lsi53c895a-test.c -+++ b/tests/qtest/fuzz-lsi53c895a-test.c -@@ -8,6 +8,36 @@ - #include "qemu/osdep.h" - #include "libqtest.h" - -+/* -+ * This used to trigger a DMA reentrancy issue -+ * leading to memory corruption bugs like stack -+ * overflow or use-after-free -+ * https://gitlab.com/qemu-project/qemu/-/issues/1563 -+ */ -+static void test_lsi_dma_reentrancy(void) -+{ -+ QTestState *s; -+ -+ s = qtest_init("-M q35 -m 512M -nodefaults " -+ "-blockdev driver=null-co,node-name=null0 " -+ "-device lsi53c810 -device scsi-cd,drive=null0"); -+ -+ qtest_outl(s, 0xcf8, 0x80000804); /* PCI Command Register */ -+ qtest_outw(s, 0xcfc, 0x7); /* Enables accesses */ -+ qtest_outl(s, 0xcf8, 0x80000814); /* Memory Bar 1 */ -+ qtest_outl(s, 0xcfc, 0xff100000); /* Set MMIO Address*/ -+ qtest_outl(s, 0xcf8, 0x80000818); /* Memory Bar 2 */ -+ qtest_outl(s, 0xcfc, 0xff000000); /* Set RAM Address*/ -+ qtest_writel(s, 0xff000000, 0xc0000024); -+ qtest_writel(s, 0xff000114, 0x00000080); -+ qtest_writel(s, 0xff00012c, 0xff000000); -+ qtest_writel(s, 0xff000004, 0xff000114); -+ qtest_writel(s, 0xff000008, 0xff100014); -+ qtest_writel(s, 0xff10002f, 0x000000ff); -+ -+ qtest_quit(s); -+} -+ - /* - * This used to trigger a UAF in lsi_do_msgout() - * https://gitlab.com/qemu-project/qemu/-/issues/972 -@@ -124,5 +154,8 @@ int main(int argc, char 
**argv) - qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req", - test_lsi_do_msgout_cancel_req); - -+ qtest_add_func("fuzz/lsi53c895a/lsi_dma_reentrancy", -+ test_lsi_dma_reentrancy); -+ - return g_test_run(); - } --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch new file mode 100644 index 0000000..2c7f6ff --- /dev/null +++ b/SOURCES/kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch @@ -0,0 +1,73 @@ +From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Volker=20R=C3=BCmelin?= +Date: Fri, 29 Dec 2023 21:38:54 +0100 +Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm) + +Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list") +introduced a global VFIODevice list, but forgot to update the list +element field name when iterating over the new list. Change the code +to use the correct list element field. 
+ +Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061 +Signed-off-by: Volker Rümelin +Reviewed-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0d4d8b8416..0b3352f2a9 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -73,7 +73,7 @@ bool vfio_mig_active(void) + return false; + } + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration_blocker) { + return false; + } +@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void) + unsigned int device_num = 0; + bool all_support_p2p = true; + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->migration) { + device_num++; + +@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque) + { + VFIODevice *vbasedev; + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->dev->realized) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); + } + } + +- QLIST_FOREACH(vbasedev, &vfio_device_list, next) { ++ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->dev->realized && vbasedev->needs_reset) { + vbasedev->ops->vfio_hot_reset_multi(vbasedev); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch deleted file mode 100644 index ffabd75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 
fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0ddcb39c9357 -Author: Alex Williamson -Date: Fri Jun 30 16:36:08 2023 -0600 - - hw/vfio/pci-quirks: Sanitize capability pointer - - Coverity reports a tained scalar when traversing the capabilities - chain (CID 1516589). In practice I've never seen a device with a - chain so broken as to cause an issue, but it's also pretty easy to - sanitize. - - Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index 0ed2fcd531..f4ff836805 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - .set = set_nv_gpudirect_clique_id, - }; - -+static bool is_valid_std_cap_offset(uint8_t pos) -+{ -+ return (pos >= PCI_STD_HEADER_SIZEOF && -+ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); -+} -+ - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - */ - ret = pread(vdev->vbasedev.fd, &tmp, 1, - vdev->config_offset + 
PCI_CAPABILITY_LIST); -- if (ret != 1 || !tmp) { -+ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { - error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); - return -EINVAL; - } -@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - d4_conflict = true; - } - tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -- } while (tmp); -+ } while (is_valid_std_cap_offset(tmp)); - - if (!c8_conflict) { - pos = 0xC8; --- -2.39.3 - diff --git a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch deleted file mode 100644 index 99f5c75..0000000 --- a/SOURCES/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +++ /dev/null @@ -1,110 +0,0 @@ -From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for - GPUDirect Cliques -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f6b30c1984f7 -Author: Alex Williamson -Date: Thu Jun 8 12:05:07 2023 -0600 - - hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques - - NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset - previously reserved for use by hypervisors to implement the GPUDirect - Cliques capability. A revised specification provides an alternate - location. 
Add a config space walk to the quirk to check for conflicts, - allowing us to fall back to the new location or generate an error at the - quirk setup rather than when the real conflicting capability is added - should there be no available location. - - Signed-off-by: Alex Williamson - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 40 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c -index f0147a050a..0ed2fcd531 100644 ---- a/hw/vfio/pci-quirks.c -+++ b/hw/vfio/pci-quirks.c -@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) - * +---------------------------------+---------------------------------+ - * - * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf -+ * -+ * Specification for Turning and later GPU architectures: -+ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf - */ - static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, - const char *name, void *opaque, -@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { - static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - { - PCIDevice *pdev = &vdev->pdev; -- int ret, pos = 0xC8; -+ int ret, pos; -+ bool c8_conflict = false, d4_conflict = false; -+ uint8_t tmp; - - if (vdev->nv_gpudirect_clique == 0xFF) { - return 0; -@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) - return -EINVAL; - } - -+ /* -+ * Per the updated specification above, it's recommended to use offset -+ * D4h for Turing and later GPU architectures due to a conflict of the -+ * MSI-X capability at C8h. We don't know how to determine the GPU -+ * architecture, instead we walk the capability chain to mark conflicts -+ * and choose one or error based on the result. -+ * -+ * NB. 
Cap list head in pdev->config is already cleared, read from device. -+ */ -+ ret = pread(vdev->vbasedev.fd, &tmp, 1, -+ vdev->config_offset + PCI_CAPABILITY_LIST); -+ if (ret != 1 || !tmp) { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); -+ return -EINVAL; -+ } -+ -+ do { -+ if (tmp == 0xC8) { -+ c8_conflict = true; -+ } else if (tmp == 0xD4) { -+ d4_conflict = true; -+ } -+ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; -+ } while (tmp); -+ -+ if (!c8_conflict) { -+ pos = 0xC8; -+ } else if (!d4_conflict) { -+ pos = 0xD4; -+ } else { -+ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); -+ return -EINVAL; -+ } -+ - ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); - if (ret < 0) { - error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); --- -2.39.3 - diff --git a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch b/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch deleted file mode 100644 index 7a5963c..0000000 --- a/SOURCES/kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 0a731ac1191182546e80af5f39d178a5a2f3688f Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Mon, 17 Jul 2023 18:21:26 +0200 -Subject: [PATCH 07/14] hw/virtio-iommu: Fix potential OOB access in - virtio_iommu_handle_command() - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [1/3] ecdb1e1aa6b93761dc87ea79bc0a1093ad649a74 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -In the virtio_iommu_handle_command() when a PROBE request is handled, -output_size takes a value greater than the tail size and on a subsequent -iteration we can get a stack out-of-band access. Initialize the -output_size on each iteration. - -The issue was found with ASAN. 
Credits to: -Yiming Tao(Zhejiang University) -Gaoning Pan(Zhejiang University) - -Fixes: 1733eebb9e7 ("virtio-iommu: Implement RESV_MEM probe request") -Signed-off-by: Eric Auger -Reported-by: Mauro Matteo Cascella -Cc: qemu-stable@nongnu.org - -Message-Id: <20230717162126.11693-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit cf2f89edf36a59183166ae8721a8d7ab5cd286bd) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 421e2a944f..17ce630200 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -728,13 +728,15 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) - VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); - struct virtio_iommu_req_head head; - struct virtio_iommu_req_tail tail = {}; -- size_t output_size = sizeof(tail), sz; - VirtQueueElement *elem; - unsigned int iov_cnt; - struct iovec *iov; - void *buf = NULL; -+ size_t sz; - - for (;;) { -+ size_t output_size = sizeof(tail); -+ - elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); - if (!elem) { - return; --- -2.39.3 - diff --git a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch b/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch deleted file mode 100644 index 3ee6b29..0000000 --- a/SOURCES/kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch +++ /dev/null @@ -1,52 +0,0 @@ -From f9d982fae156aa9db0506e1e098c1e8a7f7eec94 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:29:15 -0400 -Subject: [PATCH 13/14] i386/cpu: Update how the EBX register of CPUID - 0x8000001F is set - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] efc368b2c844fd4fbc3c755a5e2da288329e7a2c (bdas1/qemu-kvm) - 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit fb6bbafc0f19385fb257ee073ed13dcaf613f2f8 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:30 2022 -0500 - - i386/cpu: Update how the EBX register of CPUID 0x8000001F is set - - Update the setting of CPUID 0x8000001F EBX to clearly document the ranges - associated with fields being set. - - Fixes: 6cb8f2a663 ("cpu/i386: populate CPUID 0x8000_001F when SEV is active") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <5822fd7d02b575121380e1f493a8f6d9eba2b11a.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 839706b430..4ac3046313 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -6008,8 +6008,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - if (sev_enabled()) { - *eax = 0x2; - *eax |= sev_es_enabled() ? 
0x8 : 0; -- *ebx = sev_get_cbit_position(); -- *ebx |= sev_get_reduced_phys_bits() << 6; -+ *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ -+ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; - default: --- -2.39.3 - diff --git a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch b/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch deleted file mode 100644 index e9d28d3..0000000 --- a/SOURCES/kvm-i386-sev-Update-checks-and-information-related-to-re.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 5c0d254762caaffd574bd95dbfc1df416e6e2509 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:22:55 -0400 -Subject: [PATCH 12/14] i386/sev: Update checks and information related to - reduced-phys-bits - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] 7c5e7ea9f6cd39e84e5b60417c849430296399fd (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 8168fed9f84e3128f7628969ae78af49433d5ce7 -Author: Tom Lendacky -Date: Fri Sep 30 10:14:29 2022 -0500 - - i386/sev: Update checks and information related to reduced-phys-bits - - The value of the reduced-phys-bits parameter is propogated to the CPUID - information exposed to the guest. Update the current validation check to - account for the size of the CPUID field (6-bits), ensuring the value is - in the range of 1 to 63. - - Maintain backward compatibility, to an extent, by allowing a value greater - than 1 (so that the previously documented value of 5 still works), but not - allowing anything over 63. - - Fixes: d8575c6c02 ("sev/i386: add command to initialize the memory encryption context") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/sev.c | 17 ++++++++++++++--- - 1 file changed, 14 insertions(+), 3 deletions(-) - -diff --git a/target/i386/sev.c b/target/i386/sev.c -index 859e06f6ad..fe2144c038 100644 ---- a/target/i386/sev.c -+++ b/target/i386/sev.c -@@ -932,15 +932,26 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) - host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); - host_cbitpos = ebx & 0x3f; - -+ /* -+ * The cbitpos value will be placed in bit positions 5:0 of the EBX -+ * register of CPUID 0x8000001F. No need to verify the range as the -+ * comparison against the host value accomplishes that. -+ */ - if (host_cbitpos != sev->cbitpos) { - error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); - goto err; - } - -- if (sev->reduced_phys_bits < 1) { -- error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1," -- " requested '%d'", __func__, sev->reduced_phys_bits); -+ /* -+ * The reduced-phys-bits value will be placed in bit positions 11:6 of -+ * the EBX register of CPUID 0x8000001F, so verify the supplied value -+ * is in the range of 1 to 63. 
-+ */ -+ if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { -+ error_setg(errp, "%s: reduced_phys_bits check failed," -+ " it should be in the range of 1 to 63, requested '%d'", -+ __func__, sev->reduced_phys_bits); - goto err; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch new file mode 100644 index 0000000..0cf782e --- /dev/null +++ b/SOURCES/kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch @@ -0,0 +1,54 @@ +From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 17 Jan 2024 21:08:59 +0100 +Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init() + mis-assignment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions +RH-Jira: RHEL-21570 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-21570 + +commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503 +Author: Elen Avan +Date: Fri Dec 22 22:17:21 2023 +0300 + + include/ui/rect.h: fix qemu_rect_init() mis-assignment + + Signed-off-by: Elen Avan + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 + Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" + Cc: qemu-stable@nongnu.org + Reviewed-by: Michael Tokarev + Reviewed-by: Marc-André Lureau + Signed-off-by: Michael Tokarev + +Signed-off-by: Thomas Huth +--- + include/ui/rect.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/ui/rect.h b/include/ui/rect.h +index 94898f92d0..68f05d78a8 100644 +--- a/include/ui/rect.h ++++ b/include/ui/rect.h +@@ -19,7 +19,7 @@ static inline void 
qemu_rect_init(QemuRect *rect, + uint16_t width, uint16_t height) + { + rect->x = x; +- rect->y = x; ++ rect->y = y; + rect->width = width; + rect->height = height; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch b/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch new file mode 100644 index 0000000..3b2841f --- /dev/null +++ b/SOURCES/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch @@ -0,0 +1,133 @@ +From 65d58819ff7b012e43b5f1da1356b559d3f5a962 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 14 Mar 2024 17:58:25 +0100 +Subject: [PATCH 3/4] iotests: Add test for reset/AioContext switches with NBD + exports + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 7266acdf85271d157497bfe452aa6eeb58fbe7c6 (kmwolf/centos-qemu-kvm) + +This replicates the scenario in which the bug was reported. +Unfortunately this relies on actually executing a guest (so that the +firmware initialises the virtio-blk device and moves it to its +configured iothread), so this can't make use of the qtest accelerator +like most other test cases. I tried to find a different easy way to +trigger the bug, but couldn't find one. 
+ +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e8fce34eccf68a32f4ecf2c6f121ff2ac383d6bf) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-nbd-export | 66 +++++++++++++++++++ + .../tests/iothreads-nbd-export.out | 19 ++++++ + 2 files changed, 85 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-nbd-export + create mode 100644 tests/qemu-iotests/tests/iothreads-nbd-export.out + +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export b/tests/qemu-iotests/tests/iothreads-nbd-export +new file mode 100755 +index 0000000000..037260729c +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export +@@ -0,0 +1,66 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++# Creator/Owner: Kevin Wolf ++ ++import time ++import qemu ++import iotests ++ ++iotests.script_initialize(supported_fmts=['qcow2'], ++ supported_platforms=['linux']) ++ ++with iotests.FilePath('disk1.img') as path, \ ++ iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \ ++ qemu.machine.QEMUMachine(iotests.qemu_prog) as vm: ++ ++ img_size = '10M' ++ ++ iotests.log('Preparing disk...') ++ iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size) ++ vm.add_args('-blockdev', f'file,node-name=disk-file,filename={path}') ++ vm.add_args('-blockdev', 'qcow2,node-name=disk,file=disk-file') ++ vm.add_args('-object', 'iothread,id=iothread0') ++ vm.add_args('-device', ++ 'virtio-blk,drive=disk,iothread=iothread0,share-rw=on') ++ ++ iotests.log('Launching VM...') ++ vm.add_args('-accel', 'kvm', '-accel', 'tcg') ++ #vm.add_args('-accel', 'qtest') ++ vm.launch() ++ ++ iotests.log('Exporting to NBD...') ++ iotests.log(vm.qmp('nbd-server-start', ++ addr={'type': 'unix', 'data': {'path': nbd_sock}})) ++ iotests.log(vm.qmp('block-export-add', type='nbd', id='exp0', ++ node_name='disk', writable=True)) ++ ++ iotests.log('Connecting qemu-img...') ++ qemu_io = iotests.QemuIoInteractive('-f', 'raw', ++ f'nbd+unix:///disk?socket={nbd_sock}') ++ ++ iotests.log('Moving the NBD export to a different iothread...') ++ for i in range(0, 10): ++ iotests.log(vm.qmp('system_reset')) ++ time.sleep(0.1) ++ ++ iotests.log('Checking that it is still alive...') ++ iotests.log(vm.qmp('query-status')) ++ ++ qemu_io.close() ++ vm.shutdown() +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export.out b/tests/qemu-iotests/tests/iothreads-nbd-export.out +new file mode 100644 +index 0000000000..bc514e35e5 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export.out +@@ -0,0 +1,19 @@ ++Preparing disk... ++Launching VM... ++Exporting to NBD... ++{"return": {}} ++{"return": {}} ++Connecting qemu-img... ++Moving the NBD export to a different iothread... 
++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++Checking that it is still alive... ++{"return": {"running": true, "status": "running"}} +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch b/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch deleted file mode 100644 index 1fc5697..0000000 --- a/SOURCES/kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch +++ /dev/null @@ -1,144 +0,0 @@ -From 399bfc04fb8352af6d2f4c984e68c334d2043368 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:34 +0200 -Subject: [PATCH 04/21] iotests: Test commit with iothreads and ongoing I/O - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 1e42fde5951ae12bddc4eea2320f066f7079878f (kmwolf/centos-qemu-kvm) - -This tests exercises graph locking, draining, and graph modifications -with AioContext switches a lot. Amongst others, it serves as a -regression test for bdrv_graph_wrlock() deadlocking because it is called -with a locked AioContext and for AioContext handling in the NBD server. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-4-kwolf@redhat.com> -Tested-by: Eric Blake -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 95fdd8db61848d31fde1d9b32da7f3f76babfa25) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/iotests.py | 4 ++ - .../qemu-iotests/tests/graph-changes-while-io | 56 +++++++++++++++++-- - .../tests/graph-changes-while-io.out | 4 +- - 3 files changed, 58 insertions(+), 6 deletions(-) - -diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py -index 3e82c634cf..7073579a7d 100644 ---- a/tests/qemu-iotests/iotests.py -+++ b/tests/qemu-iotests/iotests.py -@@ -462,6 +462,10 @@ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ - assert self._qmp is not None - return self._qmp.cmd(cmd, args) - -+ def get_qmp(self) -> QEMUMonitorProtocol: -+ assert self._qmp is not None -+ return self._qmp -+ - def stop(self, kill_signal=15): - self._p.send_signal(kill_signal) - self._p.wait() -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io -index 7664f33689..750e7d4d38 100755 ---- a/tests/qemu-iotests/tests/graph-changes-while-io -+++ b/tests/qemu-iotests/tests/graph-changes-while-io -@@ -22,19 +22,19 @@ - import os - from threading import Thread - import iotests --from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ -- QemuStorageDaemon -+from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ -+ QMPTestCase, QemuStorageDaemon - - - top = os.path.join(iotests.test_dir, 'top.img') - nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') - - --def do_qemu_img_bench() -> None: -+def do_qemu_img_bench(count: int = 2000000) -> None: - """ - Do some I/O requests on `nbd_sock`. 
- """ -- qemu_img('bench', '-f', 'raw', '-c', '2000000', -+ qemu_img('bench', '-f', 'raw', '-c', str(count), - f'nbd+unix:///node0?socket={nbd_sock}') - - -@@ -84,6 +84,54 @@ class TestGraphChangesWhileIO(QMPTestCase): - - bench_thr.join() - -+ def test_commit_while_io(self) -> None: -+ # Run qemu-img bench in the background -+ bench_thr = Thread(target=do_qemu_img_bench, args=(200000, )) -+ bench_thr.start() -+ -+ qemu_io('-c', 'write 0 64k', top) -+ qemu_io('-c', 'write 128k 64k', top) -+ -+ result = self.qsd.qmp('blockdev-add', { -+ 'driver': imgfmt, -+ 'node-name': 'overlay', -+ 'backing': None, -+ 'file': { -+ 'driver': 'file', -+ 'filename': top -+ } -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('blockdev-snapshot', { -+ 'node': 'node0', -+ 'overlay': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ # While qemu-img bench is running, repeatedly commit overlay to node0 -+ while bench_thr.is_alive(): -+ result = self.qsd.qmp('block-commit', { -+ 'job-id': 'job0', -+ 'device': 'overlay', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ result = self.qsd.qmp('block-job-cancel', { -+ 'device': 'job0', -+ }) -+ self.assert_qmp(result, 'return', {}) -+ -+ cancelled = False -+ while not cancelled: -+ for event in self.qsd.get_qmp().get_events(wait=10.0): -+ if event['event'] != 'JOB_STATUS_CHANGE': -+ continue -+ if event['data']['status'] == 'null': -+ cancelled = True -+ -+ bench_thr.join() -+ - if __name__ == '__main__': - # Format must support raw backing files - iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], -diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out -index ae1213e6f8..fbc63e62f8 100644 ---- a/tests/qemu-iotests/tests/graph-changes-while-io.out -+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out -@@ -1,5 +1,5 @@ --. -+.. 
- ---------------------------------------------------------------------- --Ran 1 tests -+Ran 2 tests - - OK --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch b/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch deleted file mode 100644 index 4e91505..0000000 --- a/SOURCES/kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 2c9e6892369ff99decd4030642b8dcf3875e9ebf Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Tue, 9 May 2023 15:41:33 +0200 -Subject: [PATCH 55/56] iotests: Test resizing image attached to an iothread - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [4/4] 8d31752d1e6e8c6a422d68d9cb2251fbc34b7aef (kmwolf/centos-qemu-kvm) - -This tests that trying to resize an image with QMP block_resize doesn't -hang or otherwise fail when the image is attached to a device running in -an iothread. - -This is a regression test for the recent fix that changed -qmp_block_resize, which is a coroutine based QMP handler, to avoid -calling no_coroutine_fns directly. 
- -Signed-off-by: Kevin Wolf -Message-Id: <20230509134133.373408-1-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit e113362e4cdfdcfe1d497e569527f70a0021333a) -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/tests/iothreads-resize | 71 +++++++++++++++++++ - tests/qemu-iotests/tests/iothreads-resize.out | 11 +++ - 2 files changed, 82 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iothreads-resize - create mode 100644 tests/qemu-iotests/tests/iothreads-resize.out - -diff --git a/tests/qemu-iotests/tests/iothreads-resize b/tests/qemu-iotests/tests/iothreads-resize -new file mode 100755 -index 0000000000..36e4598c62 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iothreads-resize -@@ -0,0 +1,71 @@ -+#!/usr/bin/env bash -+# group: rw auto quick -+# -+# Test resizing an image that is attached to a separate iothread -+# -+# Copyright (C) 2023 Red Hat, Inc. -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+# creator -+owner=kwolf@redhat.com -+ -+seq=`basename $0` -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. 
./common.filter -+ -+# Resizing images is only supported by a few block drivers -+_supported_fmt raw qcow2 qed -+_supported_proto file -+_require_devices virtio-scsi-pci -+ -+size=64M -+_make_test_img $size -+ -+qmp() { -+cat < -Date: Thu, 11 May 2023 13:03:22 +0200 -Subject: [PATCH 54/56] iotests: Use alternative CPU type that is not - deprecated in RHEL - -RH-Author: Kevin Wolf -RH-MergeRequest: 164: block: Fix hangs in qmp_block_resize() -RH-Bugzilla: 2185688 -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Hanna Czenczek -RH-Acked-by: Eric Blake -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 038d4718c0ee7a17ff5e6f4af8fc04d07e452f8d (kmwolf/centos-qemu-kvm) - -This is a downstream-only patch that is necessary because the default -CPU in RHEL is marked as deprecated. This makes test cases fail due to -the warning in the output: - -qemu-system-x86_64: warning: CPU model qemu64-x86_64-cpu is deprecated -- use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max' - -Fixes: 318178778db60b6475d1484509bee136317156d3 -Signed-off-by: Kevin Wolf ---- - tests/qemu-iotests/testenv.py | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py -index 9a37ad9152..963514aab3 100644 ---- a/tests/qemu-iotests/testenv.py -+++ b/tests/qemu-iotests/testenv.py -@@ -244,6 +244,9 @@ def __init__(self, source_dir: str, build_dir: str, - if self.qemu_prog.endswith(f'qemu-system-{suffix}'): - self.qemu_options += f' -machine {machine}' - -+ if self.qemu_prog.endswith('qemu-system-x86_64'): -+ self.qemu_options += ' -cpu Nehalem' -+ - # QEMU_DEFAULT_MACHINE - self.qemu_default_machine = get_default_machine(self.qemu_prog) - --- -2.39.1 - diff --git a/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch new file mode 100644 index 0000000..d9072f5 --- /dev/null +++ b/SOURCES/kvm-iotests-add-filter_qmp_generated_node_ids.patch @@ -0,0 +1,49 @@ +From 
a9be663beaace1c31d75ca353e5d3bb0657a4f6c Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:21 -0500 +Subject: [PATCH 11/22] iotests: add filter_qmp_generated_node_ids() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [7/17] 8dd20acc5b1e992294ed422e80897a9c221940dd (stefanha/centos-stream-qemu-kvm) + +Add a filter function for QMP responses that contain QEMU's +automatically generated node ids. The ids change between runs and must +be masked in the reference output. + +The next commit will use this new function. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit da62b507a20510d819bcfbe8f5e573409b954006) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/iotests.py | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index e5c5798c71..ea48af4a7b 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -651,6 +651,13 @@ def _filter(_key, value): + def filter_generated_node_ids(msg): + return re.sub("#block[0-9]+", "NODE_NAME", msg) + ++def filter_qmp_generated_node_ids(qmsg): ++ def _filter(_key, value): ++ if is_str(value): ++ return filter_generated_node_ids(value) ++ return value ++ return filter_qmp(qmsg, _filter) ++ + def filter_img_info(output: str, filename: str, + drop_child_info: bool = True) -> str: + lines = [] +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch new file mode 100644 index 0000000..ab63004 --- /dev/null +++ b/SOURCES/kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch @@ -0,0 +1,49 @@ +From 
453da839a7d81896d03b827a95c1991a60740dc5 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Jan 2024 16:21:50 +0100 +Subject: [PATCH 21/22] iotests/iothreads-stream: Use the right TimeoutError + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [17/17] ca5a512ccccb668089b726d7499562d1e294c828 (stefanha/centos-stream-qemu-kvm) + +Since Python 3.11 asyncio.TimeoutError is an alias for TimeoutError, but +in older versions it's not. We really have to catch asyncio.TimeoutError +here, otherwise a slow test run will fail (as has happened multiple +times on CI recently). + +Signed-off-by: Kevin Wolf +Message-ID: <20240125152150.42389-1-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c9c0b37ff4c11b712b21efabe8e5381d223d0295) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/tests/iothreads-stream | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/tests/iothreads-stream b/tests/qemu-iotests/tests/iothreads-stream +index 503f221f16..231195b5e8 100755 +--- a/tests/qemu-iotests/tests/iothreads-stream ++++ b/tests/qemu-iotests/tests/iothreads-stream +@@ -18,6 +18,7 @@ + # + # Creator/Owner: Kevin Wolf + ++import asyncio + import iotests + + iotests.script_initialize(supported_fmts=['qcow2'], +@@ -69,6 +70,6 @@ with iotests.FilePath('disk1.img') as base1_path, \ + # The test is done once both jobs are gone + if finished == 2: + break +- except TimeoutError: ++ except asyncio.TimeoutError: + pass + vm.cmd('query-jobs') +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-iov-padding-New-test.patch b/SOURCES/kvm-iotests-iov-padding-New-test.patch deleted file mode 100644 index 9ef37a2..0000000 --- a/SOURCES/kvm-iotests-iov-padding-New-test.patch +++ /dev/null @@ -1,186 +0,0 @@ -From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001 -From: 
Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:18 +0200 -Subject: [PATCH 4/9] iotests/iov-padding: New test - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) - -Test that even vectored IO requests with 1024 vector elements that are -not aligned to the device's request alignment will succeed. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-5-hreitz@redhat.com> -(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) -Signed-off-by: Hanna Czenczek ---- - tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ - tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ - 2 files changed, 144 insertions(+) - create mode 100755 tests/qemu-iotests/tests/iov-padding - create mode 100644 tests/qemu-iotests/tests/iov-padding.out - -diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding -new file mode 100755 -index 0000000000..b9604900c7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding -@@ -0,0 +1,85 @@ -+#!/usr/bin/env bash -+# group: rw quick -+# -+# Check the interaction of request padding (to fit alignment restrictions) with -+# vectored I/O from the guest -+# -+# Copyright Red Hat -+# -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 2 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with this program. If not, see . -+# -+ -+seq=$(basename $0) -+echo "QA output created by $seq" -+ -+status=1 # failure is the default! -+ -+_cleanup() -+{ -+ _cleanup_test_img -+} -+trap "_cleanup; exit \$status" 0 1 2 3 15 -+ -+# get standard environment, filters and checks -+cd .. -+. ./common.rc -+. ./common.filter -+ -+_supported_fmt raw -+_supported_proto file -+ -+_make_test_img 1M -+ -+IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" -+ -+# Four combinations: -+# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k -+# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not -+# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned -+# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not -+for start_offset in 4096 512; do -+ for last_element_length in 512 4096; do -+ length=$((1023 * 512 + $last_element_length)) -+ -+ echo -+ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" -+ -+ # Fill with data for testing -+ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io -+ -+ # 1023 512-byte buffers, and then one with length $last_element_length -+ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "writev $cmd_params" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ -+ # Read all patterns -- read the part we just wrote with writev twice, -+ # once "normally", and once with a readv, so we see that that works, too -+ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ -+ -c "read -P 1 0 $start_offset" \ -+ -c "read -P 2 $start_offset $length" \ -+ -c "readv $cmd_params" \ -+ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ -+ --image-opts \ -+ "$IMGSPEC" \ -+ | _filter_qemu_io -+ done 
-+done -+ -+# success, all done -+echo "*** done" -+rm -f $seq.full -+status=0 -diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out -new file mode 100644 -index 0000000000..e07a91fac7 ---- /dev/null -+++ b/tests/qemu-iotests/tests/iov-padding.out -@@ -0,0 +1,59 @@ -+QA output created by iov-padding -+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 4096 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 4096/4096 bytes at offset 0 -+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 4096 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 516608/516608 bytes at offset 531968 -+504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 524288) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 
524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 524288/524288 bytes at offset 512 -+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 523776/523776 bytes at offset 524800 -+511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+ -+== performing 1024-element vectored requests to image (offset: 512; length: 527872) == -+wrote 1048576/1048576 bytes at offset 0 -+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+wrote 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 512/512 bytes at offset 0 -+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 527872/527872 bytes at offset 512 -+515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+read 520192/520192 bytes at offset 528384 -+508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -+*** done --- -2.39.3 - diff --git a/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch new file mode 100644 index 0000000..209bd1e --- /dev/null +++ b/SOURCES/kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch @@ -0,0 +1,592 @@ +From 70efc3bbf1f7d7b1b0c2475d9ce3bb70cc9d1cc7 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:22 -0500 +Subject: [PATCH 12/22] iotests: port 141 to Python for reliable QMP testing + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [8/17] 0783f536508916feac4b4c39e41c22c24a2e52e7 
(stefanha/centos-stream-qemu-kvm) + +The common.qemu bash functions allow tests to interact with the QMP +monitor of a QEMU process. I spent two days trying to update 141 when +the order of the test output changed, but found it would still fail +occassionally because printf() and QMP events race with synchronous QMP +communication. + +I gave up and ported 141 to the existing Python API for QMP tests. The +Python API is less affected by the order in which QEMU prints output +because it does not print all QMP traffic by default. + +The next commit changes the order in which QMP messages are received. +Make 141 reliable first. + +Cc: Hanna Czenczek +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 9ee2dd4c22a3639c5462b3fc20df60c005c3de64) +Signed-off-by: Stefan Hajnoczi +--- + tests/qemu-iotests/141 | 307 ++++++++++++++++--------------------- + tests/qemu-iotests/141.out | 200 ++++++------------------ + 2 files changed, 176 insertions(+), 331 deletions(-) + +diff --git a/tests/qemu-iotests/141 b/tests/qemu-iotests/141 +index a37030ee17..a7d3985a02 100755 +--- a/tests/qemu-iotests/141 ++++ b/tests/qemu-iotests/141 +@@ -1,9 +1,12 @@ +-#!/usr/bin/env bash ++#!/usr/bin/env python3 + # group: rw auto quick + # + # Test case for ejecting BDSs with block jobs still running on them + # +-# Copyright (C) 2016 Red Hat, Inc. ++# Originally written in bash by Hanna Czenczek, ported to Python by Stefan ++# Hajnoczi. ++# ++# Copyright Red Hat + # + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -19,177 +22,129 @@ + # along with this program. If not, see . + # + +-# creator +-owner=hreitz@redhat.com +- +-seq="$(basename $0)" +-echo "QA output created by $seq" +- +-status=1 # failure is the default! 
+- +-_cleanup() +-{ +- _cleanup_qemu +- _cleanup_test_img +- for img in "$TEST_DIR"/{b,m,o}.$IMGFMT; do +- _rm_test_img "$img" +- done +-} +-trap "_cleanup; exit \$status" 0 1 2 3 15 +- +-# get standard environment, filters and checks +-. ./common.rc +-. ./common.filter +-. ./common.qemu +- +-# Needs backing file and backing format support +-_supported_fmt qcow2 qed +-_supported_proto file +-_supported_os Linux +- +- +-test_blockjob() +-{ +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': '$IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': '$TEST_IMG' +- }}}" \ +- 'return' +- +- # If "$2" is an event, we may or may not see it before the +- # {"return": {}}. Therefore, filter the {"return": {}} out both +- # here and in the next command. (Naturally, if we do not see it +- # here, we will see it before the next command can be executed, +- # so it will appear in the next _send_qemu_cmd's output.) +- _send_qemu_cmd $QEMU_HANDLE \ +- "$1" \ +- "$2" \ +- | _filter_img_create | _filter_qmp_empty_return +- +- # We want this to return an error because the block job is still running +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}}" \ +- 'error' | _filter_generated_node_ids | _filter_qmp_empty_return +- +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}}" \ +- "$3" +- +- _send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}}" \ +- 'return' +-} +- +- +-TEST_IMG="$TEST_DIR/b.$IMGFMT" _make_test_img 1M +-TEST_IMG="$TEST_DIR/m.$IMGFMT" _make_test_img -b "$TEST_DIR/b.$IMGFMT" -F $IMGFMT 1M +-_make_test_img -b "$TEST_DIR/m.$IMGFMT" 1M -F $IMGFMT +- +-_launch_qemu -nodefaults +- +-_send_qemu_cmd $QEMU_HANDLE \ +- "{'execute': 'qmp_capabilities'}" \ +- 'return' +- +-echo +-echo '=== Testing drive-backup ===' +-echo +- +-# drive-backup will not send 
BLOCK_JOB_READY by itself, and cancelling the job +-# will consequently result in BLOCK_JOB_CANCELLED being emitted. +- +-test_blockjob \ +- "{'execute': 'drive-backup', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'target': '$TEST_DIR/o.$IMGFMT', +- 'format': '$IMGFMT', +- 'sync': 'none'}}" \ +- 'return' \ +- '"status": "null"' +- +-echo +-echo '=== Testing drive-mirror ===' +-echo +- +-# drive-mirror will send BLOCK_JOB_READY basically immediately, and cancelling +-# the job will consequently result in BLOCK_JOB_COMPLETED being emitted. +- +-test_blockjob \ +- "{'execute': 'drive-mirror', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'target': '$TEST_DIR/o.$IMGFMT', +- 'format': '$IMGFMT', +- 'sync': 'none'}}" \ +- 'BLOCK_JOB_READY' \ +- '"status": "null"' +- +-echo +-echo '=== Testing active block-commit ===' +-echo +- +-# An active block-commit will send BLOCK_JOB_READY basically immediately, and +-# cancelling the job will consequently result in BLOCK_JOB_COMPLETED being +-# emitted. +- +-test_blockjob \ +- "{'execute': 'block-commit', +- 'arguments': {'job-id': 'job0', 'device': 'drv0'}}" \ +- 'BLOCK_JOB_READY' \ +- '"status": "null"' +- +-echo +-echo '=== Testing non-active block-commit ===' +-echo +- +-# Give block-commit something to work on, otherwise it would be done +-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just +-# fine without the block job still running. +- +-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/m.$IMGFMT" | _filter_qemu_io +- +-test_blockjob \ +- "{'execute': 'block-commit', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'top': '$TEST_DIR/m.$IMGFMT', +- 'speed': 1}}" \ +- 'return' \ +- '"status": "null"' +- +-echo +-echo '=== Testing block-stream ===' +-echo +- +-# Give block-stream something to work on, otherwise it would be done +-# immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would work just +-# fine without the block job still running. 
+- +-$QEMU_IO -c 'write 0 1M' "$TEST_DIR/b.$IMGFMT" | _filter_qemu_io +- +-# With some data to stream (and @speed set to 1), block-stream will not complete +-# until we send the block-job-cancel command. +- +-test_blockjob \ +- "{'execute': 'block-stream', +- 'arguments': {'job-id': 'job0', +- 'device': 'drv0', +- 'speed': 1}}" \ +- 'return' \ +- '"status": "null"' +- +-_cleanup_qemu +- +-# success, all done +-echo "*** done" +-rm -f $seq.full +-status=0 ++import iotests ++ ++# Common filters to mask values that vary in the test output ++QMP_FILTERS = [iotests.filter_qmp_testfiles, \ ++ iotests.filter_qmp_imgfmt] ++ ++ ++class TestCase: ++ def __init__(self, name, vm, image_path, cancel_event): ++ self.name = name ++ self.vm = vm ++ self.image_path = image_path ++ self.cancel_event = cancel_event ++ ++ def __enter__(self): ++ iotests.log(f'=== Testing {self.name} ===') ++ self.vm.qmp_log('blockdev-add', \ ++ node_name='drv0', \ ++ driver=iotests.imgfmt, \ ++ file={'driver': 'file', 'filename': self.image_path}, \ ++ filters=QMP_FILTERS) ++ ++ def __exit__(self, *exc_details): ++ # This is expected to fail because the job still exists ++ self.vm.qmp_log('blockdev-del', node_name='drv0', \ ++ filters=[iotests.filter_qmp_generated_node_ids]) ++ ++ self.vm.qmp_log('block-job-cancel', device='job0') ++ event = self.vm.event_wait(self.cancel_event) ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # This time it succeeds ++ self.vm.qmp_log('blockdev-del', node_name='drv0') ++ ++ # Separate test cases in output ++ iotests.log('') ++ ++ ++def main() -> None: ++ with iotests.FilePath('bottom', 'middle', 'top', 'target') as \ ++ (bottom_path, middle_path, top_path, target_path), \ ++ iotests.VM() as vm: ++ ++ iotests.log('Creating bottom <- middle <- top backing file chain...') ++ IMAGE_SIZE='1M' ++ iotests.qemu_img_create('-f', iotests.imgfmt, bottom_path, IMAGE_SIZE) ++ iotests.qemu_img_create('-f', iotests.imgfmt, \ ++ '-F', iotests.imgfmt, \ ++ '-b', 
bottom_path, \ ++ middle_path, \ ++ IMAGE_SIZE) ++ iotests.qemu_img_create('-f', iotests.imgfmt, \ ++ '-F', iotests.imgfmt, \ ++ '-b', middle_path, \ ++ top_path, \ ++ IMAGE_SIZE) ++ ++ iotests.log('Starting VM...') ++ vm.add_args('-nodefaults') ++ vm.launch() ++ ++ # drive-backup will not send BLOCK_JOB_READY by itself, and cancelling ++ # the job will consequently result in BLOCK_JOB_CANCELLED being ++ # emitted. ++ with TestCase('drive-backup', vm, top_path, 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('drive-backup', \ ++ job_id='job0', \ ++ device='drv0', \ ++ target=target_path, \ ++ format=iotests.imgfmt, \ ++ sync='none', \ ++ filters=QMP_FILTERS) ++ ++ # drive-mirror will send BLOCK_JOB_READY basically immediately, and ++ # cancelling the job will consequently result in BLOCK_JOB_COMPLETED ++ # being emitted. ++ with TestCase('drive-mirror', vm, top_path, 'BLOCK_JOB_COMPLETED'): ++ vm.qmp_log('drive-mirror', \ ++ job_id='job0', \ ++ device='drv0', \ ++ target=target_path, \ ++ format=iotests.imgfmt, \ ++ sync='none', \ ++ filters=QMP_FILTERS) ++ event = vm.event_wait('BLOCK_JOB_READY') ++ assert event is not None # silence mypy ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # An active block-commit will send BLOCK_JOB_READY basically ++ # immediately, and cancelling the job will consequently result in ++ # BLOCK_JOB_COMPLETED being emitted. ++ with TestCase('active block-commit', vm, top_path, \ ++ 'BLOCK_JOB_COMPLETED'): ++ vm.qmp_log('block-commit', \ ++ job_id='job0', \ ++ device='drv0') ++ event = vm.event_wait('BLOCK_JOB_READY') ++ assert event is not None # silence mypy ++ iotests.log(event, filters=[iotests.filter_qmp_event]) ++ ++ # Give block-commit something to work on, otherwise it would be done ++ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would ++ # work just fine without the block job still running. 
++ iotests.qemu_io(middle_path, '-c', f'write 0 {IMAGE_SIZE}') ++ with TestCase('non-active block-commit', vm, top_path, \ ++ 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('block-commit', \ ++ job_id='job0', \ ++ device='drv0', \ ++ top=middle_path, \ ++ speed=1, \ ++ filters=[iotests.filter_qmp_testfiles]) ++ ++ # Give block-stream something to work on, otherwise it would be done ++ # immediately, send a BLOCK_JOB_COMPLETED and ejecting the BDS would ++ # work just fine without the block job still running. ++ iotests.qemu_io(bottom_path, '-c', f'write 0 {IMAGE_SIZE}') ++ with TestCase('block-stream', vm, top_path, 'BLOCK_JOB_CANCELLED'): ++ vm.qmp_log('block-stream', \ ++ job_id='job0', \ ++ device='drv0', \ ++ speed=1) ++ ++if __name__ == '__main__': ++ iotests.script_main(main, supported_fmts=['qcow2', 'qed'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/141.out b/tests/qemu-iotests/141.out +index 63203d9944..91b7ba50af 100644 +--- a/tests/qemu-iotests/141.out ++++ b/tests/qemu-iotests/141.out +@@ -1,179 +1,69 @@ +-QA output created by 141 +-Formatting 'TEST_DIR/b.IMGFMT', fmt=IMGFMT size=1048576 +-Formatting 'TEST_DIR/m.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/b.IMGFMT backing_fmt=IMGFMT +-Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/m.IMGFMT backing_fmt=IMGFMT +-{'execute': 'qmp_capabilities'} +-{"return": {}} +- ++Creating bottom <- middle <- top backing file chain... ++Starting VM... 
+ === Testing drive-backup === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'drive-backup', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'target': 'TEST_DIR/o.IMGFMT', +-'format': 'IMGFMT', +-'sync': 'none'}} +-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "drive-backup", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: node is used as backing hd of 'NODE_NAME'"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 0, "speed": 0, "type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing drive-mirror === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'drive-mirror', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'target': 'TEST_DIR/o.IMGFMT', +-'format': 'IMGFMT', +-'sync': 'none'}} +-Formatting 'TEST_DIR/o.IMGFMT', fmt=IMGFMT size=1048576 backing_file=TEST_DIR/t.IMGFMT backing_fmt=IMGFMT +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": 
"TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "drive-mirror", "arguments": {"device": "drv0", "format": "IMGFMT", "job-id": "job0", "sync": "none", "target": "TEST_DIR/PID-target"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: mirror"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing active block-commit === +- +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 
'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-commit', +-'arguments': {'job-id': 'job0', 'device': 'drv0'}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0"}} ++{"return": {}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 0, "offset": 0, 
"speed": 0, "type": "commit"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing non-active block-commit === +- +-wrote 1048576/1048576 bytes at offset 0 +-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-commit', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'top': 'TEST_DIR/m.IMGFMT', +-'speed': 1}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} ++{"execute": "block-commit", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1, "top": "TEST_DIR/PID-middle"}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: commit"}} +-{'execute': 'block-job-cancel', +- 'arguments': 
{'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "commit"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} + + === Testing block-stream === +- +-wrote 1048576/1048576 bytes at offset 0 +-1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +-{'execute': 'blockdev-add', +- 'arguments': { +- 'node-name': 'drv0', +- 'driver': 'IMGFMT', +- 'file': { +- 'driver': 'file', +- 'filename': 'TEST_DIR/t.IMGFMT' +- }}} +-{"return": {}} +-{'execute': 'block-stream', +-'arguments': {'job-id': 'job0', +-'device': 'drv0', +-'speed': 1}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"execute": "blockdev-add", "arguments": {"driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top"}, "node-name": "drv0"}} ++{"return": {}} 
++{"execute": "block-stream", "arguments": {"device": "drv0", "job-id": "job0", "speed": 1}} ++{"return": {}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"error": {"class": "GenericError", "desc": "Node 'drv0' is busy: block device is in use by block job: stream"}} +-{'execute': 'block-job-cancel', +- 'arguments': {'device': 'job0'}} ++{"execute": "block-job-cancel", "arguments": {"device": "job0"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{'execute': 'blockdev-del', +- 'arguments': {'node-name': 'drv0'}} ++{"data": {"device": "job0", "len": 1048576, "offset": 524288, "speed": 1, "type": "stream"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} ++{"execute": "blockdev-del", "arguments": {"node-name": "drv0"}} + {"return": {}} +-*** done ++ +-- +2.39.3 + diff --git a/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch new file mode 100644 index 0000000..fc1c62f --- /dev/null +++ b/SOURCES/kvm-job-remove-outdated-AioContext-locking-comments.patch @@ -0,0 +1,105 @@ +From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:10 -0500 +Subject: [PATCH 093/101] job: remove outdated AioContext locking comments + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext 
lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer exists. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-14-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + include/qemu/job.h | 20 -------------------- + 1 file changed, 20 deletions(-) + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index e502787dd8..9ea98b5927 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -67,8 +67,6 @@ typedef struct Job { + + /** + * The completion function that will be called when the job completes. +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + BlockCompletionFunc *cb; + +@@ -264,9 +262,6 @@ struct JobDriver { + * + * This callback will not be invoked if the job has already failed. + * If it fails, abort and then clean will be called. +- * +- * Called with AioContext lock held, since many callbacs implementations +- * use bdrv_* functions that require to hold the lock. + */ + int (*prepare)(Job *job); + +@@ -277,9 +272,6 @@ struct JobDriver { + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*commit)(Job *job); + +@@ -290,9 +282,6 @@ struct JobDriver { + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*abort)(Job *job); + +@@ -301,9 +290,6 @@ struct JobDriver { + * .commit() or .abort(). 
Regardless of which callback is invoked after + * completion, .clean() will always be called, even if the job does not + * belong to a transaction group. +- * +- * Called with AioContext lock held, since many callbacs implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*clean)(Job *job); + +@@ -318,17 +304,12 @@ struct JobDriver { + * READY). + * (If the callback is NULL, the job is assumed to terminate + * without I/O.) +- * +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + bool (*cancel)(Job *job, bool force); + + + /** + * Called when the job is freed. +- * Called with AioContext lock held, since many callback implementations +- * use bdrv_* functions that require to hold the lock. + */ + void (*free)(Job *job); + }; +@@ -424,7 +405,6 @@ void job_ref_locked(Job *job); + * Release a reference that was previously acquired with job_ref_locked() or + * job_create(). If it's the last reference to the object, it will be freed. + * +- * Takes AioContext lock internally to invoke a job->driver callback. + * Called with job lock held. 
+ */ + void job_unref_locked(Job *job); +-- +2.39.3 + diff --git a/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch new file mode 100644 index 0000000..3e562b8 --- /dev/null +++ b/SOURCES/kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch @@ -0,0 +1,42 @@ +From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 21 Nov 2023 16:44:19 +0800 +Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm) + +Signed-off-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8) +Signed-off-by: Eric Auger +--- + hw/s390x/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig +index 4c068d7960..26ad104485 100644 +--- a/hw/s390x/Kconfig ++++ b/hw/s390x/Kconfig +@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO + imply VFIO_CCW + imply WDT_DIAG288 + imply PCIE_DEVICES ++ imply IOMMUFD + select PCI_EXPRESS + select S390_FLIC + select S390_FLIC_KVM if KVM +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch b/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch deleted file mode 100644 index d6a6d73..0000000 --- a/SOURCES/kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch +++ /dev/null @@ -1,160 +0,0 @@ -From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001 -From: Marcelo Tosatti -Date: 
Thu, 29 Jun 2023 14:48:32 -0300 -Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marcelo Tosatti -RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption -RH-Bugzilla: 2218644 -RH-Acked-by: Vitaly Kuznetsov -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Leonardo Brás -RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644 -Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d - -A regression has been detected in latency testing of KVM guests. -More specifically, it was observed that the cyclictest -numbers inside of an isolated vcpu (running on isolated pcpu) are: - -Where a maximum of 50us is acceptable. - -The implementation of KVM_GET_STATS_FD uses run_on_cpu to query -per vcpu statistics, which interrupts the vcpu (and is unnecessary). - -To fix this, open the per vcpu stats fd on vcpu initialization, -and read from that fd from QEMU's main thread. 
- -Signed-off-by: Marcelo Tosatti -Signed-off-by: Paolo Bonzini ---- - accel/kvm/kvm-all.c | 30 +++++++++++++++--------------- - include/hw/core/cpu.h | 1 + - 2 files changed, 16 insertions(+), 15 deletions(-) - -diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c -index cf3a88d90e..fa7ca46c66 100644 ---- a/accel/kvm/kvm-all.c -+++ b/accel/kvm/kvm-all.c -@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) - "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)", - kvm_arch_vcpu_id(cpu)); - } -+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ - err: - return ret; - } -@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - - /* Read stats header */ - kvm_stats_header = &descriptors->kvm_stats_header; -- ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header)); -+ ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0); - if (ret != sizeof(*kvm_stats_header)) { - error_setg(errp, "KVM stats: failed to read stats header: " - "expected %zu actual %zu", -@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd - } - - static void query_stats(StatsResultList **result, StatsTarget target, -- strList *names, int stats_fd, Error **errp) -+ strList *names, int stats_fd, CPUState *cpu, -+ Error **errp) - { - struct kvm_stats_desc *kvm_stats_desc; - struct kvm_stats_header *kvm_stats_header; -@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target, - break; - case STATS_TARGET_VCPU: - add_stats_entry(result, STATS_PROVIDER_KVM, -- current_cpu->parent_obj.canonical_path, -+ cpu->parent_obj.canonical_path, - stats_list); - break; - default: -@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target, - add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list); - } - --static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void 
query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data) - return; - } - query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU, -- kvm_stats_args->names, stats_fd, kvm_stats_args->errp); -- close(stats_fd); -+ kvm_stats_args->names, stats_fd, cpu, -+ kvm_stats_args->errp); - } - --static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) -+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args) - { -- StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr; -- int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL); -+ int stats_fd = cpu->kvm_vcpu_stats_fd; - Error *local_err = NULL; - - if (stats_fd == -1) { -@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data) - } - query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd, - kvm_stats_args->errp); -- close(stats_fd); - } - - static void query_stats_cb(StatsResultList **result, StatsTarget target, -@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - error_setg_errno(errp, errno, "KVM stats: ioctl failed"); - return; - } -- query_stats(result, target, names, stats_fd, errp); -+ query_stats(result, target, names, stats_fd, NULL, errp); - close(stats_fd); - break; - } -@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target, - if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) { - continue; - } -- run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_vcpu(cpu, &stats_args); - } - break; - } -@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error 
**errp) - if (first_cpu) { - stats_args.result.schema = result; - stats_args.errp = errp; -- run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args)); -+ query_stats_schema_vcpu(first_cpu, &stats_args); - } - } -diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h -index 397fd3ac68..ae96be07e7 100644 ---- a/include/hw/core/cpu.h -+++ b/include/hw/core/cpu.h -@@ -399,6 +399,7 @@ struct CPUState { - struct kvm_dirty_gfn *kvm_dirty_gfns; - uint32_t kvm_fetch_index; - uint64_t dirty_pages; -+ int kvm_vcpu_stats_fd; - - /* Use by accel-block: CPU is executing an ioctl() */ - QemuLockCnt in_ioctl_lock; --- -2.39.3 - diff --git a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch b/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch deleted file mode 100644 index c1100a5..0000000 --- a/SOURCES/kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 6de2f37d9a5db6578554929227377e4fd6d2feb3 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 14/21] loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/13] 02435b9148b906960137de32eb5a3c4961e44a57 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6d0589e0e6c64b888864a2bf980537be20389264 -Author: Alexander Bulekov -Date: Sat May 6 07:21:45 2023 -0400 - - loongarch: mark loongarch_ipi_iocsr re-entrnacy safe - - loongarch_ipi_iocsr MRs rely on re-entrant IO through the ipi_send - function. As such, mark these MRs re-entrancy-safe. 
- - Fixes: a2e1753b80 ("memory: prevent dma-reentracy issues") - Signed-off-by: Alexander Bulekov - Reviewed-by: Song Gao - Message-Id: <20230506112145.3563708-1-alxndr@bu.edu> - Signed-off-by: Song Gao - -Signed-off-by: Jon Maloy ---- - hw/intc/loongarch_ipi.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c -index aa4bf9eb74..40e98af2ce 100644 ---- a/hw/intc/loongarch_ipi.c -+++ b/hw/intc/loongarch_ipi.c -@@ -215,6 +215,10 @@ static void loongarch_ipi_init(Object *obj) - for (cpu = 0; cpu < MAX_IPI_CORE_NUM; cpu++) { - memory_region_init_io(&s->ipi_iocsr_mem[cpu], obj, &loongarch_ipi_ops, - &lams->ipi_core[cpu], "loongarch_ipi_iocsr", 0x48); -+ -+ /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ -+ s->ipi_iocsr_mem[cpu].disable_reentrancy_guard = true; -+ - sysbus_init_mmio(sbd, &s->ipi_iocsr_mem[cpu]); - - memory_region_init_io(&s->ipi64_iocsr_mem[cpu], obj, &loongarch_ipi64_ops, --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch deleted file mode 100644 index 359d53f..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 0660a7a6994db0db9f6d0b84f6345aa06dc61761 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Mon, 29 May 2023 14:21:08 -0400 -Subject: [PATCH 16/21] lsi53c895a: disable reentrancy detection for MMIO - region, too - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/13] fb9da8b68cdf0dc0b0bd8fb8540849c944d0bf20 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit d139fe9ad8a27bcc50b4ead77d2f97d191a0e95e -Author: Thomas Huth -Date: Tue May 16 11:05:56 2023 +0200 - - lsi53c895a: disable reentrancy detection for MMIO region, 
too - - While trying to use a SCSI disk on the LSI controller with an - older version of Fedora (25), I'm getting: - - qemu: warning: Blocked re-entrant IO on MemoryRegion: lsi-mmio at addr: 0x34 - - and the SCSI controller is not usable. Seems like we have to - disable the reentrancy checker for the MMIO region, too, to - get this working again. - - The problem could be reproduced it like this: - - ./qemu-system-x86_64 -accel kvm -m 2G -machine q35 \ - -device lsi53c810,id=lsi1 -device scsi-hd,drive=d0 \ - -drive if=none,id=d0,file=.../somedisk.qcow2 \ - -cdrom Fedora-Everything-netinst-i386-25-1.3.iso - - Where somedisk.qcow2 is an image that contains already some partitions - and file systems. - - In the boot menu of Fedora, go to - "Troubleshooting" -> "Rescue a Fedora system" -> "3) Skip to shell" - - Then check "dmesg | grep -i 53c" for failure messages, and try to mount - a partition from somedisk.qcow2. - - Message-Id: <20230516090556.553813-1-thuth@redhat.com> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index db27872963..048436352b 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2307,6 +2307,7 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - * re-entrancy guard. 
- */ - s->ram_io.disable_reentrancy_guard = true; -+ s->mmio_io.disable_reentrancy_guard = true; - - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); --- -2.39.3 - diff --git a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch b/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch deleted file mode 100644 index e671c92..0000000 --- a/SOURCES/kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 621808c6c4da3adcc073231493d487d6360386c9 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 09/21] lsi53c895a: disable reentrancy detection for script RAM - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/13] 765d65fc3fb735eb4b52a408ccff91b538ad32b6 (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit bfd6e7ae6a72b84e2eb9574f56e6ec037f05182c -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:10 2023 -0400 - - lsi53c895a: disable reentrancy detection for script RAM - - As the code is designed to use the memory APIs to access the script ram, - disable reentrancy checks for the pseudo-RAM ram_io MemoryRegion. - - In the future, ram_io may be converted from an IO to a proper RAM MemoryRegion. 
- - Reported-by: Fiona Ebner - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Reviewed-by: Darren Kenny - Message-Id: <20230427211013.2994127-6-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/scsi/lsi53c895a.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c -index af93557a9a..db27872963 100644 ---- a/hw/scsi/lsi53c895a.c -+++ b/hw/scsi/lsi53c895a.c -@@ -2302,6 +2302,12 @@ static void lsi_scsi_realize(PCIDevice *dev, Error **errp) - memory_region_init_io(&s->io_io, OBJECT(s), &lsi_io_ops, s, - "lsi-io", 256); - -+ /* -+ * Since we use the address-space API to interact with ram_io, disable the -+ * re-entrancy guard. -+ */ -+ s->ram_io.disable_reentrancy_guard = true; -+ - address_space_init(&s->pci_io_as, pci_address_space_io(dev), "lsi-pci-io"); - qdev_init_gpio_out(d, &s->ext_irq, 1); - --- -2.39.3 - diff --git a/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch new file mode 100644 index 0000000..5b531f5 --- /dev/null +++ b/SOURCES/kvm-memory-device-reintroduce-memory-region-size-check.patch @@ -0,0 +1,112 @@ +From 633c6a52ac88526534466ae311522fe5447bcf91 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Wed, 17 Jan 2024 14:55:54 +0100 +Subject: [PATCH 02/22] memory-device: reintroduce memory region size check + +RH-Author: David Hildenbrand +RH-MergeRequest: 221: memory-device: reintroduce memory region size check +RH-Jira: RHEL-20341 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Igor Mammedov +RH-Commit: [2/2] e9ff2339b0c07c3f48f5834c9c80cd6d4cbc8f71 + +JIRA: https://issues.redhat.com/browse/RHEL-20341 + +We used to check that the memory region size is multiples of the overall +requested address alignment for the device memory address. 
+ +We removed that check, because there are cases (i.e., hv-balloon) where +devices unconditionally request an address alignment that has a very large +alignment (i.e., 32 GiB), but the actual memory device size might not be +multiples of that alignment. + +However, this change: + +(a) allows for some practically impossible DIMM sizes, like "1GB+1 byte". +(b) allows for DIMMs that partially cover hugetlb pages, previously + reported in [1]. + +Both scenarios don't make any sense: we might even waste memory. + +So let's reintroduce that check, but only check that the +memory region size is multiples of the memory region alignment (i.e., +page size, huge page size), but not any additional memory device +requirements communicated using md->get_min_alignment(). + +The following examples now fail again as expected: + +(a) 1M with 2M THP + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-ram,id=mem1,size=1M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(b) 1G+1byte + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-ram,id=mem1,size=1073741825B \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(c) Unaligned hugetlb size (2M) + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-file,id=mem1,mem-path=/dev/hugepages/tmp,size=511M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x200000 + +(d) Unaligned hugetlb size (1G) + + qemu-system-x86_64 -m 4g,maxmem=16g,slots=1 -S -nodefaults -nographic \ + -object memory-backend-file,id=mem1,mem-path=/dev/hugepages1G/tmp,size=2047M \ + -device pc-dimm,id=dimm1,memdev=mem1 + -> backend memory size must be multiple of 0x40000000 + +Note that this fix depends on a hv-balloon change to communicate its +additional alignment requirements using get_min_alignment() 
instead of +through the memory region. + +[1] https://lkml.kernel.org/r/f77d641d500324525ac036fe1827b3070de75fc1.1701088320.git.mprivozn@redhat.com + +Message-ID: <20240117135554.787344-3-david@redhat.com> +Reported-by: Zhenyu Zhang +Reported-by: Michal Privoznik +Fixes: eb1b7c4bd413 ("memory-device: Drop size alignment check") +Tested-by: Zhenyu Zhang +Tested-by: Mario Casquero +Reviewed-by: Maciej S. Szmigiero +Signed-off-by: David Hildenbrand +(cherry picked from commit 540a1abbf0b243e4cfb4333c5d30a041f7080ba4) +Signed-off-by: David Hildenbrand +--- + hw/mem/memory-device.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c +index a1b1af26bc..e098585cda 100644 +--- a/hw/mem/memory-device.c ++++ b/hw/mem/memory-device.c +@@ -374,6 +374,20 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, + goto out; + } + ++ /* ++ * We always want the memory region size to be multiples of the memory ++ * region alignment: for example, DIMMs with 1G+1byte size don't make ++ * any sense. Note that we don't check that the size is multiples ++ * of any additional alignment requirements the memory device might ++ * have when it comes to the address in physical address space. 
++ */ ++ if (!QEMU_IS_ALIGNED(memory_region_size(mr), ++ memory_region_get_alignment(mr))) { ++ error_setg(errp, "backend memory size must be multiple of 0x%" ++ PRIx64, memory_region_get_alignment(mr)); ++ return; ++ } ++ + if (legacy_align) { + align = *legacy_align; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch b/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch deleted file mode 100644 index d3697dc..0000000 --- a/SOURCES/kvm-memory-prevent-dma-reentracy-issues.patch +++ /dev/null @@ -1,150 +0,0 @@ -From 0bc9295be331781491e993b6f1b0dca959194f13 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 05/21] memory: prevent dma-reentracy issues - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/13] d4a762d3b156200a65d09cde58cd6d77b229071e (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 -CVE: CVE-2023-0330 - -commit a2e1753b8054344f32cf94f31c6399a58794a380 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:06 2023 -0400 - - memory: prevent dma-reentracy issues - - Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA. - This flag is set/checked prior to calling a device's MemoryRegion - handlers, and set when device code initiates DMA. The purpose of this - flag is to prevent two types of DMA-based reentrancy issues: - - 1.) mmio -> dma -> mmio case - 2.) bh -> dma write -> mmio case - - These issues have led to problems such as stack-exhaustion and - use-after-frees. 
- - Summary of the problem from Peter Maydell: - https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com - - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827 - Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282 - Resolves: CVE-2023-0330 - - Signed-off-by: Alexander Bulekov - Reviewed-by: Thomas Huth - Message-Id: <20230427211013.2994127-2-alxndr@bu.edu> - [thuth: Replace warn_report() with warn_report_once()] - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - include/exec/memory.h | 5 +++++ - include/hw/qdev-core.h | 7 +++++++ - softmmu/memory.c | 16 ++++++++++++++++ - 3 files changed, 28 insertions(+) - -diff --git a/include/exec/memory.h b/include/exec/memory.h -index 15ade918ba..e45ce6061f 100644 ---- a/include/exec/memory.h -+++ b/include/exec/memory.h -@@ -767,6 +767,8 @@ struct MemoryRegion { - bool is_iommu; - RAMBlock *ram_block; - Object *owner; -+ /* owner as TYPE_DEVICE. 
Used for re-entrancy checks in MR access hotpath */ -+ DeviceState *dev; - - const MemoryRegionOps *ops; - void *opaque; -@@ -791,6 +793,9 @@ struct MemoryRegion { - unsigned ioeventfd_nb; - MemoryRegionIoeventfd *ioeventfds; - RamDiscardManager *rdm; /* Only for RAM */ -+ -+ /* For devices designed to perform re-entrant IO into their own IO MRs */ -+ bool disable_reentrancy_guard; - }; - - struct IOMMUMemoryRegion { -diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h -index bd50ad5ee1..7623703943 100644 ---- a/include/hw/qdev-core.h -+++ b/include/hw/qdev-core.h -@@ -162,6 +162,10 @@ struct NamedClockList { - QLIST_ENTRY(NamedClockList) node; - }; - -+typedef struct { -+ bool engaged_in_io; -+} MemReentrancyGuard; -+ - /** - * DeviceState: - * @realized: Indicates whether the device has been fully constructed. -@@ -194,6 +198,9 @@ struct DeviceState { - int alias_required_for_version; - ResettableState reset; - GSList *unplug_blockers; -+ -+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */ -+ MemReentrancyGuard mem_reentrancy_guard; - }; - - struct DeviceListener { -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b1a6cae6f5..b7b3386e9d 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -542,6 +542,18 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_size_max = 4; - } - -+ /* Do not allow more than one simultaneous access to a device's IO Regions */ -+ if (mr->dev && !mr->disable_reentrancy_guard && -+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) { -+ if (mr->dev->mem_reentrancy_guard.engaged_in_io) { -+ warn_report_once("Blocked re-entrant IO on MemoryRegion: " -+ "%s at addr: 0x%" HWADDR_PRIX, -+ memory_region_name(mr), addr); -+ return MEMTX_ACCESS_ERROR; -+ } -+ mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ } -+ - /* FIXME: support unaligned access? 
*/ - access_size = MAX(MIN(size, access_size_max), access_size_min); - access_mask = MAKE_64BIT_MASK(0, access_size * 8); -@@ -556,6 +568,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -+ if (mr->dev) { -+ mr->dev->mem_reentrancy_guard.engaged_in_io = false; -+ } - return r; - } - -@@ -1170,6 +1185,7 @@ static void memory_region_do_init(MemoryRegion *mr, - } - mr->name = g_strdup(name); - mr->owner = owner; -+ mr->dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE); - mr->ram_block = NULL; - - if (name) { --- -2.39.3 - diff --git a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch b/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch deleted file mode 100644 index f45abea..0000000 --- a/SOURCES/kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 3f2042e33acb6db91594e12ebd63b9abd9e753cc Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Wed, 7 Jun 2023 11:45:09 -0400 -Subject: [PATCH 15/21] memory: stricter checks prior to unsetting - engaged_in_io - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/13] b8e1a4b49dd7fa3b7948d32f46dfe1d7f7a4c1cf (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 3884bf6468ac6bbb58c2b3feaa74e87f821b52f3 -Author: Alexander Bulekov -Date: Tue May 16 04:40:02 2023 -0400 - - memory: stricter checks prior to unsetting engaged_in_io - - engaged_in_io could be unset by an MR with re-entrancy checks disabled. - Ensure that only MRs that can set the engaged_in_io flag can unset it. 
- - Signed-off-by: Alexander Bulekov - Message-Id: <20230516084002.3813836-1-alxndr@bu.edu> - Reviewed-by: Darren Kenny - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - softmmu/memory.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/softmmu/memory.c b/softmmu/memory.c -index b7b3386e9d..26424f1d78 100644 ---- a/softmmu/memory.c -+++ b/softmmu/memory.c -@@ -534,6 +534,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - unsigned access_size; - unsigned i; - MemTxResult r = MEMTX_OK; -+ bool reentrancy_guard_applied = false; - - if (!access_size_min) { - access_size_min = 1; -@@ -552,6 +553,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - return MEMTX_ACCESS_ERROR; - } - mr->dev->mem_reentrancy_guard.engaged_in_io = true; -+ reentrancy_guard_applied = true; - } - - /* FIXME: support unaligned access? */ -@@ -568,7 +570,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr, - access_mask, attrs); - } - } -- if (mr->dev) { -+ if (mr->dev && reentrancy_guard_applied) { - mr->dev->mem_reentrancy_guard.engaged_in_io = false; - } - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch b/SOURCES/kvm-migration-Add-switchover-ack-capability.patch deleted file mode 100644 index 399c9ed..0000000 --- a/SOURCES/kvm-migration-Add-switchover-ack-capability.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 10/37] migration: Add switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) - -Bugzilla: 
https://bugzilla.redhat.com/2192818 - -commit 6574232fff6a -Author: Avihai Horon -Date: Wed Jun 21 14:11:54 2023 +0300 - - migration: Add switchover ack capability - - Migration downtime estimation is calculated based on bandwidth and - remaining migration data. This assumes that loading of migration data in - the destination takes a negligible amount of time and that downtime - depends only on network speed. - - While this may be true for RAM, it's not necessarily true for other - migrated devices. For example, loading the data of a VFIO device in the - destination might require from the device to allocate resources, prepare - internal data structures and so on. These operations can take a - significant amount of time which can increase migration downtime. - - This patch adds a new capability "switchover ack" that prevents the - source from stopping the VM and completing the migration until an ACK - is received from the destination that it's OK to do so. - - This can be used by migrated devices in various ways to reduce downtime. - For example, a device can send initial precopy metadata to pre-allocate - resources in the destination and use this capability to make sure that - the pre-allocation is completed before the source VM is stopped, so it - will have full effect. - - This new capability relies on the return path capability to communicate - from the destination back to the source. - - The actual implementation of the capability will be added in the - following patches. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Acked-by: Markus Armbruster - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - qapi/migration.json - re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: - unexpected de-indent (expected at least 17 spaces) - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - qapi/migration.json | 14 +++++++++++++- - 3 files changed, 35 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index a76984276d..c3df6c6dde 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -182,6 +182,8 @@ Property migration_properties[] = { - DEFINE_PROP_MIG_CAP("x-zero-copy-send", - MIGRATION_CAPABILITY_ZERO_COPY_SEND), - #endif -+ DEFINE_PROP_MIG_CAP("x-switchover-ack", -+ MIGRATION_CAPABILITY_SWITCHOVER_ACK), - - DEFINE_PROP_END_OF_LIST(), - }; -@@ -305,6 +307,13 @@ bool migrate_return_path(void) - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - -+bool migrate_switchover_ack(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; -+} -+ - bool migrate_validate_uuid(void) - { - MigrationState *s = migrate_get_current(); -@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - } - } - -+ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { -+ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { -+ error_setg(errp, "Capability 'switchover-ack' requires capability " -+ "'return-path'"); -+ return false; -+ } -+ -+ /* Disable this capability until it's implemented */ -+ error_setg(errp, "'switchover-ack' is not implemented yet"); -+ return false; -+ } -+ - return true; - } - -diff --git a/migration/options.h b/migration/options.h -index 7b0f7245ad..0fc7be6869 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -47,6 +47,7 @@ bool 
migrate_postcopy_ram(void); - bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); -+bool migrate_switchover_ack(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/qapi/migration.json b/qapi/migration.json -index 2c35b7b9cf..b6a58347cc 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -478,6 +478,18 @@ - # should not affect the correctness of postcopy migration. - # (since 7.1) - # -+# @switchover-ack: If enabled, migration will not stop the source VM -+# and complete the migration until an ACK is received -+# from the destination that it's OK to do so. -+# Exactly when this ACK is sent depends on the -+# migrated devices that use this feature. For -+# example, a device can use it to make sure some of -+# its data is sent and loaded in the destination -+# before doing switchover. This can reduce downtime -+# if devices that support this capability are -+# present. 'return-path' capability must be enabled -+# to use it. (since 8.1) -+# - # Features: - # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
- # -@@ -492,7 +504,7 @@ - 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', - { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, - 'validate-uuid', 'background-snapshot', -- 'zero-copy-send', 'postcopy-preempt'] } -+ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } - - ## - # @MigrationCapabilityStatus: --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch b/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch deleted file mode 100644 index 7c9748b..0000000 --- a/SOURCES/kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch +++ /dev/null @@ -1,308 +0,0 @@ -From e2c2910edf90186ca0d7d13c9943caa284e95ea9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Tue, 25 Apr 2023 21:15:14 -0400 -Subject: [PATCH 51/56] migration: Allow postcopy_ram_supported_by_host() to - report err -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [50/50] 08c44affc11c27ddf1aa7ce0dfacbaf5effb80cb (peterx/qemu-kvm) - -Instead of print it to STDERR, bring the error upwards so that it can be -reported via QMP responses. 
- -E.g.: - -{ "execute": "migrate-set-capabilities" , - "arguments": { "capabilities": - [ { "capability": "postcopy-ram", "state": true } ] } } - -{ "error": - { "class": "GenericError", - "desc": "Postcopy is not supported: Host backend files need to be TMPFS - or HUGETLBFS only" } } - -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 74c38cf7fd24c60e4f0a90585d17250478260877) -Signed-off-by: Peter Xu ---- - migration/options.c | 8 ++---- - migration/postcopy-ram.c | 60 +++++++++++++++++++++------------------- - migration/postcopy-ram.h | 3 +- - migration/savevm.c | 3 +- - 4 files changed, 39 insertions(+), 35 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 4701c75a4d..e51d667e14 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -302,6 +302,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { - MigrationIncomingState *mis = migration_incoming_get_current(); - -+ ERRP_GUARD(); - #ifndef CONFIG_LIVE_BLOCK_MIGRATION - if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -@@ -327,11 +328,8 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - */ - if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && - runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -+ !postcopy_ram_supported_by_host(mis, errp)) { -+ error_prepend(errp, "Postcopy is not supported: "); - return false; - } - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 0711500036..75aa276bb1 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -283,11 +283,13 @@ static bool request_ufd_features(int ufd, uint64_t features) - return true; - } - --static bool ufd_check_and_apply(int ufd, 
MigrationIncomingState *mis) -+static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis, -+ Error **errp) - { - uint64_t asked_features = 0; - static uint64_t supported_features; - -+ ERRP_GUARD(); - /* - * it's not possible to - * request UFFD_API twice per one fd -@@ -295,7 +297,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - */ - if (!supported_features) { - if (!receive_ufd_features(&supported_features)) { -- error_report("%s failed", __func__); -+ error_setg(errp, "Userfault feature detection failed"); - return false; - } - } -@@ -317,8 +319,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - * userfault file descriptor - */ - if (!request_ufd_features(ufd, asked_features)) { -- error_report("%s failed: features %" PRIu64, __func__, -- asked_features); -+ error_setg(errp, "Failed features %" PRIu64, asked_features); - return false; - } - -@@ -329,7 +330,8 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - have_hp = supported_features & UFFD_FEATURE_MISSING_HUGETLBFS; - #endif - if (!have_hp) { -- error_report("Userfault on this host does not support huge pages"); -+ error_setg(errp, -+ "Userfault on this host does not support huge pages"); - return false; - } - } -@@ -338,7 +340,7 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. 
- */ --static int test_ramblock_postcopiable(RAMBlock *rb) -+static int test_ramblock_postcopiable(RAMBlock *rb, Error **errp) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); -@@ -346,16 +348,18 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - QemuFsType fs; - - if (length % pagesize) { -- error_report("Postcopy requires RAM blocks to be a page size multiple," -- " block %s is 0x" RAM_ADDR_FMT " bytes with a " -- "page size of 0x%zx", block_name, length, pagesize); -+ error_setg(errp, -+ "Postcopy requires RAM blocks to be a page size multiple," -+ " block %s is 0x" RAM_ADDR_FMT " bytes with a " -+ "page size of 0x%zx", block_name, length, pagesize); - return 1; - } - - if (rb->fd >= 0) { - fs = qemu_fd_getfs(rb->fd); - if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -- error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ error_setg(errp, -+ "Host backend files need to be TMPFS or HUGETLBFS only"); - return 1; - } - } -@@ -368,7 +372,7 @@ static int test_ramblock_postcopiable(RAMBlock *rb) - * normally fine since if the postcopy succeeds it gets turned back on at the - * end. 
- */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - long pagesize = qemu_real_host_page_size(); - int ufd = -1; -@@ -377,29 +381,27 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_register reg_struct; - struct uffdio_range range_struct; - uint64_t feature_mask; -- Error *local_err = NULL; - RAMBlock *block; - -+ ERRP_GUARD(); - if (qemu_target_page_size() > pagesize) { -- error_report("Target page size bigger than host page size"); -+ error_setg(errp, "Target page size bigger than host page size"); - goto out; - } - - ufd = uffd_open(O_CLOEXEC); - if (ufd == -1) { -- error_report("%s: userfaultfd not available: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Userfaultfd not available: %s", strerror(errno)); - goto out; - } - - /* Give devices a chance to object */ -- if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, &local_err)) { -- error_report_err(local_err); -+ if (postcopy_notify(POSTCOPY_NOTIFY_PROBE, errp)) { - goto out; - } - - /* Version and features check */ -- if (!ufd_check_and_apply(ufd, mis)) { -+ if (!ufd_check_and_apply(ufd, mis, errp)) { - goto out; - } - -@@ -417,7 +419,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * affect in reality, or we can revisit. - */ - RAMBLOCK_FOREACH(block) { -- if (test_ramblock_postcopiable(block)) { -+ if (test_ramblock_postcopiable(block, errp)) { - goto out; - } - } -@@ -427,7 +429,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - * it was enabled. 
- */ - if (munlockall()) { -- error_report("%s: munlockall: %s", __func__, strerror(errno)); -+ error_setg(errp, "munlockall() failed: %s", strerror(errno)); - goto out; - } - -@@ -439,8 +441,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (testarea == MAP_FAILED) { -- error_report("%s: Failed to map test area: %s", __func__, -- strerror(errno)); -+ error_setg(errp, "Failed to map test area: %s", strerror(errno)); - goto out; - } - g_assert(QEMU_PTR_IS_ALIGNED(testarea, pagesize)); -@@ -450,14 +451,14 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING; - - if (ioctl(ufd, UFFDIO_REGISTER, ®_struct)) { -- error_report("%s userfault register: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_REGISTER failed: %s", strerror(errno)); - goto out; - } - - range_struct.start = (uintptr_t)testarea; - range_struct.len = pagesize; - if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) { -- error_report("%s userfault unregister: %s", __func__, strerror(errno)); -+ error_setg(errp, "UFFDIO_UNREGISTER failed: %s", strerror(errno)); - goto out; - } - -@@ -465,8 +466,8 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - (__u64)1 << _UFFDIO_COPY | - (__u64)1 << _UFFDIO_ZEROPAGE; - if ((reg_struct.ioctls & feature_mask) != feature_mask) { -- error_report("Missing userfault map features: %" PRIx64, -- (uint64_t)(~reg_struct.ioctls & feature_mask)); -+ error_setg(errp, "Missing userfault map features: %" PRIx64, -+ (uint64_t)(~reg_struct.ioctls & feature_mask)); - goto out; - } - -@@ -1188,6 +1189,8 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) - - int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - { -+ Error *local_err = NULL; -+ - /* Open the fd for the kernel to give us userfaults */ - mis->userfault_fd = uffd_open(O_CLOEXEC | 
O_NONBLOCK); - if (mis->userfault_fd == -1) { -@@ -1200,7 +1203,8 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) - * Although the host check already tested the API, we need to - * do the check again as an ABI handshake on the new fd. - */ -- if (!ufd_check_and_apply(mis->userfault_fd, mis)) { -+ if (!ufd_check_and_apply(mis->userfault_fd, mis, &local_err)) { -+ error_report_err(local_err); - return -1; - } - -@@ -1360,7 +1364,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info) - { - } - --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, Error **errp) - { - error_report("%s: No OS support", __func__); - return false; -diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h -index b4867a32d5..442ab89752 100644 ---- a/migration/postcopy-ram.h -+++ b/migration/postcopy-ram.h -@@ -14,7 +14,8 @@ - #define QEMU_POSTCOPY_RAM_H - - /* Return true if the host supports everything we need to do postcopy-ram */ --bool postcopy_ram_supported_by_host(MigrationIncomingState *mis); -+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis, -+ Error **errp); - - /* - * Make all of RAM sensitive to accesses to areas that haven't yet been written -diff --git a/migration/savevm.c b/migration/savevm.c -index 9671211339..211eff3a8b 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1753,7 +1753,8 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, - return -EINVAL; - } - -- if (!postcopy_ram_supported_by_host(mis)) { -+ if (!postcopy_ram_supported_by_host(mis, &local_err)) { -+ error_report_err(local_err); - postcopy_state_set(POSTCOPY_INCOMING_NONE); - return -1; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch b/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch deleted file mode 100644 index d1620f0..0000000 --- 
a/SOURCES/kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch +++ /dev/null @@ -1,111 +0,0 @@ -From 3691bb5f956e3c60dbf6de183011b31dbc7a7801 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Tue, 2 May 2023 15:52:12 -0500 -Subject: [PATCH 01/56] migration: Attempt disk reactivation in more failure - scenarios - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Acked-by: Kevin Wolf -RH-Commit: [1/1] 5999b747b314641259d3b8809033b057805eed3f (ebblake/centos-qemu-kvm) - -Commit fe904ea824 added a fail_inactivate label, which tries to -reactivate disks on the source after a failure while s->state == -MIGRATION_STATUS_ACTIVE, but didn't actually use the label if -qemu_savevm_state_complete_precopy() failed. This failure to -reactivate is also present in commit 6039dd5b1c (also covering the new -s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring -s->block_inactive is set more reliably). - -Consolidate the two labels back into one - no matter HOW migration is -failed, if there is any chance we can reach vm_start() after having -attempted inactivation, it is essential that we have tried to restart -disks before then. This also makes the cleanup more like -migrate_fd_cancel(). 
- -Suggested-by: Kevin Wolf -Signed-off-by: Eric Blake -Message-Id: <20230502205212.134680-1-eblake@redhat.com> -Acked-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: Kevin Wolf -Signed-off-by: Kevin Wolf -(cherry picked from commit 6dab4c93ecfae48e2e67b984d1032c1e988d3005) -[eblake: downstream migrate_colo() => migrate_colo_enabled()] -Signed-off-by: Eric Blake ---- - migration/migration.c | 24 ++++++++++++++---------- - 1 file changed, 14 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08007cef4e..99f86bd6c2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3443,6 +3443,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ /* -+ * Inactivate disks except in COLO, and track that we -+ * have done so in order to remember to reactivate -+ * them if migration fails or is cancelled. -+ */ - s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -@@ -3487,13 +3492,13 @@ static void migration_completion(MigrationState *s) - rp_error = await_return_path_close_on_source(s); - trace_migration_return_path_end_after(rp_error); - if (rp_error) { -- goto fail_invalidate; -+ goto fail; - } - } - - if (qemu_file_get_error(s->to_dst_file)) { - trace_migration_completion_file_err(); -- goto fail_invalidate; -+ goto fail; - } - - if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -@@ -3507,26 +3512,25 @@ static void migration_completion(MigrationState *s) - - return; - --fail_invalidate: -- /* If not doing postcopy, vm_start() will be called: let's regain -- * control on images. 
-- */ -- if (s->state == MIGRATION_STATUS_ACTIVE || -- s->state == MIGRATION_STATUS_DEVICE) { -+fail: -+ if (s->block_inactive && (s->state == MIGRATION_STATUS_ACTIVE || -+ s->state == MIGRATION_STATUS_DEVICE)) { -+ /* -+ * If not doing postcopy, vm_start() will be called: let's -+ * regain control on images. -+ */ - Error *local_err = NULL; - - qemu_mutex_lock_iothread(); - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -- s->block_inactive = true; - } else { - s->block_inactive = false; - } - qemu_mutex_unlock_iothread(); - } - --fail: - migrate_set_state(&s->state, current_active_state, - MIGRATION_STATUS_FAILED); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cap_set.patch b/SOURCES/kvm-migration-Create-migrate_cap_set.patch deleted file mode 100644 index 33268bb..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cap_set.patch +++ /dev/null @@ -1,93 +0,0 @@ -From d772464e9a51a085e10864b2dc7ffd49991fc23b Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:02:42 +0100 -Subject: [PATCH 22/56] migration: Create migrate_cap_set() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [21/50] 5b12f04013cf2d374a869134bb67c938c789e24d (peterx/qemu-kvm) - -And remove the convoluted use of qmp_migrate_set_capabilities() to -enable disable MIGRATION_CAPABILITY_BLOCK. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9eb1109cfba5415dd0b0cb82e80fc5e42fe861b7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 34 ++++++++++++++++------------------ - 1 file changed, 16 insertions(+), 18 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index b745d829a4..18058fb597 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1912,25 +1912,24 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index, -- bool state) -+static bool migrate_cap_set(int cap, bool value, Error **errp) - { -- MigrationCapabilityStatus *cap; -- -- cap = g_new0(MigrationCapabilityStatus, 1); -- cap->capability = index; -- cap->state = state; -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - -- return cap; --} -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } - --void migrate_set_block_enabled(bool value, Error **errp) --{ -- MigrationCapabilityStatusList *cap = NULL; -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; - -- QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value)); -- qmp_migrate_set_capabilities(cap, errp); -- qapi_free_MigrationCapabilityStatusList(cap); -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; - } - - static void migrate_set_block_incremental(MigrationState *s, bool value) -@@ -1942,7 +1941,7 @@ static void block_cleanup_parameters(MigrationState *s) - { - if (s->must_remove_block_options) { - /* setting to false can never fail */ -- migrate_set_block_enabled(false, &error_abort); -+ migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, false, &error_abort); - migrate_set_block_incremental(s, false); - s->must_remove_block_options = false; - 
} -@@ -2429,8 +2428,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - "current migration capabilities"); - return false; - } -- migrate_set_block_enabled(true, &local_err); -- if (local_err) { -+ if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { - error_propagate(errp, local_err); - return false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch b/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch deleted file mode 100644 index 408d258..0000000 --- a/SOURCES/kvm-migration-Create-migrate_checkpoint_delay.patch +++ /dev/null @@ -1,84 +0,0 @@ -From a17bee3c8ab48daa471ec53bed0e2cb0bb41fc76 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:04:55 +0100 -Subject: [PATCH 41/56] migration: Create migrate_checkpoint_delay() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [40/50] b972d3f12e49dc27aa78eb723ca6d0fac4d174d8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f94a858fa3e72ba954a338c01ae9fecc15fcce5c) -Signed-off-by: Peter Xu ---- - migration/colo.c | 5 ++--- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 3 files changed, 12 insertions(+), 3 deletions(-) - -diff --git a/migration/colo.c b/migration/colo.c -index 93b78c9270..07bfa21fea 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -576,7 +576,7 @@ static void colo_process_checkpoint(MigrationState *s) - trace_colo_vm_state_change("stop", "run"); - - timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + -- s->parameters.x_checkpoint_delay); -+ migrate_checkpoint_delay()); - - while (s->state == MIGRATION_STATUS_COLO) { - if 
(failover_get_state() != FAILOVER_STATUS_NONE) { -@@ -651,8 +651,7 @@ void colo_checkpoint_notify(void *opaque) - - qemu_event_set(&s->colo_checkpoint_event); - s->colo_checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); -- next_notify_time = s->colo_checkpoint_time + -- s->parameters.x_checkpoint_delay; -+ next_notify_time = s->colo_checkpoint_time + migrate_checkpoint_delay(); - timer_mod(s->colo_delay_timer, next_notify_time); - } - -diff --git a/migration/options.c b/migration/options.c -index b9f3815f7e..0e102e5700 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -472,6 +472,15 @@ bool migrate_block_incremental(void) - return s->parameters.block_incremental; - } - -+uint32_t migrate_checkpoint_delay(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.x_checkpoint_delay; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index aa54443353..adc2879bbb 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - /* parameters */ - - bool migrate_block_incremental(void); -+uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch deleted file mode 100644 index 65bad3c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_increment-func.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 7ff430e011780dad00e5ebaad0318c5fa3aec102 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:20:49 +0100 -Subject: [PATCH 45/56] migration: Create migrate_cpu_throttle_increment() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [44/50] aec990a106a0347b265f5c056a516e0b91e8183c (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9605c2ac282c565bb00b5f344217161bef29eff8) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index f7fb6999f7..31435d2b45 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_increment(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_increment; -+} -+ - uint8_t migrate_cpu_throttle_initial(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index fd8b91d767..49b29bdafd 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5e855d5c22..5645745a42 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -713,7 +713,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - { - MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); -- uint64_t pct_increment = s->parameters.cpu_throttle_increment; -+ uint64_t 
pct_increment = migrate_cpu_throttle_increment(); - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch deleted file mode 100644 index aab2013..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch +++ /dev/null @@ -1,75 +0,0 @@ -From fdc2f14bfb3ef8897310a7db63287a9bab1fb858 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:22:44 +0100 -Subject: [PATCH 44/56] migration: Create migrate_cpu_throttle_initial() to - option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [43/50] e0e0db7218f28aefd4bd022edbaec236e2030cb1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 2a8ec38082f8098f2693bb3632175453c0c84a51) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 3 files changed, 11 insertions(+), 1 deletion(-) - -diff --git a/migration/options.c b/migration/options.c -index 418aafac64..f7fb6999f7 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -509,6 +509,15 @@ int migrate_compress_wait_thread(void) - return s->parameters.compress_wait_thread; - } - -+uint8_t migrate_cpu_throttle_initial(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_initial; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 72b1a320b7..fd8b91d767 100644 ---- a/migration/options.h 
-+++ b/migration/options.h -@@ -50,6 +50,7 @@ uint32_t migrate_checkpoint_delay(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -+uint8_t migrate_cpu_throttle_initial(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5c786513ef..5e855d5c22 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -712,7 +712,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { - MigrationState *s = migrate_get_current(); -- uint64_t pct_initial = s->parameters.cpu_throttle_initial; -+ uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; - int pct_max = migrate_max_cpu_throttle(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch b/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch deleted file mode 100644 index e36f003..0000000 --- a/SOURCES/kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch +++ /dev/null @@ -1,78 +0,0 @@ -From b88c51c4b02639e28da73143b1da7bd3d6706ce5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:29:51 +0100 -Subject: [PATCH 46/56] migration: Create migrate_cpu_throttle_tailslow() - function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [45/50] e93e96392405c60f75abbf288e4fddb191bbc996 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 
873f674c559e3162a6e6e92994301d400c5cc873) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 31435d2b45..615534c151 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -527,6 +527,15 @@ uint8_t migrate_cpu_throttle_initial(void) - return s->parameters.cpu_throttle_initial; - } - -+bool migrate_cpu_throttle_tailslow(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.cpu_throttle_tailslow; -+} -+ - int migrate_decompress_threads(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 49b29bdafd..99f6bbd7a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -52,6 +52,7 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - uint8_t migrate_cpu_throttle_increment(void); - uint8_t migrate_cpu_throttle_initial(void); -+bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); -diff --git a/migration/ram.c b/migration/ram.c -index 5645745a42..01356f60a4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -711,10 +711,9 @@ static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, - static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t bytes_dirty_threshold) - { -- MigrationState *s = migrate_get_current(); - uint64_t pct_initial = migrate_cpu_throttle_initial(); - uint64_t pct_increment = migrate_cpu_throttle_increment(); -- bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -+ bool pct_tailslow = migrate_cpu_throttle_tailslow(); - int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); --- -2.39.1 - diff --git 
a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch b/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch deleted file mode 100644 index ba1d34c..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_bandwidth-function.patch +++ /dev/null @@ -1,232 +0,0 @@ -From b6228b3122f5c1f220f92042277ab1bfbb5ba086 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 11:00:12 +0100 -Subject: [PATCH 48/56] migration: Create migrate_max_bandwidth() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [47/50] 3874656f70cb9c2a30f4d63e146539480d422326 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 9c894df3a37d675652390f7dbbe2f65b7bad7efa) -Signed-off-by: Peter Xu ---- - migration/migration.c | 70 +------------------------------------- - migration/options.c | 79 +++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 81 insertions(+), 69 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 46a5ea4d42..c2e109329d 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,74 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationParameters *qmp_query_migrate_parameters(Error **errp) --{ -- MigrationParameters *params; -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- params = g_malloc0(sizeof(*params)); -- params->has_compress_level = true; -- params->compress_level = s->parameters.compress_level; -- params->has_compress_threads = true; -- 
params->compress_threads = s->parameters.compress_threads; -- params->has_compress_wait_thread = true; -- params->compress_wait_thread = s->parameters.compress_wait_thread; -- params->has_decompress_threads = true; -- params->decompress_threads = s->parameters.decompress_threads; -- params->has_throttle_trigger_threshold = true; -- params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -- params->has_cpu_throttle_initial = true; -- params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -- params->has_cpu_throttle_increment = true; -- params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -- params->has_cpu_throttle_tailslow = true; -- params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -- params->tls_creds = g_strdup(s->parameters.tls_creds); -- params->tls_hostname = g_strdup(s->parameters.tls_hostname); -- params->tls_authz = g_strdup(s->parameters.tls_authz ? -- s->parameters.tls_authz : ""); -- params->has_max_bandwidth = true; -- params->max_bandwidth = s->parameters.max_bandwidth; -- params->has_downtime_limit = true; -- params->downtime_limit = s->parameters.downtime_limit; -- params->has_x_checkpoint_delay = true; -- params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -- params->has_block_incremental = true; -- params->block_incremental = s->parameters.block_incremental; -- params->has_multifd_channels = true; -- params->multifd_channels = s->parameters.multifd_channels; -- params->has_multifd_compression = true; -- params->multifd_compression = s->parameters.multifd_compression; -- params->has_multifd_zlib_level = true; -- params->multifd_zlib_level = s->parameters.multifd_zlib_level; -- params->has_multifd_zstd_level = true; -- params->multifd_zstd_level = s->parameters.multifd_zstd_level; -- params->has_xbzrle_cache_size = true; -- params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -- params->has_max_postcopy_bandwidth = true; -- params->max_postcopy_bandwidth = 
s->parameters.max_postcopy_bandwidth; -- params->has_max_cpu_throttle = true; -- params->max_cpu_throttle = s->parameters.max_cpu_throttle; -- params->has_announce_initial = true; -- params->announce_initial = s->parameters.announce_initial; -- params->has_announce_max = true; -- params->announce_max = s->parameters.announce_max; -- params->has_announce_rounds = true; -- params->announce_rounds = s->parameters.announce_rounds; -- params->has_announce_step = true; -- params->announce_step = s->parameters.announce_step; -- -- if (s->parameters.has_block_bitmap_mapping) { -- params->has_block_bitmap_mapping = true; -- params->block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- s->parameters.block_bitmap_mapping); -- } -- -- return params; --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e. any of the active or setup states) -@@ -3775,7 +3707,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ -- rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO; -+ rate_limit = migrate_max_bandwidth() / XFER_LIMIT_RATIO; - - /* Notify before starting migration thread */ - notifier_list_notify(&migration_state_notifiers, s); -diff --git a/migration/options.c b/migration/options.c -index 8bd2d949ae..8e8753d9be 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,8 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration/misc.h" -@@ -562,6 +564,15 @@ uint8_t migrate_max_cpu_throttle(void) - return s->parameters.max_cpu_throttle; - } - -+uint64_t migrate_max_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_bandwidth; -+} -+ - int64_t 
migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -@@ -641,3 +652,71 @@ AnnounceParameters *migrate_announce_params(void) - - return &ap; - } -+ -+MigrationParameters *qmp_query_migrate_parameters(Error **errp) -+{ -+ MigrationParameters *params; -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ params = g_malloc0(sizeof(*params)); -+ params->has_compress_level = true; -+ params->compress_level = s->parameters.compress_level; -+ params->has_compress_threads = true; -+ params->compress_threads = s->parameters.compress_threads; -+ params->has_compress_wait_thread = true; -+ params->compress_wait_thread = s->parameters.compress_wait_thread; -+ params->has_decompress_threads = true; -+ params->decompress_threads = s->parameters.decompress_threads; -+ params->has_throttle_trigger_threshold = true; -+ params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; -+ params->has_cpu_throttle_initial = true; -+ params->cpu_throttle_initial = s->parameters.cpu_throttle_initial; -+ params->has_cpu_throttle_increment = true; -+ params->cpu_throttle_increment = s->parameters.cpu_throttle_increment; -+ params->has_cpu_throttle_tailslow = true; -+ params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow; -+ params->tls_creds = g_strdup(s->parameters.tls_creds); -+ params->tls_hostname = g_strdup(s->parameters.tls_hostname); -+ params->tls_authz = g_strdup(s->parameters.tls_authz ? 
-+ s->parameters.tls_authz : ""); -+ params->has_max_bandwidth = true; -+ params->max_bandwidth = s->parameters.max_bandwidth; -+ params->has_downtime_limit = true; -+ params->downtime_limit = s->parameters.downtime_limit; -+ params->has_x_checkpoint_delay = true; -+ params->x_checkpoint_delay = s->parameters.x_checkpoint_delay; -+ params->has_block_incremental = true; -+ params->block_incremental = s->parameters.block_incremental; -+ params->has_multifd_channels = true; -+ params->multifd_channels = s->parameters.multifd_channels; -+ params->has_multifd_compression = true; -+ params->multifd_compression = s->parameters.multifd_compression; -+ params->has_multifd_zlib_level = true; -+ params->multifd_zlib_level = s->parameters.multifd_zlib_level; -+ params->has_multifd_zstd_level = true; -+ params->multifd_zstd_level = s->parameters.multifd_zstd_level; -+ params->has_xbzrle_cache_size = true; -+ params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; -+ params->has_max_postcopy_bandwidth = true; -+ params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth; -+ params->has_max_cpu_throttle = true; -+ params->max_cpu_throttle = s->parameters.max_cpu_throttle; -+ params->has_announce_initial = true; -+ params->announce_initial = s->parameters.announce_initial; -+ params->has_announce_max = true; -+ params->announce_max = s->parameters.announce_max; -+ params->has_announce_rounds = true; -+ params->announce_rounds = s->parameters.announce_rounds; -+ params->has_announce_step = true; -+ params->announce_step = s->parameters.announce_step; -+ -+ if (s->parameters.has_block_bitmap_mapping) { -+ params->has_block_bitmap_mapping = true; -+ params->block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ s->parameters.block_bitmap_mapping); -+ } -+ -+ return params; -+} -diff --git a/migration/options.h b/migration/options.h -index 093bc907a1..1b78fa9f3d 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -64,6 +64,7 @@ uint8_t 
migrate_cpu_throttle_initial(void); - bool migrate_cpu_throttle_tailslow(void); - int migrate_decompress_threads(void); - uint8_t migrate_max_cpu_throttle(void); -+uint64_t migrate_max_bandwidth(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch b/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch deleted file mode 100644 index 6628b80..0000000 --- a/SOURCES/kvm-migration-Create-migrate_max_cpu_throttle.patch +++ /dev/null @@ -1,88 +0,0 @@ -From f0d4e34b00f66d2336b755a34a1ba226571641c4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:13:01 +0100 -Subject: [PATCH 42/56] migration: Create migrate_max_cpu_throttle() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [41/50] fc7537c06d8e1f53d7bb552661f6ddb0133a978d (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 24155bd0520035d5148c0af5b925932c4d8064a8) -Signed-off-by: Peter Xu ---- - migration/migration.h | 2 -- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 4 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 86051af132..3ae938b19c 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,8 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --int migrate_max_cpu_throttle(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git 
a/migration/options.c b/migration/options.c -index 0e102e5700..2cb04fbbd1 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -517,6 +517,15 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - -+uint8_t migrate_max_cpu_throttle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_cpu_throttle; -+} -+ - int64_t migrate_max_postcopy_bandwidth(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index adc2879bbb..72b1a320b7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -51,6 +51,7 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); -+uint8_t migrate_max_cpu_throttle(void); - int64_t migrate_max_postcopy_bandwidth(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); -diff --git a/migration/ram.c b/migration/ram.c -index e82cee97c3..5c786513ef 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -715,7 +715,7 @@ static void mig_throttle_guest_down(uint64_t bytes_dirty_period, - uint64_t pct_initial = s->parameters.cpu_throttle_initial; - uint64_t pct_increment = s->parameters.cpu_throttle_increment; - bool pct_tailslow = s->parameters.cpu_throttle_tailslow; -- int pct_max = s->parameters.max_cpu_throttle; -+ int pct_max = migrate_max_cpu_throttle(); - - uint64_t throttle_now = cpu_throttle_get_percentage(); - uint64_t cpu_now, cpu_ideal, throttle_inc; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch b/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch deleted file mode 100644 index c7799f1..0000000 --- a/SOURCES/kvm-migration-Create-migrate_rdma_pin_all-function.patch +++ /dev/null @@ -1,95 +0,0 @@ -From e4ef0f2cee6cdf2cf4bd225ac9e610f41d66dfcb Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 
22:41:55 +0100 -Subject: [PATCH 32/56] migration: Create migrate_rdma_pin_all() function -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [31/50] 206d96d47d9ee73ddc89dd01186560bf62ea5295 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fixed missing space after comma (fabiano) - -(cherry picked from commit 17cba690cdd42108369fafe6b07bff09872fbea6) -Signed-off-by: Peter Xu ---- - migration/options.c | 7 +++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 3 files changed, 11 insertions(+), 3 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2003e413da..9c9b8e5863 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - -+bool migrate_rdma_pin_all(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL]; -+} -+ - bool migrate_release_ram(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 316efd1063..25c002b37a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); -+bool migrate_rdma_pin_all(void); - bool migrate_release_ram(void); - bool migrate_return_path(void); - bool migrate_validate_uuid(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index bf55e2f163..0af5e944f0 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -35,6 +35,7 @@ - #include - #include "trace.h" - #include 
"qom/object.h" -+#include "options.h" - #include - - /* -@@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - goto err; - } - -- ret = qemu_rdma_source_init(rdma, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ ret = qemu_rdma_source_init(rdma, migrate_rdma_pin_all(), errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ migrate_rdma_pin_all(), errp); - - if (ret) { - goto return_path_err; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch b/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch deleted file mode 100644 index 5fc1072..0000000 --- a/SOURCES/kvm-migration-Create-migrate_throttle_trigger_threshold.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 27862b9d31da6447b60f185cdad95764018c6bc6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:59:13 +0100 -Subject: [PATCH 40/56] migration: Create migrate_throttle_trigger_threshold() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [39/50] b8af9080c49be3d38bd2784d61289be89c03db3e (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit 6499efdb16e5c1288b4c8390d3bf68b313329b8b) -Signed-off-by: Peter Xu ---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 3 +-- - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index 2b6d88b4b9..b9f3815f7e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -554,6 +554,15 @@ 
int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - -+uint8_t migrate_throttle_trigger_threshold(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.throttle_trigger_threshold; -+} -+ - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 96d5a8e6e4..aa54443353 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -55,6 +55,7 @@ int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); -+uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - - #endif -diff --git a/migration/ram.c b/migration/ram.c -index 4576d0d849..e82cee97c3 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1178,8 +1178,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - - static void migration_trigger_throttle(RAMState *rs) - { -- MigrationState *s = migrate_get_current(); -- uint64_t threshold = s->parameters.throttle_trigger_threshold; -+ uint64_t threshold = migrate_throttle_trigger_threshold(); - uint64_t bytes_xfer_period = - stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Create-options.c.patch b/SOURCES/kvm-migration-Create-options.c.patch deleted file mode 100644 index ea60202..0000000 --- a/SOURCES/kvm-migration-Create-options.c.patch +++ /dev/null @@ -1,524 +0,0 @@ -From 282634a835f4711c8b501dd76c344058bc399fbd Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 21:18:45 +0100 -Subject: [PATCH 23/56] migration: Create options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures 
for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [22/50] 10c9be528b9fcfae93f1a12fcd09db1a69e58f64 (peterx/qemu-kvm) - -We move there all capabilities helpers from migration.c. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert - ---- - -Following David advise: -- looked through the history, capabilities are newer than 2012, so we - can remove that bit of the header. -- This part is posterior to Anthony. - Original Author is Orit. Once there, - I put myself. Peter Xu also did quite a bit of work here. - Anyone else wants/needs to be there? I didn't search too hard - because nobody asked before to be added. - -What do you think? - -(cherry picked from commit 1f0776f1c03312aad5d6a5f98871240bc3af01e5) -Signed-off-by: Peter Xu ---- - hw/virtio/virtio-balloon.c | 1 + - migration/block-dirty-bitmap.c | 1 + - migration/block.c | 1 + - migration/colo.c | 1 + - migration/meson.build | 1 + - migration/migration.c | 109 +---------------------------- - migration/migration.h | 12 ---- - migration/options.c | 124 +++++++++++++++++++++++++++++++++ - migration/options.h | 32 +++++++++ - migration/postcopy-ram.c | 1 + - migration/ram.c | 1 + - migration/savevm.c | 1 + - migration/socket.c | 1 + - 13 files changed, 166 insertions(+), 120 deletions(-) - create mode 100644 migration/options.c - create mode 100644 migration/options.h - -diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c -index 746f07c4d2..43092aa634 100644 ---- a/hw/virtio/virtio-balloon.c -+++ b/hw/virtio/virtio-balloon.c -@@ -32,6 +32,7 @@ - #include "qemu/error-report.h" - #include "migration/misc.h" - #include "migration/migration.h" -+#include "migration/options.h" - - #include "hw/virtio/virtio-bus.h" - #include "hw/virtio/virtio-access.h" -diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c -index fe73aa94b1..a6ffae0002 100644 ---- 
a/migration/block-dirty-bitmap.c -+++ b/migration/block-dirty-bitmap.c -@@ -79,6 +79,7 @@ - #include "qapi/qapi-visit-migration.h" - #include "qapi/clone-visitor.h" - #include "trace.h" -+#include "options.h" - - #define CHUNK_SIZE (1 << 10) - -diff --git a/migration/block.c b/migration/block.c -index b2497bbd32..4b167fa5cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -28,6 +28,7 @@ - #include "migration/vmstate.h" - #include "sysemu/block-backend.h" - #include "trace.h" -+#include "options.h" - - #define BLK_MIG_BLOCK_SIZE (1ULL << 20) - #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) -diff --git a/migration/colo.c b/migration/colo.c -index 0716e64689..93b78c9270 100644 ---- a/migration/colo.c -+++ b/migration/colo.c -@@ -36,6 +36,7 @@ - #include "sysemu/cpus.h" - #include "sysemu/runstate.h" - #include "net/filter.h" -+#include "options.h" - - static bool vmstate_loading; - static Notifier packets_compare_notifier; -diff --git a/migration/meson.build b/migration/meson.build -index 0d1bb9f96e..480ff6854a 100644 ---- a/migration/meson.build -+++ b/migration/meson.build -@@ -22,6 +22,7 @@ softmmu_ss.add(files( - 'migration.c', - 'multifd.c', - 'multifd-zlib.c', -+ 'options.c', - 'postcopy-ram.c', - 'savevm.c', - 'socket.c', -diff --git a/migration/migration.c b/migration/migration.c -index 18058fb597..66ea55be06 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,6 +63,7 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -357,15 +358,6 @@ static void migrate_generate_event(int new_state) - } - } - --static bool migrate_late_block_activate(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; --} -- - /* - * Send a message on the return channel back to the source - * of the 
migration. -@@ -2525,56 +2517,11 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_release_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; --} -- --bool migrate_postcopy_ram(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; --} -- - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_auto_converge(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; --} -- --bool migrate_zero_blocks(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; --} -- --bool migrate_postcopy_blocktime(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; --} -- - bool migrate_use_compression(void) - { - MigrationState *s; -@@ -2620,33 +2567,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_dirty_bitmaps(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; --} -- --bool migrate_ignore_shared(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; --} -- --bool migrate_validate_uuid(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; --} -- - bool migrate_use_events(void) - { - MigrationState *s; -@@ -2665,15 +2585,6 @@ bool migrate_use_multifd(void) - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - --bool migrate_pause_before_switchover(void) --{ -- MigrationState *s; -- -- s 
= migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -@@ -2785,24 +2696,6 @@ bool migrate_use_block_incremental(void) - return s->parameters.block_incremental; - } - --bool migrate_background_snapshot(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; --} -- --bool migrate_postcopy_preempt(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 04e0860b4e..a25fed6ef0 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,16 +449,7 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_release_ram(void); --bool migrate_postcopy_ram(void); --bool migrate_zero_blocks(void); --bool migrate_dirty_bitmaps(void); --bool migrate_ignore_shared(void); --bool migrate_validate_uuid(void); -- --bool migrate_auto_converge(void); - bool migrate_use_multifd(void); --bool migrate_pause_before_switchover(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -@@ -487,9 +478,6 @@ int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); - bool migrate_use_events(void); --bool migrate_postcopy_blocktime(void); --bool migrate_background_snapshot(void); --bool migrate_postcopy_preempt(void); - - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -new file mode 100644 -index 0000000000..88a9a45913 ---- 
/dev/null -+++ b/migration/options.c -@@ -0,0 +1,124 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#include "qemu/osdep.h" -+#include "migration.h" -+#include "options.h" -+ -+bool migrate_auto_converge(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+} -+ -+bool migrate_background_snapshot(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+} -+ -+bool migrate_dirty_bitmaps(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+} -+ -+bool migrate_ignore_shared(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+} -+ -+bool migrate_late_block_activate(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+} -+ -+bool migrate_pause_before_switchover(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+} -+ -+bool migrate_postcopy_blocktime(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+} -+ -+bool migrate_postcopy_preempt(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+} -+ -+bool migrate_postcopy_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+} -+ -+bool 
migrate_release_ram(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+} -+ -+bool migrate_validate_uuid(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+} -+ -+bool migrate_zero_blocks(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+} -diff --git a/migration/options.h b/migration/options.h -new file mode 100644 -index 0000000000..0dfa0af245 ---- /dev/null -+++ b/migration/options.h -@@ -0,0 +1,32 @@ -+/* -+ * QEMU migration capabilities -+ * -+ * Copyright (c) 2012-2023 Red Hat Inc -+ * -+ * Authors: -+ * Orit Wasserman -+ * Juan Quintela -+ * -+ * This work is licensed under the terms of the GNU GPL, version 2 or later. -+ * See the COPYING file in the top-level directory. -+ */ -+ -+#ifndef QEMU_MIGRATION_OPTIONS_H -+#define QEMU_MIGRATION_OPTIONS_H -+ -+/* capabilities */ -+ -+bool migrate_auto_converge(void); -+bool migrate_background_snapshot(void); -+bool migrate_dirty_bitmaps(void); -+bool migrate_ignore_shared(void); -+bool migrate_late_block_activate(void); -+bool migrate_pause_before_switchover(void); -+bool migrate_postcopy_blocktime(void); -+bool migrate_postcopy_preempt(void); -+bool migrate_postcopy_ram(void); -+bool migrate_release_ram(void); -+bool migrate_validate_uuid(void); -+bool migrate_zero_blocks(void); -+ -+#endif -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index d7b48dd920..0711500036 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -37,6 +37,7 @@ - #include "tls.h" - #include "qemu/userfaultfd.h" - #include "qemu/mmap-alloc.h" -+#include "options.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -diff --git a/migration/ram.c b/migration/ram.c -index 229714045a..912ccd89fa 100644 ---- a/migration/ram.c -+++ 
b/migration/ram.c -@@ -57,6 +57,7 @@ - #include "qemu/iov.h" - #include "multifd.h" - #include "sysemu/runstate.h" -+#include "options.h" - - #include "hw/boards.h" /* for machine_dump_guest_core() */ - -diff --git a/migration/savevm.c b/migration/savevm.c -index 589ef926ab..ebcf571e37 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -67,6 +67,7 @@ - #include "qemu/yank.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" -+#include "options.h" - - const unsigned int postcopy_ram_discard_version; - -diff --git a/migration/socket.c b/migration/socket.c -index e6fdf3c5e1..ebf9ac41af 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -27,6 +27,7 @@ - #include "io/net-listener.h" - #include "trace.h" - #include "postcopy-ram.h" -+#include "options.h" - - struct SocketOutgoingArgs { - SocketAddress *saddr; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch b/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch deleted file mode 100644 index e08e5df..0000000 --- a/SOURCES/kvm-migration-Enable-switchover-ack-capability.patch +++ /dev/null @@ -1,56 +0,0 @@ -From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 12/37] migration: Enable switchover ack capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 538ef4fe2f72 -Author: Avihai Horon -Date: Wed Jun 21 14:11:56 2023 +0300 - - migration: Enable switchover ack capability - - Now that switchover ack logic has been implemented, enable the - capability. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Juan Quintela - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 4 ---- - 1 file changed, 4 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index c3df6c6dde..ccd7ef3907 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - "'return-path'"); - return false; - } -- -- /* Disable this capability until it's implemented */ -- error_setg(errp, "'switchover-ack' is not implemented yet"); -- return false; - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch b/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch deleted file mode 100644 index 26c8437..0000000 --- a/SOURCES/kvm-migration-Handle-block-device-inactivation-failures-.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 2aac64623d8d2d06d248c1bcc71aa13572fc843c Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Fri, 14 Apr 2023 10:33:58 -0500 -Subject: [PATCH 1/2] migration: Handle block device inactivation failures - better - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/2] 5ae143c9234f6eee9fc5154944172bcd56975b36 (ebblake/centos-qemu-kvm) - -Consider what happens when performing a migration between two host -machines connected to an NFS server serving multiple block devices to -the guest, when the NFS server becomes unavailable. The migration -attempts to inactivate all block devices on the source (a necessary -step before the destination can take over); but if the NFS server is -non-responsive, the attempt to inactivate can itself fail. 
When that -happens, the destination fails to get the migrated guest (good, -because the source wasn't able to flush everything properly): - - (qemu) qemu-kvm: load of migration failed: Input/output error - -at which point, our only hope for the guest is for the source to take -back control. With the current code base, the host outputs a message, but then appears to resume: - - (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1) - - (src qemu)info status - VM status: running - -but a second migration attempt now asserts: - - (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed. - -Whether the guest is recoverable on the source after the first failure -is debatable, but what we do not want is to have qemu itself fail due -to an assertion. It looks like the problem is as follows: - -In migration.c:migration_completion(), the source sets 'inactivate' to -true (since COLO is not enabled), then tries -savevm.c:qemu_savevm_state_complete_precopy() with a request to -inactivate block devices. In turn, this calls -block.c:bdrv_inactivate_all(), which fails when flushing runs up -against the non-responsive NFS server. With savevm failing, we are -now left in a state where some, but not all, of the block devices have -been inactivated; but migration_completion() then jumps to 'fail' -rather than 'fail_invalidate' and skips an attempt to reclaim those -those disks by calling bdrv_activate_all(). Even if we do attempt to -reclaim disks, we aren't taking note of failure there, either. 
- -Thus, we have reached a state where the migration engine has forgotten -all state about whether a block device is inactive, because we did not -set s->block_inactive in enough places; so migration allows the source -to reach vm_start() and resume execution, violating the block layer -invariant that the guest CPUs should not be restarted while a device -is inactive. Note that the code in migration.c:migrate_fd_cancel() -will also try to reactivate all block devices if s->block_inactive was -set, but because we failed to set that flag after the first failure, -the source assumes it has reclaimed all devices, even though it still -has remaining inactivated devices and does not try again. Normally, -qmp_cont() will also try to reactivate all disks (or correctly fail if -the disks are not reclaimable because NFS is not yet back up), but the -auto-resumption of the source after a migration failure does not go -through qmp_cont(). And because we have left the block layer in an -inconsistent state with devices still inactivated, the later migration -attempt is hitting the assertion failure. - -Since it is important to not resume the source with inactive disks, -this patch marks s->block_inactive before attempting inactivation, -rather than after succeeding, in order to prevent any vm_start() until -it has successfully reactivated all devices. 
- -See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982 - -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Acked-by: Lukas Straub -Tested-by: Lukas Straub -Signed-off-by: Juan Quintela -(cherry picked from commit 403d18ae384239876764bbfa111d6cc5dcb673d1) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index bda4789193..cb0d42c061 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3444,13 +3444,11 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -+ s->block_inactive = inactivate; - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - inactivate); - } -- if (inactivate && ret >= 0) { -- s->block_inactive = true; -- } - } - qemu_mutex_unlock_iothread(); - -@@ -3522,6 +3520,7 @@ fail_invalidate: - bdrv_activate_all(&local_err); - if (local_err) { - error_report_err(local_err); -+ s->block_inactive = true; - } else { - s->block_inactive = false; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch b/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch deleted file mode 100644 index 49b9f12..0000000 --- a/SOURCES/kvm-migration-Implement-switchover-ack-logic.patch +++ /dev/null @@ -1,339 +0,0 @@ -From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 11/37] migration: Implement switchover ack logic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) - -Bugzilla: 
https://bugzilla.redhat.com/2192818 - -commit 1b4adb10f898 -Author: Avihai Horon -Date: Wed Jun 21 14:11:55 2023 +0300 - - migration: Implement switchover ack logic - - Implement switchover ack logic. This prevents the source from stopping - the VM and completing the migration until an ACK is received from the - destination that it's OK to do so. - - To achieve this, a new SaveVMHandlers handler switchover_ack_needed() - and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. - - The switchover_ack_needed() handler is called during migration setup in - the destination to check if switchover ack is used by the migrated - device. - - When switchover is approved by all migrated devices in the destination - that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path - message is sent to the source to notify it that it's OK to do - switchover. - - Signed-off-by: Avihai Horon - Reviewed-by: Peter Xu - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - context changes due to commit f4584076fc31 ("migration: switch - from .vm_was_running to .vm_old_state") - -Signed-off-by: Cédric Le Goater ---- - include/migration/register.h | 2 ++ - migration/migration.c | 32 +++++++++++++++++++-- - migration/migration.h | 14 ++++++++++ - migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ - migration/savevm.h | 1 + - migration/trace-events | 3 ++ - 6 files changed, 104 insertions(+), 2 deletions(-) - -diff --git a/include/migration/register.h b/include/migration/register.h -index a8dfd8fefd..90914f32f5 100644 ---- a/include/migration/register.h -+++ b/include/migration/register.h -@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { - int (*load_cleanup)(void *opaque); - /* Called when postcopy migration wants to resume from failure */ - int (*resume_prepare)(MigrationState *s, void *opaque); -+ /* Checks if switchover ack should be used. 
Called only in dest */ -+ bool (*switchover_ack_needed)(void *opaque); - } SaveVMHandlers; - - int register_savevm_live(const char *idstr, -diff --git a/migration/migration.c b/migration/migration.c -index 1ac5f19bc2..9bf1caee6c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -76,6 +76,7 @@ enum mig_rp_message_type { - MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ - MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ - MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ -+ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ - - MIG_RP_MSG_MAX - }; -@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) - return true; - } - -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) -+{ -+ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); -+} -+ - /* - * Send a 'SHUT' message on the return channel with the given value - * to indicate that we've finished with the RP. Non-0 value indicates -@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) - s->vm_was_running = false; - s->iteration_initial_bytes = 0; - s->threshold_size = 0; -+ s->switchover_acked = false; - } - - int migrate_add_blocker_internal(Error *reason, Error **errp) -@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { - [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, - [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, - [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, -+ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, - [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, - }; - -@@ -1969,6 +1977,11 @@ retry: - } - break; - -+ case MIG_RP_MSG_SWITCHOVER_ACK: -+ ms->switchover_acked = true; -+ trace_source_return_path_thread_switchover_acked(); -+ break; -+ - default: - break; - } -@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, - bandwidth, s->threshold_size); - } - -+static bool 
migration_can_switchover(MigrationState *s) -+{ -+ if (!migrate_switchover_ack()) { -+ return true; -+ } -+ -+ /* No reason to wait for switchover ACK if VM is stopped */ -+ if (!runstate_is_running()) { -+ return true; -+ } -+ -+ return s->switchover_acked; -+} -+ - /* Migration thread iteration status */ - typedef enum { - MIG_ITERATE_RESUME, /* Resume current iteration */ -@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) - { - uint64_t must_precopy, can_postcopy; - bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; -+ bool can_switchover = migration_can_switchover(s); - - qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); - uint64_t pending_size = must_precopy + can_postcopy; -@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) - trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); - } - -- if (!pending_size || pending_size < s->threshold_size) { -+ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { - trace_migration_thread_low_pending(pending_size); - migration_completion(s); - return MIG_ITERATE_BREAK; - } - - /* Still a significant amount to transfer */ -- if (!in_postcopy && must_precopy <= s->threshold_size && -+ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && - qatomic_read(&s->start_postcopy)) { - if (postcopy_start(s)) { - error_report("%s: postcopy failed to start", __func__); -diff --git a/migration/migration.h b/migration/migration.h -index 2b71df8617..e9679f8029 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -204,6 +204,13 @@ struct MigrationIncomingState { - * contains valid information. - */ - QemuMutex page_request_mutex; -+ -+ /* -+ * Number of devices that have yet to approve switchover. When this reaches -+ * zero an ACK that it's OK to do switchover is sent to the source. No lock -+ * is needed as this field is updated serially. 
-+ */ -+ unsigned int switchover_ack_pending_num; - }; - - MigrationIncomingState *migration_incoming_get_current(void); -@@ -421,6 +428,12 @@ struct MigrationState { - - /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ - JSONWriter *vmdesc; -+ -+ /* -+ * Indicates whether an ACK from the destination that it's OK to do -+ * switchover has been received. -+ */ -+ bool switchover_acked; - }; - - void migrate_set_state(int *state, int old_state, int new_state); -@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, - void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, - char *block_name); - void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); -+int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); - - void dirty_bitmap_mig_before_vm_start(void); - void dirty_bitmap_mig_cancel_outgoing(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index 211eff3a8b..aff70e6263 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) - error_report("CMD_OPEN_RETURN_PATH failed"); - return -1; - } -+ -+ /* -+ * Switchover ack is enabled but no device uses it, so send an ACK to -+ * source that it's OK to switchover. Do it here, after return path has -+ * been created. 
-+ */ -+ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { -+ int ret = migrate_send_rp_switchover_ack(mis); -+ if (ret) { -+ error_report( -+ "Could not send switchover ack RP MSG, err %d (%s)", ret, -+ strerror(-ret)); -+ return ret; -+ } -+ } - break; - - case MIG_CMD_PING: -@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) - return 0; - } - -+static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) -+{ -+ SaveStateEntry *se; -+ -+ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { -+ if (!se->ops || !se->ops->switchover_ack_needed) { -+ continue; -+ } -+ -+ if (se->ops->switchover_ack_needed(se->opaque)) { -+ mis->switchover_ack_pending_num++; -+ } -+ } -+ -+ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); -+} -+ - static int qemu_loadvm_state_setup(QEMUFile *f) - { - SaveStateEntry *se; -@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) - return -EINVAL; - } - -+ if (migrate_switchover_ack()) { -+ qemu_loadvm_state_switchover_ack_needed(mis); -+ } -+ - cpu_synchronize_all_pre_loadvm(); - - ret = qemu_loadvm_state_main(f, mis); -@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) - return 0; - } - -+int qemu_loadvm_approve_switchover(void) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+ if (!mis->switchover_ack_pending_num) { -+ return -EINVAL; -+ } -+ -+ mis->switchover_ack_pending_num--; -+ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); -+ -+ if (mis->switchover_ack_pending_num) { -+ return 0; -+ } -+ -+ return migrate_send_rp_switchover_ack(mis); -+} -+ - bool save_snapshot(const char *name, bool overwrite, const char *vmstate, - bool has_devices, strList *devices, Error **errp) - { -diff --git a/migration/savevm.h b/migration/savevm.h -index fb636735f0..e894bbc143 100644 ---- a/migration/savevm.h -+++ b/migration/savevm.h -@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); - void 
qemu_loadvm_state_cleanup(void); - int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); - int qemu_load_device_state(QEMUFile *f); -+int qemu_loadvm_approve_switchover(void); - int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, - bool in_postcopy, bool inactivate_disks); - -diff --git a/migration/trace-events b/migration/trace-events -index 92161eeac5..cda807d271 100644 ---- a/migration/trace-events -+++ b/migration/trace-events -@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" - qemu_loadvm_state_post_main(int ret) "%d" - qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" - qemu_savevm_send_packaged(void) "" -+loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - loadvm_state_setup(void) "" - loadvm_state_cleanup(void) "" - loadvm_handle_cmd_packaged(unsigned int length) "%u" -@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" - loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" - loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" - loadvm_process_command_ping(uint32_t val) "0x%x" -+loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" - postcopy_ram_listen_thread_exit(void) "" - postcopy_ram_listen_thread_start(void) "" - qemu_savevm_send_postcopy_advise(void) "" -@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" - source_return_path_thread_pong(uint32_t val) "0x%x" - source_return_path_thread_shut(uint32_t val) "0x%x" - source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 -+source_return_path_thread_switchover_acked(void) "" - migration_thread_low_pending(uint64_t pending) "%" PRIu64 - migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" 
PRIu64 " max_size %" PRId64 - process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" --- -2.39.3 - diff --git a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch deleted file mode 100644 index f873f3f..0000000 --- a/SOURCES/kvm-migration-Make-all-functions-check-have-the-same-for.patch +++ /dev/null @@ -1,431 +0,0 @@ -From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 08/37] migration: Make all functions check have the same - format -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8f9c532756c5 -Author: Juan Quintela -Date: Wed Mar 1 23:11:08 2023 +0100 - - migration: Make all functions check have the same format - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/options.c | 153 +++++++++++--------------------------------- - 1 file changed, 39 insertions(+), 114 deletions(-) - -diff --git a/migration/options.c b/migration/options.c -index e51d667e14..bcfe244fa9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,27 +33,21 @@ - - bool migrate_auto_converge(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_background_snapshot(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return 
s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_block(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } -@@ -61,95 +55,76 @@ bool migrate_block(void) - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -+ - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - bool migrate_compress(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - bool migrate_dirty_bitmaps(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_events(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_ignore_shared(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_late_block_activate(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - bool migrate_multifd(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - bool migrate_postcopy_blocktime(void) - { -- MigrationState *s; -- -- s = 
migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_postcopy_preempt(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - bool migrate_postcopy_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } -@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) - - bool migrate_release_ram(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_return_path(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_validate_uuid(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool migrate_xbzrle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - bool migrate_zero_blocks(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_zero_copy_send(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -@@ -224,9 +187,7 @@ bool migrate_postcopy(void) - - bool migrate_tls(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ 
MigrationState *s = migrate_get_current(); - - return s->parameters.tls_creds && *s->parameters.tls_creds; - } -@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - bool migrate_block_incremental(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.block_incremental; - } - - uint32_t migrate_checkpoint_delay(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.x_checkpoint_delay; - } - - int migrate_compress_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_level; - } - - int migrate_compress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_threads; - } - - int migrate_compress_wait_thread(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.compress_wait_thread; - } - - uint8_t migrate_cpu_throttle_increment(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_increment; - } - - uint8_t migrate_cpu_throttle_initial(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_initial; - } - - bool migrate_cpu_throttle_tailslow(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.cpu_throttle_tailslow; - } - - int migrate_decompress_threads(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.decompress_threads; - } - - uint8_t 
migrate_max_cpu_throttle(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_cpu_throttle; - } - - uint64_t migrate_max_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_bandwidth; - } - - int64_t migrate_max_postcopy_bandwidth(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.max_postcopy_bandwidth; - } - - int migrate_multifd_channels(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_channels; - } - - MultiFDCompression migrate_multifd_compression(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); - return s->parameters.multifd_compression; -@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) - - int migrate_multifd_zlib_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zlib_level; - } - - int migrate_multifd_zstd_level(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.multifd_zstd_level; - } - - uint8_t migrate_throttle_trigger_threshold(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.throttle_trigger_threshold; - } - - uint64_t migrate_xbzrle_cache_size(void) - { -- MigrationState *s; -- -- s = migrate_get_current(); -+ MigrationState *s = migrate_get_current(); - - return s->parameters.xbzrle_cache_size; - } --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch deleted file mode 100644 index ad1de7b..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_count-atomic.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 886b511e0a225b1c4428c646534d7bcc65bd9e2a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:02:34 +0200 -Subject: [PATCH 14/56] migration: Make dirty_sync_count atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [13/50] ef3ae8cdd960e944ba9e73a53d54c9a5a55bb1ce (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 536b5a4e56ec67c958f46e7d46cbd5ac34e5a239) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 13 +++++++------ - migration/ram.h | 2 +- - 3 files changed, 10 insertions(+), 8 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8f2847d298..8fca751050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1148,7 +1148,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; -- info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -+ info->ram->dirty_sync_count = -+ stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; -diff --git a/migration/ram.c b/migration/ram.c -index b1722b6071..3c13136559 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -764,7 
+764,7 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) - /* We don't care if this fails to allocate a new cache page - * as long as it updated an old one */ - cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page, -- ram_counters.dirty_sync_count); -+ stat64_get(&ram_counters.dirty_sync_count)); - } - - #define ENCODING_FLAG_XBZRLE 0x1 -@@ -790,13 +790,13 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, - int encoded_len = 0, bytes_xbzrle; - uint8_t *prev_cached_page; - QEMUFile *file = pss->pss_channel; -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - -- if (!cache_is_cached(XBZRLE.cache, current_addr, -- ram_counters.dirty_sync_count)) { -+ if (!cache_is_cached(XBZRLE.cache, current_addr, generation)) { - xbzrle_counters.cache_miss++; - if (!rs->last_stage) { - if (cache_insert(XBZRLE.cache, current_addr, *current_data, -- ram_counters.dirty_sync_count) == -1) { -+ generation) == -1) { - return -1; - } else { - /* update *current_data when the page has been -@@ -1209,7 +1209,7 @@ static void migration_bitmap_sync(RAMState *rs) - RAMBlock *block; - int64_t end_time; - -- ram_counters.dirty_sync_count++; -+ stat64_add(&ram_counters.dirty_sync_count, 1); - - if (!rs->time_last_bitmap_sync) { - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); -@@ -1246,7 +1246,8 @@ static void migration_bitmap_sync(RAMState *rs) - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { -- qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -+ uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); -+ qapi_event_send_migration_pass(generation); - } - } - -diff --git a/migration/ram.h b/migration/ram.h -index bb52632424..8c0d07c43a 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -42,7 +42,7 @@ - */ - typedef struct { - int64_t dirty_pages_rate; -- int64_t dirty_sync_count; -+ Stat64 dirty_sync_count; - Stat64 
dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; - Stat64 duplicate; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch b/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch deleted file mode 100644 index b7b0f60..0000000 --- a/SOURCES/kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch +++ /dev/null @@ -1,92 +0,0 @@ -From e9ff20d7f7e6c2354f3696e8bca265e535eeb801 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:33:56 +0200 -Subject: [PATCH 11/56] migration: Make dirty_sync_missed_zero_copy atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [10/50] 041230abb087db0e7ffae02b4f85772490b805a0 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 4291823694fd8507831d26e2558d9cd0030841f7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/multifd.c | 2 +- - migration/ram.c | 5 ----- - migration/ram.h | 4 +--- - 4 files changed, 3 insertions(+), 10 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index ca68808b5c..645fb4b3c5 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1150,7 +1150,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; - info->ram->dirty_sync_missed_zero_copy = -- ram_counters.dirty_sync_missed_zero_copy; -+ stat64_get(&ram_counters.dirty_sync_missed_zero_copy); - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); -diff --git 
a/migration/multifd.c b/migration/multifd.c -index 1c992abf53..903df2117b 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -576,7 +576,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) - return -1; - } - if (ret == 1) { -- dirty_sync_missed_zero_copy(); -+ stat64_add(&ram_counters.dirty_sync_missed_zero_copy, 1); - } - - return ret; -diff --git a/migration/ram.c b/migration/ram.c -index 71320ed27a..93e0a48af4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -472,11 +472,6 @@ void ram_transferred_add(uint64_t bytes) - stat64_add(&ram_counters.transferred, bytes); - } - --void dirty_sync_missed_zero_copy(void) --{ -- ram_counters.dirty_sync_missed_zero_copy++; --} -- - struct MigrationOps { - int (*ram_save_target_page)(RAMState *rs, PageSearchStatus *pss); - }; -diff --git a/migration/ram.h b/migration/ram.h -index ed70391317..2170c55e67 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -43,7 +43,7 @@ - typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; -- uint64_t dirty_sync_missed_zero_copy; -+ Stat64 dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; - Stat64 multifd_bytes; -@@ -114,6 +114,4 @@ void ram_write_tracking_prepare(void); - int ram_write_tracking_start(void); - void ram_write_tracking_stop(void); - --void dirty_sync_missed_zero_copy(void); -- - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch b/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch deleted file mode 100644 index 9b206bc..0000000 --- a/SOURCES/kvm-migration-Make-downtime_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 4c6af064277b5445b31db4a598e1c4402ba56452 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:38:11 +0200 -Subject: [PATCH 13/56] migration: Make downtime_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty 
failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [12/50] ebfc16aae8bc4a8c1fec431780a062950e6f50c4 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 296a4ac2aa63038b6b702f2ee8f0f93ae26727ae) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3a68d93d69..8f2847d298 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1156,7 +1156,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); -- info->ram->downtime_bytes = ram_counters.downtime_bytes; -+ info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { -diff --git a/migration/ram.c b/migration/ram.c -index 0b4693215e..b1722b6071 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -467,7 +467,7 @@ void ram_transferred_add(uint64_t bytes) - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -- ram_counters.downtime_bytes += bytes; -+ stat64_add(&ram_counters.downtime_bytes, bytes); - } - stat64_add(&ram_counters.transferred, bytes); - } -diff --git a/migration/ram.h b/migration/ram.h -index a766b895fa..bb52632424 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -44,7 +44,7 @@ typedef struct { - int64_t dirty_pages_rate; - int64_t dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; -- uint64_t downtime_bytes; -+ Stat64 downtime_bytes; - Stat64 duplicate; - Stat64 
multifd_bytes; - Stat64 normal; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch b/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch deleted file mode 100644 index b315fdc..0000000 --- a/SOURCES/kvm-migration-Make-multifd_bytes-atomic.patch +++ /dev/null @@ -1,99 +0,0 @@ -From bfcc4bc8f60b541d545f1ea27b1ff156d8092d33 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 23 Nov 2022 20:36:56 +0100 -Subject: [PATCH 10/56] migration: Make multifd_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [9/50] c2bc6b173770a0ea81c3f9d850c583c651647070 (peterx/qemu-kvm) - -In the spirit of: - -commit 394d323bc3451e4d07f13341cb8817fac8dfbadd -Author: Peter Xu -Date: Tue Oct 11 17:55:51 2022 -0400 - - migration: Use atomic ops properly for page accountings - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela -(cherry picked from commit cf671116facf4e29d91fce9c9ffb535385ffac81) -Signed-off-by: Peter Xu ---- - migration/migration.c | 4 ++-- - migration/multifd.c | 4 ++-- - migration/ram.h | 2 +- - 3 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index a91704d35c..ca68808b5c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1153,7 +1153,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - ram_counters.dirty_sync_missed_zero_copy; - info->ram->postcopy_requests = ram_counters.postcopy_requests; - info->ram->page_size = page_size; -- info->ram->multifd_bytes = ram_counters.multifd_bytes; -+ info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; - 
info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -@@ -3780,7 +3780,7 @@ static MigThrError migration_detect_error(MigrationState *s) - static uint64_t migration_total_bytes(MigrationState *s) - { - return qemu_file_total_transferred(s->to_dst_file) + -- ram_counters.multifd_bytes; -+ stat64_get(&ram_counters.multifd_bytes); - } - - static void migration_calculate_complete(MigrationState *s) -diff --git a/migration/multifd.c b/migration/multifd.c -index 6ef3a27938..1c992abf53 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -432,9 +432,9 @@ static int multifd_send_pages(QEMUFile *f) - p->pages = pages; - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); -- ram_counters.multifd_bytes += transferred; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, transferred); -+ stat64_add(&ram_counters.multifd_bytes, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -627,9 +627,9 @@ int multifd_send_sync_main(QEMUFile *f) - p->flags |= MULTIFD_FLAG_SYNC; - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); -- ram_counters.multifd_bytes += p->packet_len; - qemu_mutex_unlock(&p->mutex); - stat64_add(&ram_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.multifd_bytes, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { -diff --git a/migration/ram.h b/migration/ram.h -index 7c026b5242..ed70391317 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -46,7 +46,7 @@ typedef struct { - uint64_t dirty_sync_missed_zero_copy; - uint64_t downtime_bytes; - Stat64 duplicate; -- uint64_t multifd_bytes; -+ Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch b/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch 
deleted file mode 100644 index 894419a..0000000 --- a/SOURCES/kvm-migration-Make-postcopy_requests-atomic.patch +++ /dev/null @@ -1,69 +0,0 @@ -From e6ff4536a5e5f5bbfda370ecb525d0e066c3ab1c Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 18:04:59 +0200 -Subject: [PATCH 15/56] migration: Make postcopy_requests atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [14/50] d15c6052b77e7ded7bf34c66caa11bf86b75f2e8 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 3c764f9b2bc3e5eb5ed93ab45c2de6d599fef00f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 3 ++- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 4 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 8fca751050..39501a0ed8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1152,7 +1152,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - stat64_get(&ram_counters.dirty_sync_count); - info->ram->dirty_sync_missed_zero_copy = - stat64_get(&ram_counters.dirty_sync_missed_zero_copy); -- info->ram->postcopy_requests = ram_counters.postcopy_requests; -+ info->ram->postcopy_requests = -+ stat64_get(&ram_counters.postcopy_requests); - info->ram->page_size = page_size; - info->ram->multifd_bytes = stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -diff --git a/migration/ram.c b/migration/ram.c -index 3c13136559..fe69ecaef4 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2169,7 +2169,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - RAMBlock *ramblock; - RAMState *rs = 
ram_state; - -- ram_counters.postcopy_requests++; -+ stat64_add(&ram_counters.postcopy_requests, 1); - RCU_READ_LOCK_GUARD(); - - if (!rbname) { -diff --git a/migration/ram.h b/migration/ram.h -index 8c0d07c43a..afa68521d7 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -49,7 +49,7 @@ typedef struct { - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; -- int64_t postcopy_requests; -+ Stat64 postcopy_requests; - Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch b/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch deleted file mode 100644 index 8e6c177..0000000 --- a/SOURCES/kvm-migration-Make-precopy_bytes-atomic.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7e4d4316855f7f6556364eb16828f925b61c80d4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 11 Apr 2023 17:36:48 +0200 -Subject: [PATCH 12/56] migration: Make precopy_bytes atomic -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [11/50] 23bec49b4b8f4d23c2192b401416139e3ca13626 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit b013b5d1f32ef88457e66c7ce576f6475238f97f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 2 +- - migration/ram.h | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 645fb4b3c5..3a68d93d69 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1155,7 +1155,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->page_size = page_size; - info->ram->multifd_bytes = 
stat64_get(&ram_counters.multifd_bytes); - info->ram->pages_per_second = s->pages_per_second; -- info->ram->precopy_bytes = ram_counters.precopy_bytes; -+ info->ram->precopy_bytes = stat64_get(&ram_counters.precopy_bytes); - info->ram->downtime_bytes = ram_counters.downtime_bytes; - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -diff --git a/migration/ram.c b/migration/ram.c -index 93e0a48af4..0b4693215e 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -463,7 +463,7 @@ RAMStats ram_counters; - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { -- ram_counters.precopy_bytes += bytes; -+ stat64_add(&ram_counters.precopy_bytes, bytes); - } else if (migration_in_postcopy()) { - stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { -diff --git a/migration/ram.h b/migration/ram.h -index 2170c55e67..a766b895fa 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -50,7 +50,7 @@ typedef struct { - Stat64 normal; - Stat64 postcopy_bytes; - int64_t postcopy_requests; -- uint64_t precopy_bytes; -+ Stat64 precopy_bytes; - int64_t remaining; - Stat64 transferred; - } RAMStats; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch b/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch deleted file mode 100644 index 0679e89..0000000 --- a/SOURCES/kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch +++ /dev/null @@ -1,270 +0,0 @@ -From 5a87058eea6ee56f37fb454486c35baaf693d691 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 15:56:45 +0100 -Subject: [PATCH 08/56] migration: Merge ram_counters and ram_atomic_counters -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 
-RH-Commit: [7/50] 90e395de66aa32b886cf151f7996a680190471f5 (peterx/qemu-kvm) - -Using MgrationStats as type for ram_counters mean that we didn't have -to re-declare each value in another struct. The need of atomic -counters have make us to create MigrationAtomicStats for this atomic -counters. - -Create RAMStats type which is a merge of MigrationStats and -MigrationAtomicStats removing unused members. - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu - ---- - -Fix typos found by David Edmondson - -(cherry picked from commit abce5fa16d126ed085ccf8a5b3fe61a1efa20994) -Signed-off-by: Peter Xu ---- - migration/migration.c | 8 ++++---- - migration/multifd.c | 4 ++-- - migration/ram.c | 39 ++++++++++++++++----------------------- - migration/ram.h | 28 +++++++++++++++------------- - 4 files changed, 37 insertions(+), 42 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 99f86bd6c2..a91704d35c 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1140,12 +1140,12 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - size_t page_size = qemu_target_page_size(); - - info->ram = g_malloc0(sizeof(*info->ram)); -- info->ram->transferred = stat64_get(&ram_atomic_counters.transferred); -+ info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_atomic_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.duplicate); - /* legacy value. 
It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_atomic_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = ram_counters.dirty_sync_count; -@@ -1157,7 +1157,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->pages_per_second = s->pages_per_second; - info->ram->precopy_bytes = ram_counters.precopy_bytes; - info->ram->downtime_bytes = ram_counters.downtime_bytes; -- info->ram->postcopy_bytes = stat64_get(&ram_atomic_counters.postcopy_bytes); -+ info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - - if (migrate_use_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); -diff --git a/migration/multifd.c b/migration/multifd.c -index cbc0dfe39b..01fab01a92 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,7 +433,7 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_atomic_counters.transferred, transferred); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - -@@ -628,7 +628,7 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_atomic_counters.transferred, p->packet_len); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); - qemu_sem_post(&p->sem); - } -diff --git a/migration/ram.c b/migration/ram.c -index 0e68099bf9..71320ed27a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -458,25 +458,18 @@ uint64_t ram_bytes_remaining(void) - 0; - } - --/* -- * NOTE: not all stats in ram_counters are used in 
reality. See comments -- * for struct MigrationAtomicStats. The ultimate result of ram migration -- * counters will be a merged version with both ram_counters and the atomic -- * fields in ram_atomic_counters. -- */ --MigrationStats ram_counters; --MigrationAtomicStats ram_atomic_counters; -+RAMStats ram_counters; - - void ram_transferred_add(uint64_t bytes) - { - if (runstate_is_running()) { - ram_counters.precopy_bytes += bytes; - } else if (migration_in_postcopy()) { -- stat64_add(&ram_atomic_counters.postcopy_bytes, bytes); -+ stat64_add(&ram_counters.postcopy_bytes, bytes); - } else { - ram_counters.downtime_bytes += bytes; - } -- stat64_add(&ram_atomic_counters.transferred, bytes); -+ stat64_add(&ram_counters.transferred, bytes); - } - - void dirty_sync_missed_zero_copy(void) -@@ -756,7 +749,7 @@ void mig_throttle_counter_reset(void) - - rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - - /** -@@ -1130,8 +1123,8 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_atomic_counters.normal) + -- stat64_get(&ram_atomic_counters.duplicate) + -+ return stat64_get(&ram_counters.normal) + -+ stat64_get(&ram_counters.duplicate) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1192,7 +1185,7 @@ static void migration_trigger_throttle(RAMState *rs) - MigrationState *s = migrate_get_current(); - uint64_t threshold = s->parameters.throttle_trigger_threshold; - uint64_t bytes_xfer_period = -- stat64_get(&ram_atomic_counters.transferred) - rs->bytes_xfer_prev; -+ stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev; - uint64_t bytes_dirty_period = rs->num_dirty_pages_period * TARGET_PAGE_SIZE; - uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100; - -@@ -1255,7 +1248,7 @@ static 
void migration_bitmap_sync(RAMState *rs) - /* reset period counters */ - rs->time_last_bitmap_sync = end_time; - rs->num_dirty_pages_period = 0; -- rs->bytes_xfer_prev = stat64_get(&ram_atomic_counters.transferred); -+ rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } - if (migrate_use_events()) { - qapi_event_send_migration_pass(ram_counters.dirty_sync_count); -@@ -1331,7 +1324,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - ram_transferred_add(len); - return 1; - } -@@ -1368,9 +1361,9 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - } - - return true; -@@ -1402,7 +1395,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - return 1; - } - -@@ -1458,7 +1451,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_atomic_counters.normal, 1); -+ stat64_add(&ram_counters.normal, 1); - - return 1; - } -@@ -1497,7 +1490,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_atomic_counters.duplicate, 1); -+ stat64_add(&ram_counters.duplicate, 1); - return; - } - -@@ -2632,9 +2625,9 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; 
- - if (zero) { -- stat64_add(&ram_atomic_counters.duplicate, pages); -+ stat64_add(&ram_counters.duplicate, pages); - } else { -- stat64_add(&ram_atomic_counters.normal, pages); -+ stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 81cbb0947c..7c026b5242 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -35,25 +35,27 @@ - #include "qemu/stats64.h" - - /* -- * These are the migration statistic counters that need to be updated using -- * atomic ops (can be accessed by more than one thread). Here since we -- * cannot modify MigrationStats directly to use Stat64 as it was defined in -- * the QAPI scheme, we define an internal structure to hold them, and we -- * propagate the real values when QMP queries happen. -- * -- * IOW, the corresponding fields within ram_counters on these specific -- * fields will be always zero and not being used at all; they're just -- * placeholders to make it QAPI-compatible. -+ * These are the ram migration statistic counters. It is loosely -+ * based on MigrationStats. We change to Stat64 any counter that -+ * needs to be updated using atomic ops (can be accessed by more than -+ * one thread). 
- */ - typedef struct { -- Stat64 transferred; -+ int64_t dirty_pages_rate; -+ int64_t dirty_sync_count; -+ uint64_t dirty_sync_missed_zero_copy; -+ uint64_t downtime_bytes; - Stat64 duplicate; -+ uint64_t multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --} MigrationAtomicStats; -+ int64_t postcopy_requests; -+ uint64_t precopy_bytes; -+ int64_t remaining; -+ Stat64 transferred; -+} RAMStats; - --extern MigrationAtomicStats ram_atomic_counters; --extern MigrationStats ram_counters; -+extern RAMStats ram_counters; - extern XBZRLECacheStats xbzrle_counters; - extern CompressionStats compression_counters; - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch b/SOURCES/kvm-migration-Minor-control-flow-simplification.patch deleted file mode 100644 index a0dbdd9..0000000 --- a/SOURCES/kvm-migration-Minor-control-flow-simplification.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c3bc974ea4b5186a76daa433209c1209d94dd0b7 Mon Sep 17 00:00:00 2001 -From: Eric Blake -Date: Thu, 20 Apr 2023 09:35:51 -0500 -Subject: [PATCH 2/2] migration: Minor control flow simplification - -RH-Author: Eric Blake -RH-MergeRequest: 161: Avoid migration assertion from failed NFS server. -RH-Bugzilla: 2058982 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/2] 5afd8c25d6f14bdb2a380ecc77bc6c2f2a26df87 (ebblake/centos-qemu-kvm) - -No need to declare a temporary variable. 
- -Suggested-by: Juan Quintela -Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better") -Signed-off-by: Eric Blake -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 5d39f44d7ac5c63f53d4d0900ceba9521bc27e49) ---- - migration/migration.c | 5 ++--- - 1 file changed, 2 insertions(+), 3 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index cb0d42c061..08007cef4e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3436,7 +3436,6 @@ static void migration_completion(MigrationState *s) - ret = global_state_store(); - - if (!ret) { -- bool inactivate = !migrate_colo_enabled(); - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - trace_migration_completion_vm_stop(ret); - if (ret >= 0) { -@@ -3444,10 +3443,10 @@ static void migration_completion(MigrationState *s) - MIGRATION_STATUS_DEVICE); - } - if (ret >= 0) { -- s->block_inactive = inactivate; -+ s->block_inactive = !migrate_colo_enabled(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, -- inactivate); -+ s->block_inactive); - } - } - qemu_mutex_unlock_iothread(); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch b/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch deleted file mode 100644 index 24dcb16..0000000 --- a/SOURCES/kvm-migration-Move-migrate_announce_params-to-option.c.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 1f5232d611ecaaf61bcac151e7d90b8b452ac161 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 01:17:23 +0100 -Subject: [PATCH 43/56] migration: Move migrate_announce_params() to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [42/50] 541be7adc7f81c269058485aef5b14e787b2efe6 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas - ---- - -Fix extra whitespace (fabiano) - -(cherry picked from commit 2682c4eea72c621dfd0fb0151cbd758e81d1bdff) -Signed-off-by: Peter Xu ---- - migration/migration.c | 14 -------------- - migration/options.c | 17 +++++++++++++++++ - 2 files changed, 17 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 724e841eb9..f27ce30be2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --AnnounceParameters *migrate_announce_params(void) --{ -- static AnnounceParameters ap; -- -- MigrationState *s = migrate_get_current(); -- -- ap.initial = s->parameters.announce_initial; -- ap.max = s->parameters.announce_max; -- ap.rounds = s->parameters.announce_rounds; -- ap.step = s->parameters.announce_step; -- -- return ≈ --} -- - /* - * Return true if we're already in the middle of a migration - * (i.e. 
any of the active or setup states) -diff --git a/migration/options.c b/migration/options.c -index 2cb04fbbd1..418aafac64 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -16,6 +16,7 @@ - #include "qapi/qapi-commands-migration.h" - #include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" -+#include "migration/misc.h" - #include "migration.h" - #include "ram.h" - #include "options.h" -@@ -589,3 +590,19 @@ uint64_t migrate_xbzrle_cache_size(void) - - return s->parameters.xbzrle_cache_size; - } -+ -+/* parameters helpers */ -+ -+AnnounceParameters *migrate_announce_params(void) -+{ -+ static AnnounceParameters ap; -+ -+ MigrationState *s = migrate_get_current(); -+ -+ ap.initial = s->parameters.announce_initial; -+ ap.max = s->parameters.announce_max; -+ ap.rounds = s->parameters.announce_rounds; -+ ap.step = s->parameters.announce_step; -+ -+ return ≈ -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch deleted file mode 100644 index 0e33c4c..0000000 --- a/SOURCES/kvm-migration-Move-migrate_cap_set-to-options.c.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 9c4f8d869f5bbdd07381f6baad2ed755b07d03f4 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:25:44 +0100 -Subject: [PATCH 36/56] migration: Move migrate_cap_set() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [35/50] d0cd6b8e9cf0534a56795d94c3da18622fa10ad7 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit f80196b772ddeeb07d3d80d5c8382cb5d1063fa2) -Signed-off-by: Peter Xu ---- - migration/migration.c | 20 
-------------------- - migration/options.c | 21 +++++++++++++++++++++ - migration/options.h | 1 + - 3 files changed, 22 insertions(+), 20 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 369cd91796..880a51210e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1666,26 +1666,6 @@ void migrate_set_state(int *state, int old_state, int new_state) - } - } - --static bool migrate_cap_set(int cap, bool value, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return false; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- new_caps[cap] = value; -- -- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return false; -- } -- s->capabilities[cap] = value; -- return true; --} -- - static void migrate_set_block_incremental(MigrationState *s, bool value) - { - s->parameters.block_incremental = value; -diff --git a/migration/options.c b/migration/options.c -index 4cbe77e35a..f3b2d6e482 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -14,6 +14,7 @@ - #include "qemu/osdep.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" -+#include "qapi/qmp/qerror.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -392,6 +393,26 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - return true; - } - -+bool migrate_cap_set(int cap, bool value, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return false; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ new_caps[cap] = value; -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return false; -+ } -+ s->capabilities[cap] = value; -+ return true; -+} 
-+ - MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - { - MigrationCapabilityStatusList *head = NULL, **tail = &head; -diff --git a/migration/options.h b/migration/options.h -index e779f14161..5979e4ff90 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -41,5 +41,6 @@ bool migrate_zero_copy_send(void); - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+bool migrate_cap_set(int cap, bool value, Error **errp); - - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch deleted file mode 100644 index 0d6fa08..0000000 --- a/SOURCES/kvm-migration-Move-migrate_caps_check-to-options.c.patch +++ /dev/null @@ -1,458 +0,0 @@ -From 3af7c7aaf7407ec14c19e54d52a2229ce4dbb7c5 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:05:53 +0100 -Subject: [PATCH 33/56] migration: Move migrate_caps_check() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [32/50] 12999471063d97fffb2b04c6dcb80083b902f963 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 77608706459bd197e25ac1ef54591b9f8a0b46f8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 190 ----------------------------------------- - migration/options.c | 192 ++++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 4 + - 3 files changed, 196 insertions(+), 190 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f7facecd66..d9e30ca918 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -136,39 
+136,6 @@ enum mig_rp_message_type { - MIG_RP_MSG_MAX - }; - --/* Migration capabilities set */ --struct MigrateCapsSet { -- int size; /* Capability set size */ -- MigrationCapability caps[]; /* Variadic array of capabilities */ --}; --typedef struct MigrateCapsSet MigrateCapsSet; -- --/* Define and initialize MigrateCapsSet */ --#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) \ -- MigrateCapsSet _name = { \ -- .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -- .caps = { __VA_ARGS__ } \ -- } -- --/* Background-snapshot compatibility check list */ --static const --INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -- MIGRATION_CAPABILITY_POSTCOPY_RAM, -- MIGRATION_CAPABILITY_DIRTY_BITMAPS, -- MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -- MIGRATION_CAPABILITY_RETURN_PATH, -- MIGRATION_CAPABILITY_MULTIFD, -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -- MIGRATION_CAPABILITY_AUTO_CONVERGE, -- MIGRATION_CAPABILITY_RELEASE_RAM, -- MIGRATION_CAPABILITY_RDMA_PIN_ALL, -- MIGRATION_CAPABILITY_COMPRESS, -- MIGRATION_CAPABILITY_XBZRLE, -- MIGRATION_CAPABILITY_X_COLO, -- MIGRATION_CAPABILITY_VALIDATE_UUID, -- MIGRATION_CAPABILITY_ZERO_COPY_SEND); -- - /* When we add fault tolerance, we could have several - migrations at once. For now we don't need to add - dynamic creation of migration */ -@@ -1235,163 +1202,6 @@ static void fill_source_migration_info(MigrationInfo *info) - info->status = state; - } - --typedef enum WriteTrackingSupport { -- WT_SUPPORT_UNKNOWN = 0, -- WT_SUPPORT_ABSENT, -- WT_SUPPORT_AVAILABLE, -- WT_SUPPORT_COMPATIBLE --} WriteTrackingSupport; -- --static --WriteTrackingSupport migrate_query_write_tracking(void) --{ -- /* Check if kernel supports required UFFD features */ -- if (!ram_write_tracking_available()) { -- return WT_SUPPORT_ABSENT; -- } -- /* -- * Check if current memory configuration is -- * compatible with required UFFD features. 
-- */ -- if (!ram_write_tracking_compatible()) { -- return WT_SUPPORT_AVAILABLE; -- } -- -- return WT_SUPPORT_COMPATIBLE; --} -- --/** -- * @migration_caps_check - check capability compatibility -- * -- * @old_caps: old capability list -- * @new_caps: new capability list -- * @errp: set *errp if the check failed, with reason -- * -- * Returns true if check passed, otherwise false. -- */ --static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) --{ -- MigrationIncomingState *mis = migration_incoming_get_current(); -- --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -- error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -- "block migration"); -- error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -- return false; -- } --#endif -- --#ifndef CONFIG_REPLICATION -- if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -- error_setg(errp, "QEMU compiled without replication module" -- " can't enable COLO"); -- error_append_hint(errp, "Please enable replication before COLO.\n"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- /* This check is reasonably expensive, so only when it's being -- * set the first time, also it's only the destination that needs -- * special support. 
-- */ -- if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -- runstate_check(RUN_STATE_INMIGRATE) && -- !postcopy_ram_supported_by_host(mis)) { -- /* postcopy_ram_supported_by_host will have emitted a more -- * detailed message -- */ -- error_setg(errp, "Postcopy is not supported"); -- return false; -- } -- -- if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -- error_setg(errp, "Postcopy is not compatible with ignore-shared"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -- WriteTrackingSupport wt_support; -- int idx; -- /* -- * Check if 'background-snapshot' capability is supported by -- * host kernel and compatible with guest memory configuration. -- */ -- wt_support = migrate_query_write_tracking(); -- if (wt_support < WT_SUPPORT_AVAILABLE) { -- error_setg(errp, "Background-snapshot is not supported by host kernel"); -- return false; -- } -- if (wt_support < WT_SUPPORT_COMPATIBLE) { -- error_setg(errp, "Background-snapshot is not compatible " -- "with guest memory configuration"); -- return false; -- } -- -- /* -- * Check if there are any migration capabilities -- * incompatible with 'background-snapshot'. 
-- */ -- for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -- int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (new_caps[incomp_cap]) { -- error_setg(errp, -- "Background-snapshot is not compatible with %s", -- MigrationCapability_str(incomp_cap)); -- return false; -- } -- } -- } -- --#ifdef CONFIG_LINUX -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -- new_caps[MIGRATION_CAPABILITY_COMPRESS] || -- new_caps[MIGRATION_CAPABILITY_XBZRLE] || -- migrate_multifd_compression() || -- migrate_use_tls())) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#else -- if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -- error_setg(errp, -- "Zero copy currently only available on Linux"); -- return false; -- } --#endif -- -- if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -- error_setg(errp, "Postcopy preempt requires postcopy-ram"); -- return false; -- } -- -- /* -- * Preempt mode requires urgent pages to be sent in separate -- * channel, OTOH compression logic will disorder all pages into -- * different compression channels, which is not compatible with the -- * preempt assumptions on channel assignments. 
-- */ -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Postcopy preempt not compatible with compress"); -- return false; -- } -- } -- -- if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -- if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -- error_setg(errp, "Multifd is not compatible with compress"); -- return false; -- } -- } -- -- return true; --} -- - static void fill_destination_migration_info(MigrationInfo *info) - { - MigrationIncomingState *mis = migration_incoming_get_current(); -diff --git a/migration/options.c b/migration/options.c -index 9c9b8e5863..367c930f46 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,7 +12,10 @@ - */ - - #include "qemu/osdep.h" -+#include "qapi/error.h" -+#include "sysemu/runstate.h" - #include "migration.h" -+#include "ram.h" - #include "options.h" - - bool migrate_auto_converge(void) -@@ -198,3 +201,192 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+typedef enum WriteTrackingSupport { -+ WT_SUPPORT_UNKNOWN = 0, -+ WT_SUPPORT_ABSENT, -+ WT_SUPPORT_AVAILABLE, -+ WT_SUPPORT_COMPATIBLE -+} WriteTrackingSupport; -+ -+static -+WriteTrackingSupport migrate_query_write_tracking(void) -+{ -+ /* Check if kernel supports required UFFD features */ -+ if (!ram_write_tracking_available()) { -+ return WT_SUPPORT_ABSENT; -+ } -+ /* -+ * Check if current memory configuration is -+ * compatible with required UFFD features. -+ */ -+ if (!ram_write_tracking_compatible()) { -+ return WT_SUPPORT_AVAILABLE; -+ } -+ -+ return WT_SUPPORT_COMPATIBLE; -+} -+ -+/* Migration capabilities set */ -+struct MigrateCapsSet { -+ int size; /* Capability set size */ -+ MigrationCapability caps[]; /* Variadic array of capabilities */ -+}; -+typedef struct MigrateCapsSet MigrateCapsSet; -+ -+/* Define and initialize MigrateCapsSet */ -+#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...) 
\ -+ MigrateCapsSet _name = { \ -+ .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \ -+ .caps = { __VA_ARGS__ } \ -+ } -+ -+/* Background-snapshot compatibility check list */ -+static const -+INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, -+ MIGRATION_CAPABILITY_POSTCOPY_RAM, -+ MIGRATION_CAPABILITY_DIRTY_BITMAPS, -+ MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME, -+ MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE, -+ MIGRATION_CAPABILITY_RETURN_PATH, -+ MIGRATION_CAPABILITY_MULTIFD, -+ MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER, -+ MIGRATION_CAPABILITY_AUTO_CONVERGE, -+ MIGRATION_CAPABILITY_RELEASE_RAM, -+ MIGRATION_CAPABILITY_RDMA_PIN_ALL, -+ MIGRATION_CAPABILITY_COMPRESS, -+ MIGRATION_CAPABILITY_XBZRLE, -+ MIGRATION_CAPABILITY_X_COLO, -+ MIGRATION_CAPABILITY_VALIDATE_UUID, -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND); -+ -+/** -+ * @migration_caps_check - check capability compatibility -+ * -+ * @old_caps: old capability list -+ * @new_caps: new capability list -+ * @errp: set *errp if the check failed, with reason -+ * -+ * Returns true if check passed, otherwise false. 
-+ */ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) -+{ -+ MigrationIncomingState *mis = migration_incoming_get_current(); -+ -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { -+ error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " -+ "block migration"); -+ error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -+ return false; -+ } -+#endif -+ -+#ifndef CONFIG_REPLICATION -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { -+ error_setg(errp, "QEMU compiled without replication module" -+ " can't enable COLO"); -+ error_append_hint(errp, "Please enable replication before COLO.\n"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ /* This check is reasonably expensive, so only when it's being -+ * set the first time, also it's only the destination that needs -+ * special support. -+ */ -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && -+ !postcopy_ram_supported_by_host(mis)) { -+ /* postcopy_ram_supported_by_host will have emitted a more -+ * detailed message -+ */ -+ error_setg(errp, "Postcopy is not supported"); -+ return false; -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ error_setg(errp, "Postcopy is not compatible with ignore-shared"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ WriteTrackingSupport wt_support; -+ int idx; -+ /* -+ * Check if 'background-snapshot' capability is supported by -+ * host kernel and compatible with guest memory configuration. 
-+ */ -+ wt_support = migrate_query_write_tracking(); -+ if (wt_support < WT_SUPPORT_AVAILABLE) { -+ error_setg(errp, "Background-snapshot is not supported by host kernel"); -+ return false; -+ } -+ if (wt_support < WT_SUPPORT_COMPATIBLE) { -+ error_setg(errp, "Background-snapshot is not compatible " -+ "with guest memory configuration"); -+ return false; -+ } -+ -+ /* -+ * Check if there are any migration capabilities -+ * incompatible with 'background-snapshot'. -+ */ -+ for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { -+ int incomp_cap = check_caps_background_snapshot.caps[idx]; -+ if (new_caps[incomp_cap]) { -+ error_setg(errp, -+ "Background-snapshot is not compatible with %s", -+ MigrationCapability_str(incomp_cap)); -+ return false; -+ } -+ } -+ } -+ -+#ifdef CONFIG_LINUX -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ (!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || -+ migrate_multifd_compression() || -+ migrate_use_tls())) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#else -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ error_setg(errp, -+ "Zero copy currently only available on Linux"); -+ return false; -+ } -+#endif -+ -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ error_setg(errp, "Postcopy preempt requires postcopy-ram"); -+ return false; -+ } -+ -+ /* -+ * Preempt mode requires urgent pages to be sent in separate -+ * channel, OTOH compression logic will disorder all pages into -+ * different compression channels, which is not compatible with the -+ * preempt assumptions on channel assignments. 
-+ */ -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Postcopy preempt not compatible with compress"); -+ return false; -+ } -+ } -+ -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { -+ error_setg(errp, "Multifd is not compatible with compress"); -+ return false; -+ } -+ } -+ -+ return true; -+} -diff --git a/migration/options.h b/migration/options.h -index 25c002b37a..e779f14161 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,4 +38,8 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* capabilities helpers */ -+ -+bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch deleted file mode 100644 index 47c6f83..0000000 --- a/SOURCES/kvm-migration-Move-migrate_colo_enabled-to-options.c.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 13da9060fa2dfc666cd6f4b9bc85b7cee0fef45e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:00:16 +0100 -Subject: [PATCH 24/56] migration: Move migrate_colo_enabled() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [23/50] 4809b1091edee38bd222af41b6313133705785c7 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_colo() to be -consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5e8046445575dc5879e63c5d07af893d174813d0) -Signed-off-by: Peter Xu ---- - migration/migration.c | 16 +++++----------- - migration/migration.h | 1 - - migration/options.c | 6 ++++++ - migration/options.h | 1 + - 4 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 66ea55be06..59ee0ef82b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2411,7 +2411,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - } - - if (blk || blk_inc) { -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -@@ -3304,7 +3304,7 @@ static void migration_completion(MigrationState *s) - * have done so in order to remember to reactivate - * them if migration fails or is cancelled. - */ -- s->block_inactive = !migrate_colo_enabled(); -+ s->block_inactive = !migrate_colo(); - qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX); - ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, - s->block_inactive); -@@ -3357,7 +3357,7 @@ static void migration_completion(MigrationState *s) - goto fail; - } - -- if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { -+ if (migrate_colo() && s->state == MIGRATION_STATUS_ACTIVE) { - /* COLO does not support postcopy */ - migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COLO); -@@ -3435,12 +3435,6 @@ fail: - MIGRATION_STATUS_FAILED); - } - --bool migrate_colo_enabled(void) --{ -- MigrationState *s = migrate_get_current(); -- return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; --} -- - typedef enum MigThrError { - /* No error detected */ - MIG_THR_ERR_NONE = 0, -@@ -3771,7 +3765,7 @@ static void migration_iteration_finish(MigrationState *s) - runstate_set(RUN_STATE_POSTMIGRATE); - break; - case MIGRATION_STATUS_COLO: 
-- if (!migrate_colo_enabled()) { -+ if (!migrate_colo()) { - error_report("%s: critical error: calling COLO code without " - "COLO enabled", __func__); - } -@@ -3967,7 +3961,7 @@ static void *migration_thread(void *opaque) - qemu_savevm_send_postcopy_advise(s->to_dst_file); - } - -- if (migrate_colo_enabled()) { -+ if (migrate_colo()) { - /* Notify migration destination that we enable COLO */ - qemu_savevm_send_colo_enable(s->to_dst_file); - } -diff --git a/migration/migration.h b/migration/migration.h -index a25fed6ef0..42f0c68b6f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -463,7 +463,6 @@ bool migrate_use_zero_copy_send(void); - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); --bool migrate_colo_enabled(void); - - bool migrate_use_block(void); - bool migrate_use_block_incremental(void); -diff --git a/migration/options.c b/migration/options.c -index 88a9a45913..bd33c5da0a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,12 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_colo(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 0dfa0af245..2a0ee61ff8 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_colo(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch deleted file mode 100644 index 892ec9e..0000000 --- 
a/SOURCES/kvm-migration-Move-migrate_postcopy-to-options.c.patch +++ /dev/null @@ -1,98 +0,0 @@ -From 710fe195a3c13ffe96795a7a2b550c00319997ea Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:44:20 +0100 -Subject: [PATCH 47/56] migration: Move migrate_postcopy() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [46/50] a4f3455b3524a331f44b481bf7a79318aef5abaa (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit f774fde5d4e97cbfc64dab6622c2c53c5fe5c9fe) -Signed-off-by: Peter Xu ---- - migration/migration.c | 5 ----- - migration/migration.h | 2 -- - migration/options.c | 8 ++++++++ - migration/options.h | 9 +++++++++ - 4 files changed, 17 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f27ce30be2..46a5ea4d42 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2245,11 +2245,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --bool migrate_postcopy(void) --{ -- return migrate_postcopy_ram() || migrate_dirty_bitmaps(); --} -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 3ae938b19c..dcf906868d 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --bool migrate_postcopy(void); -- - int migrate_use_tls(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 615534c151..8bd2d949ae 100644 ---- 
a/migration/options.c -+++ b/migration/options.c -@@ -204,6 +204,14 @@ bool migrate_zero_copy_send(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } -+ -+/* pseudo capabilities */ -+ -+bool migrate_postcopy(void) -+{ -+ return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -diff --git a/migration/options.h b/migration/options.h -index 99f6bbd7a1..093bc907a1 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -38,6 +38,15 @@ bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -+/* -+ * pseudo capabilities -+ * -+ * These are functions that are used in a similar way to capabilities -+ * check, but they are not a capability. -+ */ -+ -+bool migrate_postcopy(void); -+ - /* capabilities helpers */ - - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch deleted file mode 100644 index f7cb338..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 276877a71778a5cef0dc5bc843e2679f0fdabb77 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:23:57 +0100 -Subject: [PATCH 30/56] migration: Move migrate_use_block() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [29/50] fcaeb0e07cf828f3cd0d115515b30d913525a0a2 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_block() -to be consistent with all other capabilities. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 9d4b1e5f22a838285ebeb8f0eb7cc8df1161998f) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/savevm.c | 2 +- - 6 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index 4b167fa5cf..f0977217cf 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -1001,7 +1001,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) - - static bool block_is_active(void *opaque) - { -- return migrate_use_block(); -+ return migrate_block(); - } - - static SaveVMHandlers savevm_block_handlers = { -diff --git a/migration/migration.c b/migration/migration.c -index a4ede4294e..96f82bd165 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2415,7 +2415,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_use_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_use_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_block(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; --} -- - bool migrate_use_return_path(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index e2bb5b1e2f..d4b68b08a5 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -457,7 +457,6 @@ int migrate_multifd_zstd_level(void); - int migrate_use_tls(void); - uint64_t 
migrate_xbzrle_cache_size(void); - --bool migrate_use_block(void); - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - bool migrate_use_return_path(void); -diff --git a/migration/options.c b/migration/options.c -index 25264c500e..fe1eadeed6 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -33,6 +33,15 @@ bool migrate_background_snapshot(void) - return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - -+bool migrate_block(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; -+} -+ - bool migrate_colo(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 8f76a88329..e985a5233e 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -18,6 +18,7 @@ - - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); -+bool migrate_block(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -diff --git a/migration/savevm.c b/migration/savevm.c -index ebcf571e37..9671211339 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1612,7 +1612,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - return -EINVAL; - } - -- if (migrate_use_block()) { -+ if (migrate_block()) { - error_setg(errp, "Block migration and snapshots are incompatible"); - return -EINVAL; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch b/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch deleted file mode 100644 index 3f20289..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_block_incremental-to-opti.patch +++ /dev/null @@ -1,121 +0,0 @@ -From def66503f4ccb97cf8029f88efe8e955edc8d32f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:49:47 +0100 -Subject: [PATCH 39/56] migration: Move migrate_use_block_incremental() to - option.c 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [38/50] 961fda6464df3384fbcee88c726b56a33c26e14e (peterx/qemu-kvm) - -To be consistent with every other parameter, rename to -migrate_block_incremental(). - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 6f8be7080a1f79bf3832cf798fba1697c409c597) -Signed-off-by: Peter Xu ---- - migration/block.c | 2 +- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/block.c b/migration/block.c -index f0977217cf..6d532ac7a2 100644 ---- a/migration/block.c -+++ b/migration/block.c -@@ -417,7 +417,7 @@ static int init_blk_migration(QEMUFile *f) - bmds->bulk_completed = 0; - bmds->total_sectors = sectors; - bmds->completed_sectors = 0; -- bmds->shared_base = migrate_use_block_incremental(); -+ bmds->shared_base = migrate_block_incremental(); - - assert(i < num_bs); - bmds_bs[i].bmds = bmds; -diff --git a/migration/migration.c b/migration/migration.c -index 78bca9a93f..724e841eb9 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2157,7 +2157,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - error_setg(errp, "No disk migration is required in COLO mode"); - return false; - } -- if (migrate_block() || migrate_use_block_incremental()) { -+ if (migrate_block() || migrate_block_incremental()) { - error_setg(errp, "Command options are incompatible with " - "current migration capabilities"); - return false; -@@ -2273,15 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && 
*s->parameters.tls_creds; - } - --bool migrate_use_block_incremental(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.block_incremental; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index 8451e5f2fe..86051af132 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -451,7 +451,6 @@ bool migrate_postcopy(void); - - int migrate_use_tls(void); - --bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); -diff --git a/migration/options.c b/migration/options.c -index 8d15be858c..2b6d88b4b9 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -463,6 +463,15 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - - /* parameters */ - -+bool migrate_block_incremental(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.block_incremental; -+} -+ - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b24ee92283..96d5a8e6e4 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -45,6 +45,7 @@ bool migrate_cap_set(int cap, bool value, Error **errp); - - /* parameters */ - -+bool migrate_block_incremental(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch deleted file mode 100644 index 8b74183..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_compression-to-options.c.patch +++ /dev/null @@ -1,183 +0,0 @@ -From ae183bfc9d7b001d3c4929556b095a76203bc08d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:03:48 +0100 -Subject: 
[PATCH 25/56] migration: Move migrate_use_compression() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [24/50] 126b865f51bd4a1ae3a46411fdcd59033bfc5376 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_compress() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit a7a94d14358dd7b445e20c2f26218ff987747642) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 16 ++++++++-------- - 5 files changed, 19 insertions(+), 19 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 59ee0ef82b..c6e32555a8 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1133,7 +1133,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->xbzrle_cache->overflow = xbzrle_counters.overflow; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - info->compression = g_malloc0(sizeof(*info->compression)); - info->compression->pages = compression_counters.pages; - info->compression->busy = compression_counters.busy; -@@ -2522,15 +2522,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --bool migrate_use_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; --} -- - int migrate_compress_level(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 42f0c68b6f..77aa91c840 100644 ---- 
a/migration/migration.h -+++ b/migration/migration.h -@@ -471,7 +471,6 @@ bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - --bool migrate_use_compression(void); - int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); -diff --git a/migration/options.c b/migration/options.c -index bd33c5da0a..fa7a13d3dc 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -39,6 +39,15 @@ bool migrate_colo(void) - return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - -+bool migrate_compress(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+} -+ - bool migrate_dirty_bitmaps(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index 2a0ee61ff8..da2193fd94 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -19,6 +19,7 @@ - bool migrate_auto_converge(void); - bool migrate_background_snapshot(void); - bool migrate_colo(void); -+bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -diff --git a/migration/ram.c b/migration/ram.c -index 912ccd89fa..d050d0c5fd 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -586,7 +586,7 @@ static void compress_threads_save_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression() || !comp_param) { -+ if (!migrate_compress() || !comp_param) { - return; - } - -@@ -625,7 +625,7 @@ static int compress_threads_save_setup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - thread_count = migrate_compress_threads(); -@@ -1155,7 +1155,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - rs->xbzrle_bytes_prev = xbzrle_counters.bytes; - } - -- if (migrate_use_compression()) { -+ if (migrate_compress()) { - 
compression_counters.busy_rate = (double)(compression_counters.busy - - rs->compress_thread_busy_prev) / page_count; - rs->compress_thread_busy_prev = compression_counters.busy; -@@ -2270,7 +2270,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) - - static bool save_page_use_compression(RAMState *rs) - { -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return false; - } - -@@ -3734,7 +3734,7 @@ static int wait_for_decompress_done(void) - { - int idx, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -3753,7 +3753,7 @@ static void compress_threads_load_cleanup(void) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return; - } - thread_count = migrate_decompress_threads(); -@@ -3794,7 +3794,7 @@ static int compress_threads_load_setup(QEMUFile *f) - { - int i, thread_count; - -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - return 0; - } - -@@ -4260,7 +4260,7 @@ static int ram_load_precopy(QEMUFile *f) - int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; - /* ADVISE is earlier, it shows the source has the postcopy capability on */ - bool postcopy_advised = migration_incoming_postcopy_advised(); -- if (!migrate_use_compression()) { -+ if (!migrate_compress()) { - invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch deleted file mode 100644 index 41e05c3..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_events-to-options.c.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 940f1eb4347c72edb3e1abc02c8d7e7c95753dcf Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:08:09 +0100 -Subject: [PATCH 26/56] migration: Move migrate_use_events() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [25/50] b3acd949af2a0fae18061d360e4f51dc12d32c6c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_events() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b890902c9c025b87d02e718eec3090fd3525ab18) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - 5 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c6e32555a8..032cd5c050 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -353,7 +353,7 @@ void migration_incoming_state_destroy(void) - - static void migrate_generate_event(int new_state) - { -- if (migrate_use_events()) { -+ if (migrate_events()) { - qapi_event_send_migration(new_state); - } - } -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_events(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; --} -- - bool migrate_use_multifd(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 77aa91c840..bd06520c19 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -475,7 +475,6 @@ int migrate_compress_level(void); - int migrate_compress_threads(void); - int migrate_compress_wait_thread(void); - int migrate_decompress_threads(void); --bool migrate_use_events(void); - - /* Sending on the return path - generic and then for each message type */ - void 
migrate_send_rp_shut(MigrationIncomingState *mis, -diff --git a/migration/options.c b/migration/options.c -index fa7a13d3dc..d2219ee0e4 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -57,6 +57,15 @@ bool migrate_dirty_bitmaps(void) - return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - -+bool migrate_events(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; -+} -+ - bool migrate_ignore_shared(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index da2193fd94..b998024eba 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -21,6 +21,7 @@ bool migrate_background_snapshot(void); - bool migrate_colo(void); - bool migrate_compress(void); - bool migrate_dirty_bitmaps(void); -+bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); - bool migrate_pause_before_switchover(void); -diff --git a/migration/ram.c b/migration/ram.c -index d050d0c5fd..ee454a3849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1246,7 +1246,7 @@ static void migration_bitmap_sync(RAMState *rs) - rs->num_dirty_pages_period = 0; - rs->bytes_xfer_prev = stat64_get(&ram_counters.transferred); - } -- if (migrate_use_events()) { -+ if (migrate_events()) { - uint64_t generation = stat64_get(&ram_counters.dirty_sync_count); - qapi_event_send_migration_pass(generation); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch deleted file mode 100644 index 97d6597..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_multifd-to-options.c.patch +++ /dev/null @@ -1,247 +0,0 @@ -From afd8fb766af2be5cff97753b026847b91b09a30e Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:10:29 +0100 -Subject: [PATCH 27/56] migration: Move migrate_use_multifd() to options.c -MIME-Version: 
1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [26/50] f2d72eae9cc80b2402ef613e809b40aa296d2e4c (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_multifd() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 51b07548f7c31793adc178c7460c5f4369733c61) -Signed-off-by: Peter Xu ---- - migration/migration.c | 19 +++++-------------- - migration/migration.h | 1 - - migration/multifd.c | 16 ++++++++-------- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 2 +- - migration/socket.c | 2 +- - 7 files changed, 25 insertions(+), 25 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 032cd5c050..e1d7f25786 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -186,7 +186,7 @@ static void migrate_fd_cancel(MigrationState *s); - - static bool migration_needs_multiple_sockets(void) - { -- return migrate_use_multifd() || migrate_postcopy_preempt(); -+ return migrate_multifd() || migrate_postcopy_preempt(); - } - - static bool uri_supports_multi_channels(const char *uri) -@@ -732,7 +732,7 @@ void migration_fd_process_incoming(QEMUFile *f, Error **errp) - static bool migration_should_start_incoming(bool main_channel) - { - /* Multifd doesn't start unless all channels are established */ -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return migration_has_all_channels(); - } - -@@ -759,7 +759,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - uint32_t channel_magic = 0; - int ret = 0; - -- if (migrate_use_multifd() && !migrate_postcopy_ram() && -+ if (migrate_multifd() && 
!migrate_postcopy_ram() && - qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { - /* - * With multiple channels, it is possible that we receive channels -@@ -798,7 +798,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) - } else { - /* Multiple connections */ - assert(migration_needs_multiple_sockets()); -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_new_channel(ioc, &local_err); - } else { - assert(migrate_postcopy_preempt()); -@@ -834,7 +834,7 @@ bool migration_has_all_channels(void) - return false; - } - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - return multifd_recv_all_channels_created(); - } - -@@ -2558,15 +2558,6 @@ int migrate_decompress_threads(void) - return s->parameters.decompress_threads; - } - --bool migrate_use_multifd(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; --} -- - int migrate_multifd_channels(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index bd06520c19..49c0e13f41 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,7 +449,6 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --bool migrate_use_multifd(void); - int migrate_multifd_channels(void); - MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 903df2117b..6807328189 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -516,7 +516,7 @@ void multifd_save_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_send_terminate_threads(NULL); -@@ -587,7 +587,7 @@ int multifd_send_sync_main(QEMUFile *f) - int i; - bool flush_zero_copy; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - if (multifd_send_state->pages->num) { -@@ -911,7 
+911,7 @@ int multifd_save_setup(Error **errp) - uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); - uint8_t i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return 0; - } - -@@ -1016,7 +1016,7 @@ static void multifd_recv_terminate_threads(Error *err) - - void multifd_load_shutdown(void) - { -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - multifd_recv_terminate_threads(NULL); - } - } -@@ -1025,7 +1025,7 @@ void multifd_load_cleanup(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - multifd_recv_terminate_threads(NULL); -@@ -1072,7 +1072,7 @@ void multifd_recv_sync_main(void) - { - int i; - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return; - } - for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -1170,7 +1170,7 @@ int multifd_load_setup(Error **errp) - * Return successfully if multiFD recv state is already initialised - * or multiFD is not enabled. - */ -- if (multifd_recv_state || !migrate_use_multifd()) { -+ if (multifd_recv_state || !migrate_multifd()) { - return 0; - } - -@@ -1216,7 +1216,7 @@ bool multifd_recv_all_channels_created(void) - { - int thread_count = migrate_multifd_channels(); - -- if (!migrate_use_multifd()) { -+ if (!migrate_multifd()) { - return true; - } - -diff --git a/migration/options.c b/migration/options.c -index d2219ee0e4..58673fc101 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -84,6 +84,15 @@ bool migrate_late_block_activate(void) - return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - -+bool migrate_multifd(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+} -+ - bool migrate_pause_before_switchover(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index b998024eba..d07269ee38 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -24,6 +24,7 @@ bool 
migrate_dirty_bitmaps(void); - bool migrate_events(void); - bool migrate_ignore_shared(void); - bool migrate_late_block_activate(void); -+bool migrate_multifd(void); - bool migrate_pause_before_switchover(void); - bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); -diff --git a/migration/ram.c b/migration/ram.c -index ee454a3849..859dd7b63f 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -2362,7 +2362,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) - * if host page size == guest page size the dest guest during run may - * still see partially copied pages which is data corruption. - */ -- if (migrate_use_multifd() && !migration_in_postcopy()) { -+ if (migrate_multifd() && !migration_in_postcopy()) { - return ram_save_multifd_page(pss->pss_channel, block, offset); - } - -diff --git a/migration/socket.c b/migration/socket.c -index ebf9ac41af..f4835a256a 100644 ---- a/migration/socket.c -+++ b/migration/socket.c -@@ -183,7 +183,7 @@ socket_start_incoming_migration_internal(SocketAddress *saddr, - - qio_net_listener_set_name(listener, "migration-socket-listener"); - -- if (migrate_use_multifd()) { -+ if (migrate_multifd()) { - num = migrate_multifd_channels(); - } else if (migrate_postcopy_preempt()) { - num = RAM_CHANNEL_MAX; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch deleted file mode 100644 index b250d40..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_return-to-options.c.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 145b630767dbc7020ddf39b20075f4691f71321a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:25:47 +0100 -Subject: [PATCH 31/56] migration: Move migrate_use_return() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for 
postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [30/50] 5cc150188bcc61b69ea0844253597594ab18fc13 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_return_path() -to be consistent with all other capabilities. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 38ad1110e368bf91453c0abbd657224d57b65d47) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/rdma.c | 6 +++--- - 5 files changed, 14 insertions(+), 14 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 96f82bd165..f7facecd66 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2622,15 +2622,6 @@ static int64_t migrate_max_postcopy_bandwidth(void) - return s->parameters.max_postcopy_bandwidth; - } - --bool migrate_use_return_path(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -@@ -4175,7 +4166,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - * precopy, only if user specified "return-path" capability would - * QEMU uses the return path. 
- */ -- if (migrate_postcopy_ram() || migrate_use_return_path()) { -+ if (migrate_postcopy_ram() || migrate_return_path()) { - if (open_return_path_on_source(s, !resume)) { - error_report("Unable to open return-path for postcopy"); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED); -diff --git a/migration/migration.h b/migration/migration.h -index d4b68b08a5..24184622a8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -459,7 +459,6 @@ uint64_t migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); --bool migrate_use_return_path(void); - - uint64_t ram_get_total_transferred_pages(void); - -diff --git a/migration/options.c b/migration/options.c -index fe1eadeed6..2003e413da 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_release_ram(void) - return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - -+bool migrate_return_path(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+} -+ - bool migrate_validate_uuid(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index e985a5233e..316efd1063 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_blocktime(void); - bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); -+bool migrate_return_path(void); - bool migrate_validate_uuid(void); - bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); -diff --git a/migration/rdma.c b/migration/rdma.c -index f35f021963..bf55e2f163 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -3373,7 +3373,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - * initialize the RDMAContext for return path for postcopy after first - * connection request reached. 
- */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - rdma_return_path = qemu_rdma_data_init(rdma->host_port, NULL); - if (rdma_return_path == NULL) { -@@ -3456,7 +3456,7 @@ static int qemu_rdma_accept(RDMAContext *rdma) - } - - /* Accept the second connection request for return path */ -- if ((migrate_postcopy() || migrate_use_return_path()) -+ if ((migrate_postcopy() || migrate_return_path()) - && !rdma->is_return_path) { - qemu_set_fd_handler(rdma->channel->fd, rdma_accept_incoming_migration, - NULL, -@@ -4193,7 +4193,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - /* RDMA postcopy need a separate queue pair for return path */ -- if (migrate_postcopy() || migrate_use_return_path()) { -+ if (migrate_postcopy() || migrate_return_path()) { - rdma_return_path = qemu_rdma_data_init(host_port, errp); - - if (rdma_return_path == NULL) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch deleted file mode 100644 index 84734af..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_tls-to-options.c.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 2e2df63892e191e91216b8253171162f69b93387 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:41:23 +0100 -Subject: [PATCH 49/56] migration: Move migrate_use_tls() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [48/50] 314431b0f5e92d2211e58a8161f32d7b67d69e38 (peterx/qemu-kvm) - -Once there, rename it to migrate_tls() and make it return bool for -consistency. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy - ---- - -Fix typos found by fabiano - -(cherry picked from commit 10d4703be5d884bbbb6ecafe0e8bb270ad6ea937) -Signed-off-by: Peter Xu ---- - migration/migration.c | 9 --------- - migration/migration.h | 2 -- - migration/options.c | 11 ++++++++++- - migration/options.h | 1 + - migration/tls.c | 3 ++- - 5 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c2e109329d..22ef83c619 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2177,15 +2177,6 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) - qemu_sem_post(&s->pause_sem); - } - --int migrate_use_tls(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.tls_creds && *s->parameters.tls_creds; --} -- - /* migration thread support */ - /* - * Something bad happened to the RP stream, mark an error -diff --git a/migration/migration.h b/migration/migration.h -index dcf906868d..2b71df8617 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -447,8 +447,6 @@ bool migration_is_blocked(Error **errp); - bool migration_in_postcopy(void); - MigrationState *migrate_get_current(void); - --int migrate_use_tls(void); -- - uint64_t ram_get_total_transferred_pages(void); - - /* Sending on the return path - generic and then for each message type */ -diff --git a/migration/options.c b/migration/options.c -index 8e8753d9be..d4c0714683 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -214,6 +214,15 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - -+bool migrate_tls(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.tls_creds && *s->parameters.tls_creds; -+} -+ - typedef enum WriteTrackingSupport { - WT_SUPPORT_UNKNOWN = 0, - WT_SUPPORT_ABSENT, -@@ -363,7 +372,7 @@ bool migrate_caps_check(bool *old_caps, bool 
*new_caps, Error **errp) - new_caps[MIGRATION_CAPABILITY_COMPRESS] || - new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || -- migrate_use_tls())) { -+ migrate_tls())) { - error_setg(errp, - "Zero copy only available for non-compressed non-TLS multifd migration"); - return false; -diff --git a/migration/options.h b/migration/options.h -index 1b78fa9f3d..13318a16c7 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -46,6 +46,7 @@ bool migrate_zero_copy_send(void); - */ - - bool migrate_postcopy(void); -+bool migrate_tls(void); - - /* capabilities helpers */ - -diff --git a/migration/tls.c b/migration/tls.c -index 4d2166a209..acd38e0b62 100644 ---- a/migration/tls.c -+++ b/migration/tls.c -@@ -22,6 +22,7 @@ - #include "channel.h" - #include "migration.h" - #include "tls.h" -+#include "options.h" - #include "crypto/tlscreds.h" - #include "qemu/error-report.h" - #include "qapi/error.h" -@@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s, - - bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc) - { -- if (!migrate_use_tls()) { -+ if (!migrate_tls()) { - return false; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch b/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch deleted file mode 100644 index e3a8bab..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 2184f7dae0df5fa52deba2dc884e09c6bdbc7b5f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:20:13 +0100 -Subject: [PATCH 29/56] migration: Move migrate_use_xbzrle() to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [28/50] 
fc8bee0f691a96e6bd0b41f2511abe507b81fea5 (peterx/qemu-kvm) - -Once that we are there, we rename the function to migrate_xbzrle() -to be consistent with all other capabilities. -We change the type to return bool also for consistency. - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 87dca0c9bb63014ef73ad82f7aedea1cb5a822e7) -Signed-off-by: Peter Xu ---- - migration/migration.c | 11 +---------- - migration/migration.h | 1 - - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/ram.c | 10 +++++----- - 5 files changed, 16 insertions(+), 16 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 1d63718e88..a4ede4294e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1122,7 +1122,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->downtime_bytes = stat64_get(&ram_counters.downtime_bytes); - info->ram->postcopy_bytes = stat64_get(&ram_counters.postcopy_bytes); - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache)); - info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size(); - info->xbzrle_cache->bytes = xbzrle_counters.bytes; -@@ -2604,15 +2604,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --int migrate_use_xbzrle(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; --} -- - uint64_t migrate_xbzrle_cache_size(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index c939f82d53..e2bb5b1e2f 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -455,7 +455,6 @@ int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - - int migrate_use_tls(void); --int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); - - bool 
migrate_use_block(void); -diff --git a/migration/options.c b/migration/options.c -index f357c99996..25264c500e 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -147,6 +147,15 @@ bool migrate_validate_uuid(void) - return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - -+bool migrate_xbzrle(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+} -+ - bool migrate_zero_blocks(void) - { - MigrationState *s; -diff --git a/migration/options.h b/migration/options.h -index ad22f4d24a..8f76a88329 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -31,6 +31,7 @@ bool migrate_postcopy_preempt(void); - bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); -+bool migrate_xbzrle(void); - bool migrate_zero_blocks(void); - bool migrate_zero_copy_send(void); - -diff --git a/migration/ram.c b/migration/ram.c -index 859dd7b63f..4576d0d849 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -156,14 +156,14 @@ static struct { - - static void XBZRLE_cache_lock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_lock(&XBZRLE.lock); - } - } - - static void XBZRLE_cache_unlock(void) - { -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - qemu_mutex_unlock(&XBZRLE.lock); - } - } -@@ -1137,7 +1137,7 @@ static void migration_update_rates(RAMState *rs, int64_t end_time) - return; - } - -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - double encoded_size, unencoded_size; - - xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss - -@@ -1626,7 +1626,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - /* Flag that we've looped */ - pss->complete_round = true; - /* After the first round, enable XBZRLE. 
*/ -- if (migrate_use_xbzrle()) { -+ if (migrate_xbzrle()) { - rs->xbzrle_enabled = true; - } - } -@@ -2979,7 +2979,7 @@ static int xbzrle_init(void) - { - Error *local_err = NULL; - -- if (!migrate_use_xbzrle()) { -+ if (!migrate_xbzrle()) { - return 0; - } - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch b/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch deleted file mode 100644 index 90031df..0000000 --- a/SOURCES/kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 6eb252887378d639ad2e90dd426a1812d4b72ca6 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 22:17:14 +0100 -Subject: [PATCH 28/56] migration: Move migrate_use_zero_copy_send() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [27/50] 5a4c2b5e75c62e0f60f9c4121a2756bd140a60d9 (peterx/qemu-kvm) - -Once that we are there, we rename the function to -migrate_zero_copy_send() to be consistent with all other capabilities. - -We can remove the CONFIG_LINUX guard. We already check that we can't -setup this capability in migrate_caps_check(). 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b4bc342c766640e0cb8a0b72f71e0ee5545fb790) -Signed-off-by: Peter Xu ---- - migration/migration.c | 13 +------------ - migration/migration.h | 5 ----- - migration/multifd.c | 8 ++++---- - migration/options.c | 9 +++++++++ - migration/options.h | 1 + - migration/socket.c | 2 +- - 6 files changed, 16 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index e1d7f25786..1d63718e88 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1609,7 +1609,7 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) - } - - #ifdef CONFIG_LINUX -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - ((params->has_multifd_compression && params->multifd_compression) || - (params->tls_creds && *params->tls_creds))) { - error_setg(errp, -@@ -2595,17 +2595,6 @@ int migrate_multifd_zstd_level(void) - return s->parameters.multifd_zstd_level; - } - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; --} --#endif -- - int migrate_use_tls(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 49c0e13f41..c939f82d53 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -454,11 +454,6 @@ MultiFDCompression migrate_multifd_compression(void); - int migrate_multifd_zlib_level(void); - int migrate_multifd_zstd_level(void); - --#ifdef CONFIG_LINUX --bool migrate_use_zero_copy_send(void); --#else --#define migrate_use_zero_copy_send() (false) --#endif - int migrate_use_tls(void); - int migrate_use_xbzrle(void); - uint64_t migrate_xbzrle_cache_size(void); -diff --git a/migration/multifd.c b/migration/multifd.c -index 6807328189..cce3ad6988 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -25,7 
+25,7 @@ - #include "trace.h" - #include "multifd.h" - #include "threadinfo.h" -- -+#include "options.h" - #include "qemu/yank.h" - #include "io/channel-socket.h" - #include "yank_functions.h" -@@ -608,7 +608,7 @@ int multifd_send_sync_main(QEMUFile *f) - * all the dirty bitmaps. - */ - -- flush_zero_copy = migrate_use_zero_copy_send(); -+ flush_zero_copy = migrate_zero_copy_send(); - - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -653,7 +653,7 @@ static void *multifd_send_thread(void *opaque) - MigrationThread *thread = NULL; - Error *local_err = NULL; - int ret = 0; -- bool use_zero_copy_send = migrate_use_zero_copy_send(); -+ bool use_zero_copy_send = migrate_zero_copy_send(); - - thread = MigrationThreadAdd(p->name, qemu_get_thread_id()); - -@@ -945,7 +945,7 @@ int multifd_save_setup(Error **errp) - p->page_size = qemu_target_page_size(); - p->page_count = page_count; - -- if (migrate_use_zero_copy_send()) { -+ if (migrate_zero_copy_send()) { - p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; - } else { - p->write_flags = 0; -diff --git a/migration/options.c b/migration/options.c -index 58673fc101..f357c99996 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -155,3 +155,12 @@ bool migrate_zero_blocks(void) - - return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } -+ -+bool migrate_zero_copy_send(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+} -diff --git a/migration/options.h b/migration/options.h -index d07269ee38..ad22f4d24a 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -32,5 +32,6 @@ bool migrate_postcopy_ram(void); - bool migrate_release_ram(void); - bool migrate_validate_uuid(void); - bool migrate_zero_blocks(void); -+bool migrate_zero_copy_send(void); - - #endif -diff --git a/migration/socket.c b/migration/socket.c -index f4835a256a..1b6f5baefb 100644 ---- 
a/migration/socket.c -+++ b/migration/socket.c -@@ -98,7 +98,7 @@ static void socket_outgoing_migration(QIOTask *task, - - trace_migration_socket_outgoing_connected(data->hostname); - -- if (migrate_use_zero_copy_send() && -+ if (migrate_zero_copy_send() && - !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { - error_setg(&err, "Zero copy send feature not detected in host kernel"); - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch b/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch deleted file mode 100644 index 145b510..0000000 --- a/SOURCES/kvm-migration-Move-migration_properties-to-options.c.patch +++ /dev/null @@ -1,409 +0,0 @@ -From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 09/37] migration: Move migration_properties to options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit f9436522c8dd -Author: Juan Quintela -Date: Thu Mar 2 12:55:57 2023 +0100 - - migration: Move migration_properties to options.c - - Signed-off-by: Juan Quintela - Reviewed-by: Vladimir Sementsov-Ogievskiy - -Signed-off-by: Cédric Le Goater ---- - migration/migration.c | 157 ------------------------------------------ - migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 7 ++ - 3 files changed, 162 insertions(+), 157 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 08f87f2b0e..1ac5f19bc2 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -52,8 +52,6 @@ 
- #include "io/channel-tls.h" - #include "migration/colo.h" - #include "hw/boards.h" --#include "hw/qdev-properties.h" --#include "hw/qdev-properties-system.h" - #include "monitor/monitor.h" - #include "net/announce.h" - #include "qemu/queue.h" -@@ -65,51 +63,6 @@ - #include "sysemu/qtest.h" - #include "options.h" - --#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -- --/* Time in milliseconds we are allowed to stop the source, -- * for sending the last part */ --#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -- --/* Default compression thread count */ --#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 --/* Default decompression thread count, usually decompression is at -- * least 4 times as fast as compression.*/ --#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 --/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 --/* Define default autoconverge cpu throttle migration parameters */ --#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 --#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 --#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -- --/* Migration XBZRLE default cache size */ --#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -- --/* The delay time (in ms) between two COLO checkpoints */ --#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) --#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 --#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE --/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 --/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ --#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -- --/* Background transfer rate for postcopy, 0 means unlimited, note -- * that page requests can still exceed this limit. 
-- */ --#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -- --/* -- * Parameters for self_announce_delay giving a stream of RARP/ARP -- * packets after migration. -- */ --#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 --#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 --#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 --#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -- - static NotifierList migration_state_notifiers = - NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); - -@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --#define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -- --static Property migration_properties[] = { -- DEFINE_PROP_BOOL("store-global-state", MigrationState, -- store_global_state, true), -- DEFINE_PROP_BOOL("send-configuration", MigrationState, -- send_configuration, true), -- DEFINE_PROP_BOOL("send-section-footer", MigrationState, -- send_section_footer, true), -- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -- decompress_error_check, true), -- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -- preempt_pre_7_2, false), -- -- /* Migration parameters */ -- DEFINE_PROP_UINT8("x-compress-level", MigrationState, -- parameters.compress_level, -- DEFAULT_MIGRATE_COMPRESS_LEVEL), -- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -- parameters.compress_threads, -- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -- parameters.compress_wait_thread, true), -- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -- parameters.decompress_threads, -- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -- parameters.throttle_trigger_threshold, -- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -- 
DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -- parameters.cpu_throttle_initial, -- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -- parameters.cpu_throttle_increment, -- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -- parameters.cpu_throttle_tailslow, false), -- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -- parameters.max_bandwidth, MAX_THROTTLE), -- DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -- parameters.downtime_limit, -- DEFAULT_MIGRATE_SET_DOWNTIME), -- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -- parameters.x_checkpoint_delay, -- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -- DEFINE_PROP_UINT8("multifd-channels", MigrationState, -- parameters.multifd_channels, -- DEFAULT_MIGRATE_MULTIFD_CHANNELS), -- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -- parameters.multifd_compression, -- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -- parameters.multifd_zlib_level, -- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -- parameters.multifd_zstd_level, -- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -- parameters.xbzrle_cache_size, -- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -- parameters.max_postcopy_bandwidth, -- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -- parameters.max_cpu_throttle, -- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -- DEFINE_PROP_SIZE("announce-initial", MigrationState, -- parameters.announce_initial, -- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -- DEFINE_PROP_SIZE("announce-max", MigrationState, -- parameters.announce_max, -- DEFAULT_MIGRATE_ANNOUNCE_MAX), -- DEFINE_PROP_SIZE("announce-rounds", MigrationState, -- 
parameters.announce_rounds, -- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -- DEFINE_PROP_SIZE("announce-step", MigrationState, -- parameters.announce_step, -- DEFAULT_MIGRATE_ANNOUNCE_STEP), -- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), -- -- /* Migration capabilities */ -- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -- DEFINE_PROP_MIG_CAP("x-background-snapshot", -- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), --#ifdef CONFIG_LINUX -- DEFINE_PROP_MIG_CAP("x-zero-copy-send", -- MIGRATION_CAPABILITY_ZERO_COPY_SEND), --#endif -- -- DEFINE_PROP_END_OF_LIST(), --}; -- - static void migration_class_init(ObjectClass *klass, void *data) - { - DeviceClass *dc = DEVICE_CLASS(klass); -diff --git a/migration/options.c b/migration/options.c -index bcfe244fa9..a76984276d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -31,6 +31,161 @@ - #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 - #define MAX_MIGRATE_DOWNTIME 
(MAX_MIGRATE_DOWNTIME_SECONDS * 1000) - -+#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ -+ -+/* Time in milliseconds we are allowed to stop the source, -+ * for sending the last part */ -+#define DEFAULT_MIGRATE_SET_DOWNTIME 300 -+ -+/* Default compression thread count */ -+#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 -+/* Default decompression thread count, usually decompression is at -+ * least 4 times as fast as compression.*/ -+#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 -+/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 -+/* Define default autoconverge cpu throttle migration parameters */ -+#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 -+#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 -+#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 -+ -+/* Migration XBZRLE default cache size */ -+#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) -+ -+/* The delay time (in ms) between two COLO checkpoints */ -+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) -+#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 -+#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE -+/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 -+/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ -+#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 -+ -+/* Background transfer rate for postcopy, 0 means unlimited, note -+ * that page requests can still exceed this limit. -+ */ -+#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 -+ -+/* -+ * Parameters for self_announce_delay giving a stream of RARP/ARP -+ * packets after migration. 
-+ */ -+#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 -+#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 -+#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 -+#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 -+ -+#define DEFINE_PROP_MIG_CAP(name, x) \ -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) -+ -+Property migration_properties[] = { -+ DEFINE_PROP_BOOL("store-global-state", MigrationState, -+ store_global_state, true), -+ DEFINE_PROP_BOOL("send-configuration", MigrationState, -+ send_configuration, true), -+ DEFINE_PROP_BOOL("send-section-footer", MigrationState, -+ send_section_footer, true), -+ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, -+ decompress_error_check, true), -+ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, -+ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), -+ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -+ preempt_pre_7_2, false), -+ -+ /* Migration parameters */ -+ DEFINE_PROP_UINT8("x-compress-level", MigrationState, -+ parameters.compress_level, -+ DEFAULT_MIGRATE_COMPRESS_LEVEL), -+ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, -+ parameters.compress_threads, -+ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), -+ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, -+ parameters.compress_wait_thread, true), -+ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, -+ parameters.decompress_threads, -+ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), -+ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, -+ parameters.throttle_trigger_threshold, -+ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), -+ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, -+ parameters.cpu_throttle_initial, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), -+ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, -+ parameters.cpu_throttle_increment, -+ DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), -+ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, -+ parameters.cpu_throttle_tailslow, false), -+ 
DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, -+ parameters.max_bandwidth, MAX_THROTTLE), -+ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, -+ parameters.downtime_limit, -+ DEFAULT_MIGRATE_SET_DOWNTIME), -+ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, -+ parameters.x_checkpoint_delay, -+ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), -+ DEFINE_PROP_UINT8("multifd-channels", MigrationState, -+ parameters.multifd_channels, -+ DEFAULT_MIGRATE_MULTIFD_CHANNELS), -+ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, -+ parameters.multifd_compression, -+ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), -+ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, -+ parameters.multifd_zlib_level, -+ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), -+ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, -+ parameters.multifd_zstd_level, -+ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), -+ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, -+ parameters.xbzrle_cache_size, -+ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -+ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, -+ parameters.max_postcopy_bandwidth, -+ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), -+ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, -+ parameters.max_cpu_throttle, -+ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), -+ DEFINE_PROP_SIZE("announce-initial", MigrationState, -+ parameters.announce_initial, -+ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), -+ DEFINE_PROP_SIZE("announce-max", MigrationState, -+ parameters.announce_max, -+ DEFAULT_MIGRATE_ANNOUNCE_MAX), -+ DEFINE_PROP_SIZE("announce-rounds", MigrationState, -+ parameters.announce_rounds, -+ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), -+ DEFINE_PROP_SIZE("announce-step", MigrationState, -+ parameters.announce_step, -+ DEFAULT_MIGRATE_ANNOUNCE_STEP), -+ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), -+ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), -+ DEFINE_PROP_STRING("tls-authz", MigrationState, 
parameters.tls_authz), -+ -+ /* Migration capabilities */ -+ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), -+ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), -+ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), -+ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), -+ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), -+ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), -+ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), -+ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", -+ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), -+ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), -+ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), -+ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), -+ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), -+ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), -+ DEFINE_PROP_MIG_CAP("x-background-snapshot", -+ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), -+#ifdef CONFIG_LINUX -+ DEFINE_PROP_MIG_CAP("x-zero-copy-send", -+ MIGRATION_CAPABILITY_ZERO_COPY_SEND), -+#endif -+ -+ DEFINE_PROP_END_OF_LIST(), -+}; -+ - bool migrate_auto_converge(void) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.h b/migration/options.h -index 89067e59a0..7b0f7245ad 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,9 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+#include "hw/qdev-properties.h" -+#include "hw/qdev-properties-system.h" -+ - /* constants */ - - /* Amount of time to allocate to each "chunk" of bandwidth-throttled -@@ -21,6 +24,10 @@ - #define BUFFER_DELAY 100 - #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) - -+/* migration properties */ -+ -+extern Property migration_properties[]; -+ - /* capabilities */ - - bool migrate_auto_converge(void); --- -2.39.3 - diff --git 
a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch b/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch deleted file mode 100644 index ad4510b..0000000 --- a/SOURCES/kvm-migration-Move-parameters-functions-to-option.c.patch +++ /dev/null @@ -1,317 +0,0 @@ -From d5ea4c82c44a59ac70313eb1eac77999ca5fde36 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 00:39:03 +0100 -Subject: [PATCH 37/56] migration: Move parameters functions to option.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [36/50] 2540921028025504723e762c0a1d2f295ac5a6d1 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 1dfc4b9e19bcf1ad41a1be9ac82db35b9647c3c1) -Signed-off-by: Peter Xu ---- - migration/migration.c | 91 --------------------------------------- - migration/migration.h | 11 ----- - migration/multifd-zlib.c | 1 + - migration/multifd-zstd.c | 1 + - migration/options.c | 93 ++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 13 ++++++ - 6 files changed, 108 insertions(+), 102 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 880a51210e..7f2e770deb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -2264,79 +2264,6 @@ bool migrate_postcopy(void) - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); - } - --int migrate_compress_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_level; --} -- --int migrate_compress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_threads; --} -- --int migrate_compress_wait_thread(void) --{ 
-- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.compress_wait_thread; --} -- --int migrate_decompress_threads(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.decompress_threads; --} -- --int migrate_multifd_channels(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_channels; --} -- --MultiFDCompression migrate_multifd_compression(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -- return s->parameters.multifd_compression; --} -- --int migrate_multifd_zlib_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zlib_level; --} -- --int migrate_multifd_zstd_level(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.multifd_zstd_level; --} -- - int migrate_use_tls(void) - { - MigrationState *s; -@@ -2346,24 +2273,6 @@ int migrate_use_tls(void) - return s->parameters.tls_creds && *s->parameters.tls_creds; - } - --uint64_t migrate_xbzrle_cache_size(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.xbzrle_cache_size; --} -- --static int64_t migrate_max_postcopy_bandwidth(void) --{ -- MigrationState *s; -- -- s = migrate_get_current(); -- -- return s->parameters.max_postcopy_bandwidth; --} -- - bool migrate_use_block_incremental(void) - { - MigrationState *s; -diff --git a/migration/migration.h b/migration/migration.h -index 24184622a8..8451e5f2fe 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -449,24 +449,13 @@ MigrationState *migrate_get_current(void); - - bool migrate_postcopy(void); - --int migrate_multifd_channels(void); --MultiFDCompression migrate_multifd_compression(void); --int migrate_multifd_zlib_level(void); --int migrate_multifd_zstd_level(void); -- - int migrate_use_tls(void); --uint64_t 
migrate_xbzrle_cache_size(void); - - bool migrate_use_block_incremental(void); - int migrate_max_cpu_throttle(void); - - uint64_t ram_get_total_transferred_pages(void); - --int migrate_compress_level(void); --int migrate_compress_threads(void); --int migrate_compress_wait_thread(void); --int migrate_decompress_threads(void); -- - /* Sending on the return path - generic and then for each message type */ - void migrate_send_rp_shut(MigrationIncomingState *mis, - uint32_t value); -diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c -index 37770248e1..81701250ad 100644 ---- a/migration/multifd-zlib.c -+++ b/migration/multifd-zlib.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zlib_data { -diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c -index f4a8e1ed1f..d1d29e76cc 100644 ---- a/migration/multifd-zstd.c -+++ b/migration/multifd-zstd.c -@@ -18,6 +18,7 @@ - #include "qapi/error.h" - #include "migration.h" - #include "trace.h" -+#include "options.h" - #include "multifd.h" - - struct zstd_data { -diff --git a/migration/options.c b/migration/options.c -index f3b2d6e482..8d15be858c 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -460,3 +460,96 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - s->capabilities[cap->value->capability] = cap->value->state; - } - } -+ -+/* parameters */ -+ -+int migrate_compress_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_level; -+} -+ -+int migrate_compress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_threads; -+} -+ -+int migrate_compress_wait_thread(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.compress_wait_thread; -+} -+ -+int migrate_decompress_threads(void) -+{ -+ MigrationState *s; -+ -+ s = 
migrate_get_current(); -+ -+ return s->parameters.decompress_threads; -+} -+ -+int64_t migrate_max_postcopy_bandwidth(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.max_postcopy_bandwidth; -+} -+ -+int migrate_multifd_channels(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_channels; -+} -+ -+MultiFDCompression migrate_multifd_compression(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); -+ return s->parameters.multifd_compression; -+} -+ -+int migrate_multifd_zlib_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zlib_level; -+} -+ -+int migrate_multifd_zstd_level(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.multifd_zstd_level; -+} -+ -+uint64_t migrate_xbzrle_cache_size(void) -+{ -+ MigrationState *s; -+ -+ s = migrate_get_current(); -+ -+ return s->parameters.xbzrle_cache_size; -+} -diff --git a/migration/options.h b/migration/options.h -index 5979e4ff90..b24ee92283 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -43,4 +43,17 @@ bool migrate_zero_copy_send(void); - bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp); - bool migrate_cap_set(int cap, bool value, Error **errp); - -+/* parameters */ -+ -+int migrate_compress_level(void); -+int migrate_compress_threads(void); -+int migrate_compress_wait_thread(void); -+int migrate_decompress_threads(void); -+int64_t migrate_max_postcopy_bandwidth(void); -+int migrate_multifd_channels(void); -+MultiFDCompression migrate_multifd_compression(void); -+int migrate_multifd_zlib_level(void); -+int migrate_multifd_zstd_level(void); -+uint64_t migrate_xbzrle_cache_size(void); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch 
b/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch deleted file mode 100644 index 10f185b..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch +++ /dev/null @@ -1,100 +0,0 @@ -From d967ec22cdb20e0a846f050a2bc7bd4caa87940d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:18:02 +0100 -Subject: [PATCH 35/56] migration: Move qmp_migrate_set_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [34/50] 16b62ca7e06c58d71389c449dc19c11939dd0882 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 45c1de13f09b1fd4ea26f54e6da12aae52f34cb8) -Signed-off-by: Peter Xu ---- - migration/migration.c | 26 -------------------------- - migration/options.c | 26 ++++++++++++++++++++++++++ - 2 files changed, 26 insertions(+), 26 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 3dc8ee4875..369cd91796 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1222,32 +1222,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -- Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- MigrationCapabilityStatusList *cap; -- bool new_caps[MIGRATION_CAPABILITY__MAX]; -- -- if (migration_is_running(s->state)) { -- error_setg(errp, QERR_MIGRATION_ACTIVE); -- return; -- } -- -- memcpy(new_caps, s->capabilities, sizeof(new_caps)); -- for (cap = params; cap; cap = cap->next) { -- new_caps[cap->value->capability] = cap->value->state; -- } -- -- if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -- return; 
-- } -- -- for (cap = params; cap; cap = cap->next) { -- s->capabilities[cap->value->capability] = cap->value->state; -- } --} -- - /* - * Check whether the parameters are valid. Error will be put into errp - * (if provided). Return true if valid, otherwise false. -diff --git a/migration/options.c b/migration/options.c -index ff621bdeb3..4cbe77e35a 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -413,3 +413,29 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - - return head; - } -+ -+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, -+ Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ MigrationCapabilityStatusList *cap; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; -+ -+ if (migration_is_running(s->state)) { -+ error_setg(errp, QERR_MIGRATION_ACTIVE); -+ return; -+ } -+ -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { -+ return; -+ } -+ -+ for (cap = params; cap; cap = cap->next) { -+ s->capabilities[cap->value->capability] = cap->value->state; -+ } -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch b/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch deleted file mode 100644 index 3685a33..0000000 --- a/SOURCES/kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch +++ /dev/null @@ -1,943 +0,0 @@ -From 944bf4759d1279c342ddd29c47d47c9670b64625 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:13:16 +0100 -Subject: [PATCH 50/56] migration: Move qmp_migrate_set_parameters() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 
2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [49/50] b55f7afe868e117d4212f1518b9a37514cc99b33 (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 09d6c9658474e8573c5ada58dca8b20fe47dd99e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 420 ------------------------------------------ - migration/options.c | 418 +++++++++++++++++++++++++++++++++++++++++ - migration/options.h | 11 ++ - 3 files changed, 429 insertions(+), 420 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 22ef83c619..08f87f2b0e 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -67,19 +67,10 @@ - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - --/* Amount of time to allocate to each "chunk" of bandwidth-throttled -- * data. */ --#define BUFFER_DELAY 100 --#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -- - /* Time in milliseconds we are allowed to stop the source, - * for sending the last part */ - #define DEFAULT_MIGRATE_SET_DOWNTIME 300 - --/* Maximum migrate downtime set to 2000 seconds */ --#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 --#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -- - /* Default compression thread count */ - #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 - /* Default decompression thread count, usually decompression is at -@@ -1140,417 +1131,6 @@ MigrationInfo *qmp_query_migrate(Error **errp) - return info; - } - --/* -- * Check whether the parameters are valid. Error will be put into errp -- * (if provided). Return true if valid, otherwise false. 
-- */ --static bool migrate_params_check(MigrationParameters *params, Error **errp) --{ -- if (params->has_compress_level && -- (params->compress_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_compress_threads && (params->compress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "compress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_decompress_threads && (params->decompress_threads < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "decompress_threads", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_throttle_trigger_threshold && -- (params->throttle_trigger_threshold < 1 || -- params->throttle_trigger_threshold > 100)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "throttle_trigger_threshold", -- "an integer in the range of 1 to 100"); -- return false; -- } -- -- if (params->has_cpu_throttle_initial && -- (params->cpu_throttle_initial < 1 || -- params->cpu_throttle_initial > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_initial", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_cpu_throttle_increment && -- (params->cpu_throttle_increment < 1 || -- params->cpu_throttle_increment > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "cpu_throttle_increment", -- "an integer in the range of 1 to 99"); -- return false; -- } -- -- if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_bandwidth", -- "an integer in the range of 0 to "stringify(SIZE_MAX) -- " bytes/second"); -- return false; -- } -- -- if (params->has_downtime_limit && -- (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "downtime_limit", -- "an integer in the range of 0 to " -- 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -- return false; -- } -- -- /* x_checkpoint_delay is now always positive */ -- -- if (params->has_multifd_channels && (params->multifd_channels < 1)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "multifd_channels", -- "a value between 1 and 255"); -- return false; -- } -- -- if (params->has_multifd_zlib_level && -- (params->multifd_zlib_level > 9)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -- "a value between 0 and 9"); -- return false; -- } -- -- if (params->has_multifd_zstd_level && -- (params->multifd_zstd_level > 20)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -- "a value between 0 and 20"); -- return false; -- } -- -- if (params->has_xbzrle_cache_size && -- (params->xbzrle_cache_size < qemu_target_page_size() || -- !is_power_of_2(params->xbzrle_cache_size))) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "xbzrle_cache_size", -- "a power of two no less than the target page size"); -- return false; -- } -- -- if (params->has_max_cpu_throttle && -- (params->max_cpu_throttle < params->cpu_throttle_initial || -- params->max_cpu_throttle > 99)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "max_cpu_throttle", -- "an integer in the range of cpu_throttle_initial to 99"); -- return false; -- } -- -- if (params->has_announce_initial && -- params->announce_initial > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_initial", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_max && -- params->announce_max > 100000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_max", -- "a value between 0 and 100000"); -- return false; -- } -- if (params->has_announce_rounds && -- params->announce_rounds > 1000) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_rounds", -- "a value between 0 and 1000"); -- return false; -- } -- if (params->has_announce_step && -- 
(params->announce_step < 1 || -- params->announce_step > 10000)) { -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -- "announce_step", -- "a value between 0 and 10000"); -- return false; -- } -- -- if (params->has_block_bitmap_mapping && -- !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -- error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -- return false; -- } -- --#ifdef CONFIG_LINUX -- if (migrate_zero_copy_send() && -- ((params->has_multifd_compression && params->multifd_compression) || -- (params->tls_creds && *params->tls_creds))) { -- error_setg(errp, -- "Zero copy only available for non-compressed non-TLS multifd migration"); -- return false; -- } --#endif -- -- return true; --} -- --static void migrate_params_test_apply(MigrateSetParameters *params, -- MigrationParameters *dest) --{ -- *dest = migrate_get_current()->parameters; -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- dest->compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- dest->compress_threads = params->compress_threads; -- } -- -- if (params->has_compress_wait_thread) { -- dest->compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- dest->decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- dest->cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- dest->cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- assert(params->tls_creds->type == QTYPE_QSTRING); -- dest->tls_creds = params->tls_creds->u.s; -- } -- 
-- if (params->tls_hostname) { -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- dest->tls_hostname = params->tls_hostname->u.s; -- } -- -- if (params->has_max_bandwidth) { -- dest->max_bandwidth = params->max_bandwidth; -- } -- -- if (params->has_downtime_limit) { -- dest->downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- dest->x_checkpoint_delay = params->x_checkpoint_delay; -- } -- -- if (params->has_block_incremental) { -- dest->block_incremental = params->block_incremental; -- } -- if (params->has_multifd_channels) { -- dest->multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- dest->multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- dest->xbzrle_cache_size = params->xbzrle_cache_size; -- } -- if (params->has_max_postcopy_bandwidth) { -- dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- } -- if (params->has_max_cpu_throttle) { -- dest->max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- dest->announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- dest->announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- dest->announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- dest->announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- dest->has_block_bitmap_mapping = true; -- dest->block_bitmap_mapping = params->block_bitmap_mapping; -- } --} -- --static void migrate_params_apply(MigrateSetParameters *params, Error **errp) --{ -- MigrationState *s = migrate_get_current(); -- -- /* TODO use QAPI_CLONE() instead of duplicating it inline */ -- -- if (params->has_compress_level) { -- s->parameters.compress_level = params->compress_level; -- } -- -- if (params->has_compress_threads) { -- s->parameters.compress_threads = params->compress_threads; -- } 
-- -- if (params->has_compress_wait_thread) { -- s->parameters.compress_wait_thread = params->compress_wait_thread; -- } -- -- if (params->has_decompress_threads) { -- s->parameters.decompress_threads = params->decompress_threads; -- } -- -- if (params->has_throttle_trigger_threshold) { -- s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -- } -- -- if (params->has_cpu_throttle_initial) { -- s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -- } -- -- if (params->has_cpu_throttle_increment) { -- s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -- } -- -- if (params->has_cpu_throttle_tailslow) { -- s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -- } -- -- if (params->tls_creds) { -- g_free(s->parameters.tls_creds); -- assert(params->tls_creds->type == QTYPE_QSTRING); -- s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -- } -- -- if (params->tls_hostname) { -- g_free(s->parameters.tls_hostname); -- assert(params->tls_hostname->type == QTYPE_QSTRING); -- s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -- } -- -- if (params->tls_authz) { -- g_free(s->parameters.tls_authz); -- assert(params->tls_authz->type == QTYPE_QSTRING); -- s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -- } -- -- if (params->has_max_bandwidth) { -- s->parameters.max_bandwidth = params->max_bandwidth; -- if (s->to_dst_file && !migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- -- if (params->has_downtime_limit) { -- s->parameters.downtime_limit = params->downtime_limit; -- } -- -- if (params->has_x_checkpoint_delay) { -- s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -- if (migration_in_colo_state()) { -- colo_checkpoint_notify(s); -- } -- } -- -- if (params->has_block_incremental) { -- s->parameters.block_incremental = params->block_incremental; -- } -- if 
(params->has_multifd_channels) { -- s->parameters.multifd_channels = params->multifd_channels; -- } -- if (params->has_multifd_compression) { -- s->parameters.multifd_compression = params->multifd_compression; -- } -- if (params->has_xbzrle_cache_size) { -- s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -- xbzrle_cache_resize(params->xbzrle_cache_size, errp); -- } -- if (params->has_max_postcopy_bandwidth) { -- s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -- if (s->to_dst_file && migration_in_postcopy()) { -- qemu_file_set_rate_limit(s->to_dst_file, -- s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -- } -- } -- if (params->has_max_cpu_throttle) { -- s->parameters.max_cpu_throttle = params->max_cpu_throttle; -- } -- if (params->has_announce_initial) { -- s->parameters.announce_initial = params->announce_initial; -- } -- if (params->has_announce_max) { -- s->parameters.announce_max = params->announce_max; -- } -- if (params->has_announce_rounds) { -- s->parameters.announce_rounds = params->announce_rounds; -- } -- if (params->has_announce_step) { -- s->parameters.announce_step = params->announce_step; -- } -- -- if (params->has_block_bitmap_mapping) { -- qapi_free_BitmapMigrationNodeAliasList( -- s->parameters.block_bitmap_mapping); -- -- s->parameters.has_block_bitmap_mapping = true; -- s->parameters.block_bitmap_mapping = -- QAPI_CLONE(BitmapMigrationNodeAliasList, -- params->block_bitmap_mapping); -- } --} -- --void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) --{ -- MigrationParameters tmp; -- -- /* TODO Rewrite "" to null instead */ -- if (params->tls_creds -- && params->tls_creds->type == QTYPE_QNULL) { -- qobject_unref(params->tls_creds->u.n); -- params->tls_creds->type = QTYPE_QSTRING; -- params->tls_creds->u.s = strdup(""); -- } -- /* TODO Rewrite "" to null instead */ -- if (params->tls_hostname -- && params->tls_hostname->type == QTYPE_QNULL) { -- 
qobject_unref(params->tls_hostname->u.n); -- params->tls_hostname->type = QTYPE_QSTRING; -- params->tls_hostname->u.s = strdup(""); -- } -- -- migrate_params_test_apply(params, &tmp); -- -- if (!migrate_params_check(&tmp, errp)) { -- /* Invalid parameter */ -- return; -- } -- -- migrate_params_apply(params, errp); --} -- -- - void qmp_migrate_start_postcopy(Error **errp) - { - MigrationState *s = migrate_get_current(); -diff --git a/migration/options.c b/migration/options.c -index d4c0714683..4701c75a4d 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -12,17 +12,25 @@ - */ - - #include "qemu/osdep.h" -+#include "exec/target_page.h" - #include "qapi/clone-visitor.h" - #include "qapi/error.h" - #include "qapi/qapi-commands-migration.h" - #include "qapi/qapi-visit-migration.h" - #include "qapi/qmp/qerror.h" -+#include "qapi/qmp/qnull.h" - #include "sysemu/runstate.h" -+#include "migration/colo.h" - #include "migration/misc.h" - #include "migration.h" -+#include "qemu-file.h" - #include "ram.h" - #include "options.h" - -+/* Maximum migrate downtime set to 2000 seconds */ -+#define MAX_MIGRATE_DOWNTIME_SECONDS 2000 -+#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) -+ - bool migrate_auto_converge(void) - { - MigrationState *s; -@@ -729,3 +737,413 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) - - return params; - } -+ -+/* -+ * Check whether the parameters are valid. Error will be put into errp -+ * (if provided). Return true if valid, otherwise false. 
-+ */ -+bool migrate_params_check(MigrationParameters *params, Error **errp) -+{ -+ if (params->has_compress_level && -+ (params->compress_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_compress_threads && (params->compress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "compress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_decompress_threads && (params->decompress_threads < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "decompress_threads", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_throttle_trigger_threshold && -+ (params->throttle_trigger_threshold < 1 || -+ params->throttle_trigger_threshold > 100)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "throttle_trigger_threshold", -+ "an integer in the range of 1 to 100"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_initial && -+ (params->cpu_throttle_initial < 1 || -+ params->cpu_throttle_initial > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_initial", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_cpu_throttle_increment && -+ (params->cpu_throttle_increment < 1 || -+ params->cpu_throttle_increment > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "cpu_throttle_increment", -+ "an integer in the range of 1 to 99"); -+ return false; -+ } -+ -+ if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_bandwidth", -+ "an integer in the range of 0 to "stringify(SIZE_MAX) -+ " bytes/second"); -+ return false; -+ } -+ -+ if (params->has_downtime_limit && -+ (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "downtime_limit", -+ "an integer in the range of 0 to " -+ 
stringify(MAX_MIGRATE_DOWNTIME)" ms"); -+ return false; -+ } -+ -+ /* x_checkpoint_delay is now always positive */ -+ -+ if (params->has_multifd_channels && (params->multifd_channels < 1)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "multifd_channels", -+ "a value between 1 and 255"); -+ return false; -+ } -+ -+ if (params->has_multifd_zlib_level && -+ (params->multifd_zlib_level > 9)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", -+ "a value between 0 and 9"); -+ return false; -+ } -+ -+ if (params->has_multifd_zstd_level && -+ (params->multifd_zstd_level > 20)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", -+ "a value between 0 and 20"); -+ return false; -+ } -+ -+ if (params->has_xbzrle_cache_size && -+ (params->xbzrle_cache_size < qemu_target_page_size() || -+ !is_power_of_2(params->xbzrle_cache_size))) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "xbzrle_cache_size", -+ "a power of two no less than the target page size"); -+ return false; -+ } -+ -+ if (params->has_max_cpu_throttle && -+ (params->max_cpu_throttle < params->cpu_throttle_initial || -+ params->max_cpu_throttle > 99)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "max_cpu_throttle", -+ "an integer in the range of cpu_throttle_initial to 99"); -+ return false; -+ } -+ -+ if (params->has_announce_initial && -+ params->announce_initial > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_initial", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_max && -+ params->announce_max > 100000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_max", -+ "a value between 0 and 100000"); -+ return false; -+ } -+ if (params->has_announce_rounds && -+ params->announce_rounds > 1000) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_rounds", -+ "a value between 0 and 1000"); -+ return false; -+ } -+ if (params->has_announce_step && -+ 
(params->announce_step < 1 || -+ params->announce_step > 10000)) { -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, -+ "announce_step", -+ "a value between 0 and 10000"); -+ return false; -+ } -+ -+ if (params->has_block_bitmap_mapping && -+ !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) { -+ error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); -+ return false; -+ } -+ -+#ifdef CONFIG_LINUX -+ if (migrate_zero_copy_send() && -+ ((params->has_multifd_compression && params->multifd_compression) || -+ (params->tls_creds && *params->tls_creds))) { -+ error_setg(errp, -+ "Zero copy only available for non-compressed non-TLS multifd migration"); -+ return false; -+ } -+#endif -+ -+ return true; -+} -+ -+static void migrate_params_test_apply(MigrateSetParameters *params, -+ MigrationParameters *dest) -+{ -+ *dest = migrate_get_current()->parameters; -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ dest->compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ dest->compress_threads = params->compress_threads; -+ } -+ -+ if (params->has_compress_wait_thread) { -+ dest->compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ dest->decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ dest->throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ dest->cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ dest->cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ dest->tls_creds = params->tls_creds->u.s; -+ } -+ 
-+ if (params->tls_hostname) { -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ dest->tls_hostname = params->tls_hostname->u.s; -+ } -+ -+ if (params->has_max_bandwidth) { -+ dest->max_bandwidth = params->max_bandwidth; -+ } -+ -+ if (params->has_downtime_limit) { -+ dest->downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ dest->x_checkpoint_delay = params->x_checkpoint_delay; -+ } -+ -+ if (params->has_block_incremental) { -+ dest->block_incremental = params->block_incremental; -+ } -+ if (params->has_multifd_channels) { -+ dest->multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ dest->multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ dest->xbzrle_cache_size = params->xbzrle_cache_size; -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ } -+ if (params->has_max_cpu_throttle) { -+ dest->max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ dest->announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ dest->announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ dest->announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ dest->announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ dest->has_block_bitmap_mapping = true; -+ dest->block_bitmap_mapping = params->block_bitmap_mapping; -+ } -+} -+ -+static void migrate_params_apply(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* TODO use QAPI_CLONE() instead of duplicating it inline */ -+ -+ if (params->has_compress_level) { -+ s->parameters.compress_level = params->compress_level; -+ } -+ -+ if (params->has_compress_threads) { -+ s->parameters.compress_threads = params->compress_threads; -+ } 
-+ -+ if (params->has_compress_wait_thread) { -+ s->parameters.compress_wait_thread = params->compress_wait_thread; -+ } -+ -+ if (params->has_decompress_threads) { -+ s->parameters.decompress_threads = params->decompress_threads; -+ } -+ -+ if (params->has_throttle_trigger_threshold) { -+ s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; -+ } -+ -+ if (params->has_cpu_throttle_initial) { -+ s->parameters.cpu_throttle_initial = params->cpu_throttle_initial; -+ } -+ -+ if (params->has_cpu_throttle_increment) { -+ s->parameters.cpu_throttle_increment = params->cpu_throttle_increment; -+ } -+ -+ if (params->has_cpu_throttle_tailslow) { -+ s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow; -+ } -+ -+ if (params->tls_creds) { -+ g_free(s->parameters.tls_creds); -+ assert(params->tls_creds->type == QTYPE_QSTRING); -+ s->parameters.tls_creds = g_strdup(params->tls_creds->u.s); -+ } -+ -+ if (params->tls_hostname) { -+ g_free(s->parameters.tls_hostname); -+ assert(params->tls_hostname->type == QTYPE_QSTRING); -+ s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s); -+ } -+ -+ if (params->tls_authz) { -+ g_free(s->parameters.tls_authz); -+ assert(params->tls_authz->type == QTYPE_QSTRING); -+ s->parameters.tls_authz = g_strdup(params->tls_authz->u.s); -+ } -+ -+ if (params->has_max_bandwidth) { -+ s->parameters.max_bandwidth = params->max_bandwidth; -+ if (s->to_dst_file && !migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ -+ if (params->has_downtime_limit) { -+ s->parameters.downtime_limit = params->downtime_limit; -+ } -+ -+ if (params->has_x_checkpoint_delay) { -+ s->parameters.x_checkpoint_delay = params->x_checkpoint_delay; -+ if (migration_in_colo_state()) { -+ colo_checkpoint_notify(s); -+ } -+ } -+ -+ if (params->has_block_incremental) { -+ s->parameters.block_incremental = params->block_incremental; -+ } -+ if 
(params->has_multifd_channels) { -+ s->parameters.multifd_channels = params->multifd_channels; -+ } -+ if (params->has_multifd_compression) { -+ s->parameters.multifd_compression = params->multifd_compression; -+ } -+ if (params->has_xbzrle_cache_size) { -+ s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; -+ xbzrle_cache_resize(params->xbzrle_cache_size, errp); -+ } -+ if (params->has_max_postcopy_bandwidth) { -+ s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth; -+ if (s->to_dst_file && migration_in_postcopy()) { -+ qemu_file_set_rate_limit(s->to_dst_file, -+ s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO); -+ } -+ } -+ if (params->has_max_cpu_throttle) { -+ s->parameters.max_cpu_throttle = params->max_cpu_throttle; -+ } -+ if (params->has_announce_initial) { -+ s->parameters.announce_initial = params->announce_initial; -+ } -+ if (params->has_announce_max) { -+ s->parameters.announce_max = params->announce_max; -+ } -+ if (params->has_announce_rounds) { -+ s->parameters.announce_rounds = params->announce_rounds; -+ } -+ if (params->has_announce_step) { -+ s->parameters.announce_step = params->announce_step; -+ } -+ -+ if (params->has_block_bitmap_mapping) { -+ qapi_free_BitmapMigrationNodeAliasList( -+ s->parameters.block_bitmap_mapping); -+ -+ s->parameters.has_block_bitmap_mapping = true; -+ s->parameters.block_bitmap_mapping = -+ QAPI_CLONE(BitmapMigrationNodeAliasList, -+ params->block_bitmap_mapping); -+ } -+} -+ -+void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) -+{ -+ MigrationParameters tmp; -+ -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_creds -+ && params->tls_creds->type == QTYPE_QNULL) { -+ qobject_unref(params->tls_creds->u.n); -+ params->tls_creds->type = QTYPE_QSTRING; -+ params->tls_creds->u.s = strdup(""); -+ } -+ /* TODO Rewrite "" to null instead */ -+ if (params->tls_hostname -+ && params->tls_hostname->type == QTYPE_QNULL) { -+ 
qobject_unref(params->tls_hostname->u.n); -+ params->tls_hostname->type = QTYPE_QSTRING; -+ params->tls_hostname->u.s = strdup(""); -+ } -+ -+ migrate_params_test_apply(params, &tmp); -+ -+ if (!migrate_params_check(&tmp, errp)) { -+ /* Invalid parameter */ -+ return; -+ } -+ -+ migrate_params_apply(params, errp); -+} -diff --git a/migration/options.h b/migration/options.h -index 13318a16c7..89067e59a0 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -14,6 +14,13 @@ - #ifndef QEMU_MIGRATION_OPTIONS_H - #define QEMU_MIGRATION_OPTIONS_H - -+/* constants */ -+ -+/* Amount of time to allocate to each "chunk" of bandwidth-throttled -+ * data. */ -+#define BUFFER_DELAY 100 -+#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) -+ - /* capabilities */ - - bool migrate_auto_converge(void); -@@ -74,4 +81,8 @@ int migrate_multifd_zstd_level(void); - uint8_t migrate_throttle_trigger_threshold(void); - uint64_t migrate_xbzrle_cache_size(void); - -+/* parameters helpers */ -+ -+bool migrate_params_check(MigrationParameters *params, Error **errp); -+ - #endif --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch b/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch deleted file mode 100644 index d2564de..0000000 --- a/SOURCES/kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 00cc3c3598828588619a7b3696819060bddaddb8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 23:15:59 +0100 -Subject: [PATCH 34/56] migration: Move qmp_query_migrate_capabilities() to - options.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [33/50] dbfa8f1e7aa7e000b4622ce2da12d7d418710f19 
(peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 4d0c6b695bf5252402ebf967f83baebfd2f4b91e) -Signed-off-by: Peter Xu ---- - migration/migration.c | 22 ---------------------- - migration/options.c | 23 +++++++++++++++++++++++ - 2 files changed, 23 insertions(+), 22 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d9e30ca918..3dc8ee4875 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -886,28 +886,6 @@ void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value) - migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf); - } - --MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) --{ -- MigrationCapabilityStatusList *head = NULL, **tail = &head; -- MigrationCapabilityStatus *caps; -- MigrationState *s = migrate_get_current(); -- int i; -- -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { --#ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (i == MIGRATION_CAPABILITY_BLOCK) { -- continue; -- } --#endif -- caps = g_malloc0(sizeof(*caps)); -- caps->capability = i; -- caps->state = s->capabilities[i]; -- QAPI_LIST_APPEND(tail, caps); -- } -- -- return head; --} -- - MigrationParameters *qmp_query_migrate_parameters(Error **errp) - { - MigrationParameters *params; -diff --git a/migration/options.c b/migration/options.c -index 367c930f46..ff621bdeb3 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -13,6 +13,7 @@ - - #include "qemu/osdep.h" - #include "qapi/error.h" -+#include "qapi/qapi-commands-migration.h" - #include "sysemu/runstate.h" - #include "migration.h" - #include "ram.h" -@@ -390,3 +391,25 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - - return true; - } -+ -+MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) -+{ -+ MigrationCapabilityStatusList *head = NULL, **tail = &head; -+ MigrationCapabilityStatus *caps; -+ 
MigrationState *s = migrate_get_current(); -+ int i; -+ -+ for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -+#ifndef CONFIG_LIVE_BLOCK_MIGRATION -+ if (i == MIGRATION_CAPABILITY_BLOCK) { -+ continue; -+ } -+#endif -+ caps = g_malloc0(sizeof(*caps)); -+ caps->capability = i; -+ caps->state = s->capabilities[i]; -+ QAPI_LIST_APPEND(tail, caps); -+ } -+ -+ return head; -+} --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch b/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch deleted file mode 100644 index 7339ce0..0000000 --- a/SOURCES/kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 4782b59a8b0b5762f87505ac7a83b37ddd2e0b3f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 20:28:56 +0100 -Subject: [PATCH 19/56] migration: Pass migrate_caps_check() the old and new - caps -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [18/50] df78d680d03f15d7cb7401ad89e68a4fc93fa835 (peterx/qemu-kvm) - -We used to pass the old capabilities array and the new -capabilities as a list. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit b02c7fc9ef447787414e6fa67eff75e7b7b30180) -Signed-off-by: Peter Xu ---- - migration/migration.c | 80 +++++++++++++++++-------------------------- - 1 file changed, 31 insertions(+), 49 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index d8e5fb6226..e8f596bcfa 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1299,30 +1299,20 @@ WriteTrackingSupport migrate_query_write_tracking(void) - } - - /** -- * @migration_caps_check - check capability validity -+ * @migration_caps_check - check capability compatibility - * -- * @cap_list: old capability list, array of bool -- * @params: new capabilities to be applied soon -+ * @old_caps: old capability list -+ * @new_caps: new capability list - * @errp: set *errp if the check failed, with reason - * - * Returns true if check passed, otherwise false. - */ --static bool migrate_caps_check(bool *cap_list, -- MigrationCapabilityStatusList *params, -- Error **errp) -+static bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) - { -- MigrationCapabilityStatusList *cap; -- bool old_postcopy_cap; - MigrationIncomingState *mis = migration_incoming_get_current(); - -- old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -- -- for (cap = params; cap; cap = cap->next) { -- cap_list[cap->value->capability] = cap->value->state; -- } -- - #ifndef CONFIG_LIVE_BLOCK_MIGRATION -- if (cap_list[MIGRATION_CAPABILITY_BLOCK]) { -+ if (new_caps[MIGRATION_CAPABILITY_BLOCK]) { - error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) " - "block migration"); - error_append_hint(errp, "Use drive_mirror+NBD instead.\n"); -@@ -1331,7 +1321,7 @@ static bool migrate_caps_check(bool *cap_list, - #endif - - #ifndef CONFIG_REPLICATION -- if (cap_list[MIGRATION_CAPABILITY_X_COLO]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_COLO]) { - error_setg(errp, "QEMU compiled without 
replication module" - " can't enable COLO"); - error_append_hint(errp, "Please enable replication before COLO.\n"); -@@ -1339,12 +1329,13 @@ static bool migrate_caps_check(bool *cap_list, - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - /* This check is reasonably expensive, so only when it's being - * set the first time, also it's only the destination that needs - * special support. - */ -- if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) && -+ if (!old_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] && -+ runstate_check(RUN_STATE_INMIGRATE) && - !postcopy_ram_supported_by_host(mis)) { - /* postcopy_ram_supported_by_host will have emitted a more - * detailed message -@@ -1353,13 +1344,13 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - -- if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { -+ if (new_caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) { - error_setg(errp, "Postcopy is not compatible with ignore-shared"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { -+ if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { - WriteTrackingSupport wt_support; - int idx; - /* -@@ -1383,7 +1374,7 @@ static bool migrate_caps_check(bool *cap_list, - */ - for (idx = 0; idx < check_caps_background_snapshot.size; idx++) { - int incomp_cap = check_caps_background_snapshot.caps[idx]; -- if (cap_list[incomp_cap]) { -+ if (new_caps[incomp_cap]) { - error_setg(errp, - "Background-snapshot is not compatible with %s", - MigrationCapability_str(incomp_cap)); -@@ -1393,10 +1384,10 @@ static bool migrate_caps_check(bool *cap_list, - } - - #ifdef CONFIG_LINUX -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -- (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || -- cap_list[MIGRATION_CAPABILITY_COMPRESS] || -- cap_list[MIGRATION_CAPABILITY_XBZRLE] || -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && -+ 
(!new_caps[MIGRATION_CAPABILITY_MULTIFD] || -+ new_caps[MIGRATION_CAPABILITY_COMPRESS] || -+ new_caps[MIGRATION_CAPABILITY_XBZRLE] || - migrate_multifd_compression() || - migrate_use_tls())) { - error_setg(errp, -@@ -1404,15 +1395,15 @@ static bool migrate_caps_check(bool *cap_list, - return false; - } - #else -- if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { -+ if (new_caps[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { - error_setg(errp, - "Zero copy currently only available on Linux"); - return false; - } - #endif - -- if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -- if (!cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { -+ if (new_caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]) { -+ if (!new_caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) { - error_setg(errp, "Postcopy preempt requires postcopy-ram"); - return false; - } -@@ -1423,14 +1414,14 @@ static bool migrate_caps_check(bool *cap_list, - * different compression channels, which is not compatible with the - * preempt assumptions on channel assignments. 
- */ -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Postcopy preempt not compatible with compress"); - return false; - } - } - -- if (cap_list[MIGRATION_CAPABILITY_MULTIFD]) { -- if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) { -+ if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { -+ if (new_caps[MIGRATION_CAPABILITY_COMPRESS]) { - error_setg(errp, "Multifd is not compatible with compress"); - return false; - } -@@ -1486,15 +1477,19 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - { - MigrationState *s = migrate_get_current(); - MigrationCapabilityStatusList *cap; -- bool cap_list[MIGRATION_CAPABILITY__MAX]; -+ bool new_caps[MIGRATION_CAPABILITY__MAX]; - - if (migration_is_running(s->state)) { - error_setg(errp, QERR_MIGRATION_ACTIVE); - return; - } - -- memcpy(cap_list, s->capabilities, sizeof(cap_list)); -- if (!migrate_caps_check(cap_list, params, errp)) { -+ memcpy(new_caps, s->capabilities, sizeof(new_caps)); -+ for (cap = params; cap; cap = cap->next) { -+ new_caps[cap->value->capability] = cap->value->state; -+ } -+ -+ if (!migrate_caps_check(s->capabilities, new_caps, errp)) { - return; - } - -@@ -4634,27 +4629,14 @@ static void migration_instance_init(Object *obj) - */ - static bool migration_object_check(MigrationState *ms, Error **errp) - { -- MigrationCapabilityStatusList *head = NULL; - /* Assuming all off */ -- bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret; -- int i; -+ bool old_caps[MIGRATION_CAPABILITY__MAX] = { 0 }; - - if (!migrate_params_check(&ms->parameters, errp)) { - return false; - } - -- for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->capabilities[i]) { -- QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); -- } -- } -- -- ret = migrate_caps_check(cap_list, head, errp); -- -- /* It works with head == NULL */ -- qapi_free_MigrationCapabilityStatusList(head); -- -- return ret; -+ return migrate_caps_check(old_caps, 
ms->capabilities, errp); - } - - static const TypeInfo migration_type = { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch b/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch deleted file mode 100644 index 22acab5..0000000 --- a/SOURCES/kvm-migration-Rename-duplicate-to-zero_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 3cecf66655a0dd599666bcac8add2dee85d5651f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:16:05 +0200 -Subject: [PATCH 16/56] migration: Rename duplicate to zero_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [15/50] 89db3c8b167c0f411ba95ce2730540c0e8f1206b (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of pages composed of the same -character, here comes the name "duplicated". But since years ago, it -refers to the number of zero_pages. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 1a386e8de5995fb5478ea99baa6d3e71abcf4b80) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 39501a0ed8..c15e2a61ca 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1142,7 +1142,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram = g_malloc0(sizeof(*info->ram)); - info->ram->transferred = stat64_get(&ram_counters.transferred); - info->ram->total = ram_bytes_total(); -- info->ram->duplicate = stat64_get(&ram_counters.duplicate); -+ info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; - info->ram->normal = stat64_get(&ram_counters.normal); -diff --git a/migration/ram.c b/migration/ram.c -index fe69ecaef4..19d345a030 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1119,7 +1119,7 @@ uint64_t ram_pagesize_summary(void) - uint64_t ram_get_total_transferred_pages(void) - { - return stat64_get(&ram_counters.normal) + -- stat64_get(&ram_counters.duplicate) + -+ stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } - -@@ -1320,7 +1320,7 @@ static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - int len = save_zero_page_to_file(pss, f, block, offset); - - if (len) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - ram_transferred_add(len); - return 1; - } -@@ -1359,7 +1359,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - if (bytes_xmit > 0) { - stat64_add(&ram_counters.normal, 1); - } else if (bytes_xmit == 0) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - } - - return true; -@@ -1486,7 
+1486,7 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) - ram_transferred_add(bytes_xmit); - - if (param->zero_page) { -- stat64_add(&ram_counters.duplicate, 1); -+ stat64_add(&ram_counters.zero_pages, 1); - return; - } - -@@ -2621,7 +2621,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - uint64_t pages = size / TARGET_PAGE_SIZE; - - if (zero) { -- stat64_add(&ram_counters.duplicate, pages); -+ stat64_add(&ram_counters.zero_pages, pages); - } else { - stat64_add(&ram_counters.normal, pages); - ram_transferred_add(size); -diff --git a/migration/ram.h b/migration/ram.h -index afa68521d7..55258334fe 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -45,7 +45,7 @@ typedef struct { - Stat64 dirty_sync_count; - Stat64 dirty_sync_missed_zero_copy; - Stat64 downtime_bytes; -- Stat64 duplicate; -+ Stat64 zero_pages; - Stat64 multifd_bytes; - Stat64 normal; - Stat64 postcopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch b/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch deleted file mode 100644 index 8ad6447..0000000 --- a/SOURCES/kvm-migration-Rename-normal-to-normal_pages.patch +++ /dev/null @@ -1,109 +0,0 @@ -From 7e27e7ea83856e1a7222ff46d91495f48fb6be4d Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 19 Apr 2023 18:19:45 +0200 -Subject: [PATCH 17/56] migration: Rename normal to normal_pages -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [16/50] 7df8b946918def9657bbe357861a6d72b5399ac6 (peterx/qemu-kvm) - -Rest of counters that refer to pages has a _pages suffix. -And historically, this showed the number of full pages transferred. 
-The name "normal" refered to the fact that they were sent without any -optimization (compression, xbzrle, zero_page, ...). - -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -(cherry picked from commit 8c0cda8fa0de0a50148e2c60552afca9cffca643) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - migration/ram.c | 10 +++++----- - migration/ram.h | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index c15e2a61ca..f1b3439e5f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1145,7 +1145,7 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) - info->ram->duplicate = stat64_get(&ram_counters.zero_pages); - /* legacy value. It is not used anymore */ - info->ram->skipped = 0; -- info->ram->normal = stat64_get(&ram_counters.normal); -+ info->ram->normal = stat64_get(&ram_counters.normal_pages); - info->ram->normal_bytes = info->ram->normal * page_size; - info->ram->mbps = s->mbps; - info->ram->dirty_sync_count = -diff --git a/migration/ram.c b/migration/ram.c -index 19d345a030..229714045a 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -1118,7 +1118,7 @@ uint64_t ram_pagesize_summary(void) - - uint64_t ram_get_total_transferred_pages(void) - { -- return stat64_get(&ram_counters.normal) + -+ return stat64_get(&ram_counters.normal_pages) + - stat64_get(&ram_counters.zero_pages) + - compression_counters.pages + xbzrle_counters.pages; - } -@@ -1357,7 +1357,7 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, - } - - if (bytes_xmit > 0) { -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - } else if (bytes_xmit == 0) { - stat64_add(&ram_counters.zero_pages, 1); - } -@@ -1391,7 +1391,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, - qemu_put_buffer(file, buf, TARGET_PAGE_SIZE); - } - ram_transferred_add(TARGET_PAGE_SIZE); -- stat64_add(&ram_counters.normal, 1); -+ 
stat64_add(&ram_counters.normal_pages, 1); - return 1; - } - -@@ -1447,7 +1447,7 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, - if (multifd_queue_page(file, block, offset) < 0) { - return -1; - } -- stat64_add(&ram_counters.normal, 1); -+ stat64_add(&ram_counters.normal_pages, 1); - - return 1; - } -@@ -2623,7 +2623,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) - if (zero) { - stat64_add(&ram_counters.zero_pages, pages); - } else { -- stat64_add(&ram_counters.normal, pages); -+ stat64_add(&ram_counters.normal_pages, pages); - ram_transferred_add(size); - qemu_file_credit_transfer(f, size); - } -diff --git a/migration/ram.h b/migration/ram.h -index 55258334fe..a6e0d70226 100644 ---- a/migration/ram.h -+++ b/migration/ram.h -@@ -47,7 +47,7 @@ typedef struct { - Stat64 downtime_bytes; - Stat64 zero_pages; - Stat64 multifd_bytes; -- Stat64 normal; -+ Stat64 normal_pages; - Stat64 postcopy_bytes; - Stat64 postcopy_requests; - Stat64 precopy_bytes; --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch b/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch deleted file mode 100644 index 7e78d82..0000000 --- a/SOURCES/kvm-migration-Update-atomic-stats-out-of-the-mutex.patch +++ /dev/null @@ -1,52 +0,0 @@ -From c0d377e1bf442a09b82fddbb8588fcddf6439854 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 24 Nov 2022 17:26:19 +0100 -Subject: [PATCH 09/56] migration: Update atomic stats out of the mutex -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [8/50] 88e9dbc9a3e5aef60a7c98c871144904c7062b1f (peterx/qemu-kvm) - -Reviewed-by: David Edmondson -Reviewed-by: Peter Xu -Signed-off-by: Juan Quintela 
-(cherry picked from commit 30fb22cda45bea43a3c0e26049ebdd71a9503ffd) -Signed-off-by: Peter Xu ---- - migration/multifd.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index 01fab01a92..6ef3a27938 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -433,8 +433,8 @@ static int multifd_send_pages(QEMUFile *f) - transferred = ((uint64_t) pages->num) * p->page_size + p->packet_len; - qemu_file_acct_rate_limit(f, transferred); - ram_counters.multifd_bytes += transferred; -- stat64_add(&ram_counters.transferred, transferred); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, transferred); - qemu_sem_post(&p->sem); - - return 1; -@@ -628,8 +628,8 @@ int multifd_send_sync_main(QEMUFile *f) - p->pending_job++; - qemu_file_acct_rate_limit(f, p->packet_len); - ram_counters.multifd_bytes += p->packet_len; -- stat64_add(&ram_counters.transferred, p->packet_len); - qemu_mutex_unlock(&p->mutex); -+ stat64_add(&ram_counters.transferred, p->packet_len); - qemu_sem_post(&p->sem); - } - for (i = 0; i < migrate_multifd_channels(); i++) { --- -2.39.1 - diff --git a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch b/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch deleted file mode 100644 index f179761..0000000 --- a/SOURCES/kvm-migration-Use-migrate_max_postcopy_bandwidth.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 8d203baa6cbd1f371e308c2c9d59a5ca7d29dca8 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Thu, 2 Mar 2023 10:55:30 +0100 -Subject: [PATCH 38/56] migration: Use migrate_max_postcopy_bandwidth() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [37/50] 
d62948e9ee40a85ed9b460a583c3b0e43cd5d47f (peterx/qemu-kvm) - -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 5390adec03a7d8bc6bcf5887f726b0ddaeb90681) -Signed-off-by: Peter Xu ---- - migration/migration.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/migration.c b/migration/migration.c -index 7f2e770deb..78bca9a93f 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -3799,7 +3799,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - - if (resume) { - /* This is a resumed migration */ -- rate_limit = s->parameters.max_postcopy_bandwidth / -+ rate_limit = migrate_max_postcopy_bandwidth() / - XFER_LIMIT_RATIO; - } else { - /* This is a fresh new migration */ --- -2.39.1 - diff --git a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch b/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch deleted file mode 100644 index 9451696..0000000 --- a/SOURCES/kvm-migration-mark-mixed-functions-that-can-suspend.patch +++ /dev/null @@ -1,153 +0,0 @@ -From cfdf5715a2334ad06b5966ec986d134bbd5ba08b Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 16 Dec 2022 12:48:16 +0100 -Subject: [PATCH 05/56] migration: mark mixed functions that can suspend -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [4/50] 9f055b526edd06a3440999d5de91e5d624678c7d (peterx/qemu-kvm) - -There should be no paths from a coroutine_fn to aio_poll, however in -practice coroutine_mixed_fn will call aio_poll in the !qemu_in_coroutine() -path. 
By marking mixed functions, we can track accurately the call paths -that execute entirely in coroutine context, and find more missing -coroutine_fn markers. This results in more accurate checks that -coroutine code does not end up blocking. - -If the marking were extended transitively to all functions that call -these ones, static analysis could be done much more efficiently. -However, this is a start and makes it possible to use vrc's path-based -searches to find potential bugs where coroutine_fns call blocking functions. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 394b9407e4c515f96df6647d629ee28cbb86f07c) -Signed-off-by: Peter Xu ---- - include/migration/qemu-file-types.h | 4 ++-- - migration/qemu-file.c | 14 +++++++------- - migration/qemu-file.h | 6 +++--- - 3 files changed, 12 insertions(+), 12 deletions(-) - -diff --git a/include/migration/qemu-file-types.h b/include/migration/qemu-file-types.h -index 2867e3da84..1436f9ce92 100644 ---- a/include/migration/qemu-file-types.h -+++ b/include/migration/qemu-file-types.h -@@ -35,7 +35,7 @@ void qemu_put_byte(QEMUFile *f, int v); - void qemu_put_be16(QEMUFile *f, unsigned int v); - void qemu_put_be32(QEMUFile *f, unsigned int v); - void qemu_put_be64(QEMUFile *f, uint64_t v); --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); - - int qemu_get_byte(QEMUFile *f); - -@@ -161,7 +161,7 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv) - qemu_get_be64s(f, (uint64_t *)pv); - } - --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]); -+size_t coroutine_mixed_fn qemu_get_counted_string(QEMUFile *f, char buf[256]); - - void qemu_put_counted_string(QEMUFile *f, const char *name); - -diff --git a/migration/qemu-file.c b/migration/qemu-file.c -index 102ab3b439..ee04240a21 100644 ---- a/migration/qemu-file.c -+++ b/migration/qemu-file.c -@@ -392,7 +392,7 @@ size_t ram_control_save_page(QEMUFile 
*f, ram_addr_t block_offset, - * case if the underlying file descriptor gives a short read, and that can - * happen even on a blocking fd. - */ --static ssize_t qemu_fill_buffer(QEMUFile *f) -+static ssize_t coroutine_mixed_fn qemu_fill_buffer(QEMUFile *f) - { - int len; - int pending; -@@ -585,7 +585,7 @@ void qemu_file_skip(QEMUFile *f, int size) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - { - ssize_t pending; - size_t index; -@@ -633,7 +633,7 @@ size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) - * return as many as it managed to read (assuming blocking fd's which - * all current QEMUFile are) - */ --size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - { - size_t pending = size; - size_t done = 0; -@@ -674,7 +674,7 @@ size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) - * Note: Since **buf may get changed, the caller should take care to - * keep a pointer to the original buffer if it needs to deallocate it. - */ --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - { - if (size < IO_BUF_SIZE) { - size_t res; -@@ -696,7 +696,7 @@ size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size) - * Peeks a single byte from the buffer; this isn't guaranteed to work if - * offset leaves a gap after the previous read/peeked data. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset) -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset) - { - int index = f->buf_index + offset; - -@@ -713,7 +713,7 @@ int qemu_peek_byte(QEMUFile *f, int offset) - return f->buf[index]; - } - --int qemu_get_byte(QEMUFile *f) -+int coroutine_mixed_fn qemu_get_byte(QEMUFile *f) - { - int result; - -@@ -894,7 +894,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) - * else 0 - * (Note a 0 length string will return 0 either way) - */ --size_t qemu_get_counted_string(QEMUFile *f, char buf[256]) -+size_t coroutine_fn qemu_get_counted_string(QEMUFile *f, char buf[256]) - { - size_t len = qemu_get_byte(f); - size_t res = qemu_get_buffer(f, (uint8_t *)buf, len); -diff --git a/migration/qemu-file.h b/migration/qemu-file.h -index 9d0155a2a1..d16cd50448 100644 ---- a/migration/qemu-file.h -+++ b/migration/qemu-file.h -@@ -108,8 +108,8 @@ bool qemu_file_is_writable(QEMUFile *f); - - #include "migration/qemu-file-types.h" - --size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); --size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); -+size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); -+size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); - ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, - const uint8_t *p, size_t size); - int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); -@@ -119,7 +119,7 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); - * is; you aren't guaranteed to be able to peak to +n bytes unless you've - * previously peeked +n-1. 
- */ --int qemu_peek_byte(QEMUFile *f, int offset); -+int coroutine_mixed_fn qemu_peek_byte(QEMUFile *f, int offset); - void qemu_file_skip(QEMUFile *f, int size); - /* - * qemu_file_credit_transfer: --- -2.39.1 - diff --git a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch b/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch deleted file mode 100644 index 4e73c80..0000000 --- a/SOURCES/kvm-migration-move-migration_global_dump-to-migration-hm.patch +++ /dev/null @@ -1,121 +0,0 @@ -From 96e6914cbfb18bb8287c57b9ac9a6b364d3e7a22 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 22 Feb 2023 17:18:05 +0100 -Subject: [PATCH 20/56] migration: move migration_global_dump() to - migration-hmp-cmds.c -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [19/50] c8d330a2833c706b9bd78f7154be882e3977ad06 (peterx/qemu-kvm) - -It is only used there, so we can make it static. -Once there, remove spice.h that it is not used. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Peter Xu -Reviewed-by: Philippe Mathieu-Daudé - ---- - -fix David Edmonson ui/qemu-spice.h unintended removal - -(cherry picked from commit c938157713e723165a42cb6e8364adb6fcbd0e22) -Signed-off-by: Peter Xu ---- - include/migration/misc.h | 1 - - migration/migration-hmp-cmds.c | 22 +++++++++++++++++++++- - migration/migration.c | 19 ------------------- - 3 files changed, 21 insertions(+), 21 deletions(-) - -diff --git a/include/migration/misc.h b/include/migration/misc.h -index 8b49841016..5ebe13b4b9 100644 ---- a/include/migration/misc.h -+++ b/include/migration/misc.h -@@ -66,7 +66,6 @@ bool migration_has_finished(MigrationState *); - bool migration_has_failed(MigrationState *); - /* ...and after the device transmission */ - bool migration_in_postcopy_after_devices(MigrationState *); --void migration_global_dump(Monitor *mon); - /* True if incoming migration entered POSTCOPY_INCOMING_DISCARD */ - bool migration_in_incoming_postcopy(void); - /* True if incoming migration entered POSTCOPY_INCOMING_ADVISE */ -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 72519ea99f..71da91967a 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -15,7 +15,6 @@ - - #include "qemu/osdep.h" - #include "block/qapi.h" --#include "migration/misc.h" - #include "migration/snapshot.h" - #include "monitor/hmp.h" - #include "monitor/monitor.h" -@@ -30,6 +29,27 @@ - #include "qemu/sockets.h" - #include "sysemu/runstate.h" - #include "ui/qemu-spice.h" -+#include "sysemu/sysemu.h" -+#include "migration.h" -+ -+static void migration_global_dump(Monitor *mon) -+{ -+ MigrationState *ms = migrate_get_current(); -+ -+ monitor_printf(mon, "globals:\n"); -+ monitor_printf(mon, "store-global-state: %s\n", -+ ms->store_global_state ? "on" : "off"); -+ monitor_printf(mon, "only-migratable: %s\n", -+ only_migratable ? 
"on" : "off"); -+ monitor_printf(mon, "send-configuration: %s\n", -+ ms->send_configuration ? "on" : "off"); -+ monitor_printf(mon, "send-section-footer: %s\n", -+ ms->send_section_footer ? "on" : "off"); -+ monitor_printf(mon, "decompress-error-check: %s\n", -+ ms->decompress_error_check ? "on" : "off"); -+ monitor_printf(mon, "clear-bitmap-shift: %u\n", -+ ms->clear_bitmap_shift); -+} - - void hmp_info_migrate(Monitor *mon, const QDict *qdict) - { -diff --git a/migration/migration.c b/migration/migration.c -index e8f596bcfa..aa96ffdc5b 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -4420,25 +4420,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) - s->migration_thread_running = true; - } - --void migration_global_dump(Monitor *mon) --{ -- MigrationState *ms = migrate_get_current(); -- -- monitor_printf(mon, "globals:\n"); -- monitor_printf(mon, "store-global-state: %s\n", -- ms->store_global_state ? "on" : "off"); -- monitor_printf(mon, "only-migratable: %s\n", -- only_migratable ? "on" : "off"); -- monitor_printf(mon, "send-configuration: %s\n", -- ms->send_configuration ? "on" : "off"); -- monitor_printf(mon, "send-section-footer: %s\n", -- ms->send_section_footer ? "on" : "off"); -- monitor_printf(mon, "decompress-error-check: %s\n", -- ms->decompress_error_check ? 
"on" : "off"); -- monitor_printf(mon, "clear-bitmap-shift: %u\n", -- ms->clear_bitmap_shift); --} -- - #define DEFINE_PROP_MIG_CAP(name, x) \ - DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - --- -2.39.1 - diff --git a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch b/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch deleted file mode 100644 index 7700466..0000000 --- a/SOURCES/kvm-migration-postcopy-Detect-file-system-on-dest-host.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 4827d5be5357ab89e0c46f606ad828bf97d36471 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:38 -0400 -Subject: [PATCH 04/56] migration/postcopy: Detect file system on dest host -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [3/50] 121aeeda8a019f79dba6c077c7018bd1c86f3d71 (peterx/qemu-kvm) - -Postcopy requires the memory support userfaultfd to work. Right now we -check it but it's a bit too late (when switching to postcopy migration). - -Do that early right at enabling of postcopy. - -Note that this is still only a best effort because ramblocks can be -dynamically created. We can add check in hostmem creations and fail if -postcopy enabled, but maybe that's too aggressive. - -Still, we have chance to fail the most obvious where we know there's an -existing unsupported ramblock. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit ae30b9b2892b85e6c3d5c0b8d1949c4d77a2954a) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 34 ++++++++++++++++++++++++++++++---- - 1 file changed, 30 insertions(+), 4 deletions(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index 93f39f8e06..bbb8af61ae 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -36,6 +36,7 @@ - #include "yank_functions.h" - #include "tls.h" - #include "qemu/userfaultfd.h" -+#include "qemu/mmap-alloc.h" - - /* Arbitrary limit on size of each discard command, - * keeps them around ~200 bytes -@@ -336,11 +337,12 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) - - /* Callback from postcopy_ram_supported_by_host block iterator. - */ --static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) -+static int test_ramblock_postcopiable(RAMBlock *rb) - { - const char *block_name = qemu_ram_get_idstr(rb); - ram_addr_t length = qemu_ram_get_used_length(rb); - size_t pagesize = qemu_ram_pagesize(rb); -+ QemuFsType fs; - - if (length % pagesize) { - error_report("Postcopy requires RAM blocks to be a page size multiple," -@@ -348,6 +350,15 @@ static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque) - "page size of 0x%zx", block_name, length, pagesize); - return 1; - } -+ -+ if (rb->fd >= 0) { -+ fs = qemu_fd_getfs(rb->fd); -+ if (fs != QEMU_FS_TYPE_TMPFS && fs != QEMU_FS_TYPE_HUGETLBFS) { -+ error_report("Host backend files need to be TMPFS or HUGETLBFS only"); -+ return 1; -+ } -+ } -+ - return 0; - } - -@@ -366,6 +377,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) - struct uffdio_range range_struct; - uint64_t feature_mask; - Error *local_err = NULL; -+ RAMBlock *block; - - if (qemu_target_page_size() > pagesize) { - error_report("Target page size bigger than host page size"); -@@ -390,9 +402,23 @@ bool 
postcopy_ram_supported_by_host(MigrationIncomingState *mis) - goto out; - } - -- /* We don't support postcopy with shared RAM yet */ -- if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) { -- goto out; -+ /* -+ * We don't support postcopy with some type of ramblocks. -+ * -+ * NOTE: we explicitly ignored ramblock_is_ignored() instead we checked -+ * all possible ramblocks. This is because this function can be called -+ * when creating the migration object, during the phase RAM_MIGRATABLE -+ * is not even properly set for all the ramblocks. -+ * -+ * A side effect of this is we'll also check against RAM_SHARED -+ * ramblocks even if migrate_ignore_shared() is set (in which case -+ * we'll never migrate RAM_SHARED at all), but normally this shouldn't -+ * affect in reality, or we can revisit. -+ */ -+ RAMBLOCK_FOREACH(block) { -+ if (test_ramblock_postcopiable(block)) { -+ goto out; -+ } - } - - /* --- -2.39.1 - diff --git a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch b/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch deleted file mode 100644 index 88eb791..0000000 --- a/SOURCES/kvm-migration-remove-extra-whitespace-character-for-code.patch +++ /dev/null @@ -1,44 +0,0 @@ -From 93c9a1ae812720d3a29980a3c5fcfc1e916993de Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?=E6=9D=8E=E7=9A=86=E4=BF=8A?= -Date: Fri, 17 Mar 2023 09:57:13 +0000 -Subject: [PATCH 07/56] migration: remove extra whitespace character for code - style -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [6/50] bc1cd812f8dfc18e47e1644b5333c703eae23d2d (peterx/qemu-kvm) - -Fix code style. 
- -Signed-off-by: 李皆俊 -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit 8ebb6ecc3798e66a9ba98355983762bedfa1b72d) -Signed-off-by: Peter Xu ---- - migration/ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 79d881f735..0e68099bf9 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3293,7 +3293,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - - migration_ops = g_malloc0(sizeof(MigrationOps)); - migration_ops->ram_save_target_page = ram_save_target_page_legacy; -- ret = multifd_send_sync_main(f); -+ ret = multifd_send_sync_main(f); - if (ret < 0) { - return ret; - } --- -2.39.1 - diff --git a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch b/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch deleted file mode 100644 index 52b19b3..0000000 --- a/SOURCES/kvm-migration-rename-enabled_capabilities-to-capabilitie.patch +++ /dev/null @@ -1,329 +0,0 @@ -From ee566ec12099992f9134bda1db92dd568427245a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 18:26:59 +0100 -Subject: [PATCH 18/56] migration: rename enabled_capabilities to capabilities -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [17/50] 841a27addf273d8f559bc8ebd2c854200e8ca673 (peterx/qemu-kvm) - -It is clear from the context what that means, and such a long name -with the extra long names of the capabilities make very difficilut to -stay inside the 80 columns limit. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Vladimir Sementsov-Ogievskiy -(cherry picked from commit 0cec2056ff67557c18d7b8ab1b70ab47c9e31f2f) -Signed-off-by: Peter Xu ---- - migration/migration.c | 52 +++++++++++++++++++++---------------------- - migration/migration.h | 2 +- - migration/rdma.c | 4 ++-- - migration/savevm.c | 6 ++--- - 4 files changed, 31 insertions(+), 33 deletions(-) - -diff --git a/migration/migration.c b/migration/migration.c -index f1b3439e5f..d8e5fb6226 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -364,8 +364,7 @@ static bool migrate_late_block_activate(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; -+ return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; - } - - /* -@@ -944,7 +943,7 @@ MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp) - #endif - caps = g_malloc0(sizeof(*caps)); - caps->capability = i; -- caps->state = s->enabled_capabilities[i]; -+ caps->state = s->capabilities[i]; - QAPI_LIST_APPEND(tail, caps); - } - -@@ -1494,13 +1493,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, - return; - } - -- memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list)); -+ memcpy(cap_list, s->capabilities, sizeof(cap_list)); - if (!migrate_caps_check(cap_list, params, errp)) { - return; - } - - for (cap = params; cap; cap = cap->next) { -- s->enabled_capabilities[cap->value->capability] = cap->value->state; -+ s->capabilities[cap->value->capability] = cap->value->state; - } - } - -@@ -2569,7 +2568,7 @@ bool migrate_release_ram(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; - } - - bool migrate_postcopy_ram(void) -@@ -2578,7 +2577,7 @@ bool migrate_postcopy_ram(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; - } - - bool migrate_postcopy(void) -@@ -2592,7 +2591,7 @@ bool migrate_auto_converge(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; -+ return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; - } - - bool migrate_zero_blocks(void) -@@ -2601,7 +2600,7 @@ bool migrate_zero_blocks(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; - } - - bool migrate_postcopy_blocktime(void) -@@ -2610,7 +2609,7 @@ bool migrate_postcopy_blocktime(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; - } - - bool migrate_use_compression(void) -@@ -2619,7 +2618,7 @@ bool migrate_use_compression(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]; -+ return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; - } - - int migrate_compress_level(void) -@@ -2664,7 +2663,7 @@ bool migrate_dirty_bitmaps(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; -+ return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; - } - - bool migrate_ignore_shared(void) -@@ -2673,7 +2672,7 @@ bool migrate_ignore_shared(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; - } - - bool migrate_validate_uuid(void) -@@ -2682,7 +2681,7 @@ bool migrate_validate_uuid(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; -+ return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; - } - - bool 
migrate_use_events(void) -@@ -2691,7 +2690,7 @@ bool migrate_use_events(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS]; -+ return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; - } - - bool migrate_use_multifd(void) -@@ -2700,7 +2699,7 @@ bool migrate_use_multifd(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD]; -+ return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; - } - - bool migrate_pause_before_switchover(void) -@@ -2709,8 +2708,7 @@ bool migrate_pause_before_switchover(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[ -- MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; -+ return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; - } - - int migrate_multifd_channels(void) -@@ -2757,7 +2755,7 @@ bool migrate_use_zero_copy_send(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; -+ return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; - } - #endif - -@@ -2776,7 +2774,7 @@ int migrate_use_xbzrle(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE]; -+ return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; - } - - uint64_t migrate_xbzrle_cache_size(void) -@@ -2803,7 +2801,7 @@ bool migrate_use_block(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK]; -+ return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; - } - - bool migrate_use_return_path(void) -@@ -2812,7 +2810,7 @@ bool migrate_use_return_path(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; -+ return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; - } - - bool migrate_use_block_incremental(void) -@@ -2830,7 +2828,7 @@ bool migrate_background_snapshot(void) - - s = migrate_get_current(); - -- return 
s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; -+ return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; - } - - bool migrate_postcopy_preempt(void) -@@ -2839,7 +2837,7 @@ bool migrate_postcopy_preempt(void) - - s = migrate_get_current(); - -- return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; -+ return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; - } - - /* migration thread support */ -@@ -3584,7 +3582,7 @@ fail: - bool migrate_colo_enabled(void) - { - MigrationState *s = migrate_get_current(); -- return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO]; -+ return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; - } - - typedef enum MigThrError { -@@ -4447,7 +4445,7 @@ void migration_global_dump(Monitor *mon) - } - - #define DEFINE_PROP_MIG_CAP(name, x) \ -- DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false) -+ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) - - static Property migration_properties[] = { - DEFINE_PROP_BOOL("store-global-state", MigrationState, -@@ -4646,7 +4644,7 @@ static bool migration_object_check(MigrationState *ms, Error **errp) - } - - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (ms->enabled_capabilities[i]) { -+ if (ms->capabilities[i]) { - QAPI_LIST_PREPEND(head, migrate_cap_add(i, true)); - } - } -diff --git a/migration/migration.h b/migration/migration.h -index 310ae8901b..04e0860b4e 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -310,7 +310,7 @@ struct MigrationState { - int64_t downtime_start; - int64_t downtime; - int64_t expected_downtime; -- bool enabled_capabilities[MIGRATION_CAPABILITY__MAX]; -+ bool capabilities[MIGRATION_CAPABILITY__MAX]; - int64_t setup_time; - /* - * Whether guest was running when we enter the completion stage. 
-diff --git a/migration/rdma.c b/migration/rdma.c -index df646be35e..f35f021963 100644 ---- a/migration/rdma.c -+++ b/migration/rdma.c -@@ -4179,7 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto err; -@@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque, - } - - ret = qemu_rdma_source_init(rdma_return_path, -- s->enabled_capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); -+ s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp); - - if (ret) { - goto return_path_err; -diff --git a/migration/savevm.c b/migration/savevm.c -index aa54a67fda..589ef926ab 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -253,7 +253,7 @@ static uint32_t get_validatable_capabilities_count(void) - uint32_t result = 0; - int i; - for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - result++; - } - } -@@ -275,7 +275,7 @@ static int configuration_pre_save(void *opaque) - state->capabilities = g_renew(MigrationCapability, state->capabilities, - state->caps_count); - for (i = j = 0; i < MIGRATION_CAPABILITY__MAX; i++) { -- if (should_validate_capability(i) && s->enabled_capabilities[i]) { -+ if (should_validate_capability(i) && s->capabilities[i]) { - state->capabilities[j++] = i; - } - } -@@ -325,7 +325,7 @@ static bool configuration_validate_capabilities(SaveState *state) - continue; - } - source_state = test_bit(i, source_caps_bm); -- target_state = s->enabled_capabilities[i]; -+ target_state = s->capabilities[i]; - if (source_state != target_state) { - error_report("Capability %s is %s, but received capability is %s", - MigrationCapability_str(i), --- -2.39.1 - diff --git 
a/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch b/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch new file mode 100644 index 0000000..871a80e --- /dev/null +++ b/SOURCES/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch @@ -0,0 +1,90 @@ +From 21cadc8ed200ad9af13b217b441b9f12cd2163ee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 13 Mar 2024 16:30:00 +0100 +Subject: [PATCH 1/4] mirror: Don't call job_pause_point() under graph lock + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 093910fcd5ea1516d0ed48a9cd73dad6170590f3 (kmwolf/centos-qemu-kvm) + +Calling job_pause_point() while holding the graph reader lock +potentially results in a deadlock: bdrv_graph_wrlock() first drains +everything, including the mirror job, which pauses it. The job is only +unpaused at the end of the drain section, which is when the graph writer +lock has been successfully taken. However, if the job happens to be +paused at a pause point where it still holds the reader lock, the writer +lock can't be taken as long as the job is still paused. + +Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-28125 +Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock") +Signed-off-by: Kevin Wolf +Message-ID: <20240313153000.33121-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ae5a40e8581185654a667fbbf7e4adbc2a2a3e45) +Signed-off-by: Kevin Wolf +--- + block/mirror.c | 10 ++++++---- + include/qemu/job.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 5145eb53e1..1bdce3b657 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + return bytes_handled; + } + +-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) ++static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) + { +- BlockDriverState *source = s->mirror_top_bs->backing->bs; ++ BlockDriverState *source; + MirrorOp *pseudo_op; + int64_t offset; + /* At least the first dirty chunk is mirrored in one iteration. 
*/ +@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); + int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); + ++ bdrv_graph_co_rdlock(); ++ source = s->mirror_top_bs->backing->bs; ++ bdrv_graph_co_rdunlock(); ++ + bdrv_dirty_bitmap_lock(s->dirty_bitmap); + offset = bdrv_dirty_iter_next(s->dbi); + if (offset < 0) { +@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { +- bdrv_graph_co_rdlock(); + mirror_iteration(s); +- bdrv_graph_co_rdunlock(); + } + } + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 9ea98b5927..2b873f2576 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -483,7 +483,7 @@ void job_enter(Job *job); + * + * Called with job_mutex *not* held. + */ +-void coroutine_fn job_pause_point(Job *job); ++void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job); + + /** + * @job: The job that calls the function. 
+-- +2.39.3 + diff --git a/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch new file mode 100644 index 0000000..345a2b4 --- /dev/null +++ b/SOURCES/kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch @@ -0,0 +1,1630 @@ +From 972e553e605e8916fc47c2d51cdbde940fd7d855 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 18 Jan 2024 09:48:23 -0500 +Subject: [PATCH 13/22] monitor: only run coroutine commands in + qemu_aio_context + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [9/17] ec5690fcade04a88bd1815bf2ae0377e80fe3d51 (stefanha/centos-stream-qemu-kvm) + +monitor_qmp_dispatcher_co() runs in the iohandler AioContext that is not +polled during nested event loops. The coroutine currently reschedules +itself in the main loop's qemu_aio_context AioContext, which is polled +during nested event loops. One known problem is that QMP device-add +calls drain_call_rcu(), which temporarily drops the BQL, leading to all +sorts of havoc like other vCPU threads re-entering device emulation code +while another vCPU thread is waiting in device emulation code with +aio_poll(). + +Paolo Bonzini suggested running non-coroutine QMP handlers in the +iohandler AioContext. This avoids trouble with nested event loops. His +original idea was to move coroutine rescheduling to +monitor_qmp_dispatch(), but I resorted to moving it to qmp_dispatch() +because we don't know if the QMP handler needs to run in coroutine +context in monitor_qmp_dispatch(). monitor_qmp_dispatch() would have +been nicer since it's associated with the monitor implementation and not +as general as qmp_dispatch(), which is also used by qemu-ga. 
+ +A number of qemu-iotests need updated .out files because the order of +QMP events vs QMP responses has changed. + +Solves Issue #1933. + +Cc: qemu-stable@nongnu.org +Fixes: 7bed89958bfbf40df9ca681cefbdca63abdde39d ("device_core: use drain_call_rcu in in qmp_device_add") +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985 +Buglink: https://issues.redhat.com/browse/RHEL-17369 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240118144823.1497953-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Tested-by: Fiona Ebner +Signed-off-by: Kevin Wolf +(cherry picked from commit effd60c878176bcaf97fa7ce2b12d04bb8ead6f7) +Signed-off-by: Stefan Hajnoczi +--- + monitor/qmp.c | 17 ---- + qapi/qmp-dispatch.c | 24 +++++- + tests/qemu-iotests/060.out | 4 +- + tests/qemu-iotests/071.out | 4 +- + tests/qemu-iotests/081.out | 16 ++-- + tests/qemu-iotests/087.out | 12 +-- + tests/qemu-iotests/108.out | 2 +- + tests/qemu-iotests/109 | 4 +- + tests/qemu-iotests/109.out | 78 ++++++++----------- + tests/qemu-iotests/117.out | 2 +- + tests/qemu-iotests/120.out | 2 +- + tests/qemu-iotests/127.out | 2 +- + tests/qemu-iotests/140.out | 2 +- + tests/qemu-iotests/143.out | 2 +- + tests/qemu-iotests/156.out | 2 +- + tests/qemu-iotests/176.out | 16 ++-- + tests/qemu-iotests/182.out | 2 +- + tests/qemu-iotests/183.out | 4 +- + tests/qemu-iotests/184.out | 32 ++++---- + tests/qemu-iotests/185 | 6 +- + tests/qemu-iotests/185.out | 45 +++++++++-- + tests/qemu-iotests/191.out | 16 ++-- + tests/qemu-iotests/195.out | 16 ++-- + tests/qemu-iotests/223.out | 12 +-- + tests/qemu-iotests/227.out | 32 ++++---- + tests/qemu-iotests/247.out | 2 +- + tests/qemu-iotests/273.out | 8 +- + tests/qemu-iotests/308 | 4 +- + tests/qemu-iotests/308.out | 4 +- + tests/qemu-iotests/tests/file-io-error | 5 +- + tests/qemu-iotests/tests/iothreads-resize.out | 2 +- + tests/qemu-iotests/tests/qsd-jobs.out | 4 +- + 32 files changed, 205 insertions(+), 178 
deletions(-) + +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 6eee450fe4..a239945e8d 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -321,14 +321,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + qemu_coroutine_yield(); + } + +- /* +- * Move the coroutine from iohandler_ctx to qemu_aio_context for +- * executing the command handler so that it can make progress if it +- * involves an AIO_WAIT_WHILE(). +- */ +- aio_co_schedule(qemu_get_aio_context(), qmp_dispatcher_co); +- qemu_coroutine_yield(); +- + /* Process request */ + if (req_obj->req) { + if (trace_event_get_state(TRACE_MONITOR_QMP_CMD_IN_BAND)) { +@@ -355,15 +347,6 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + } + + qmp_request_free(req_obj); +- +- /* +- * Yield and reschedule so the main loop stays responsive. +- * +- * Move back to iohandler_ctx so that nested event loops for +- * qemu_aio_context don't start new monitor commands. +- */ +- aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co); +- qemu_coroutine_yield(); + } + qatomic_set(&qmp_dispatcher_co, NULL); + } +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 555528b6bb..176b549473 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -206,9 +206,31 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { ++ if (qemu_in_coroutine()) { ++ /* ++ * Move the coroutine from iohandler_ctx to qemu_aio_context for ++ * executing the command handler so that it can make progress if it ++ * involves an AIO_WAIT_WHILE(). 
++ */ ++ aio_co_schedule(qemu_get_aio_context(), qemu_coroutine_self()); ++ qemu_coroutine_yield(); ++ } ++ + monitor_set_cur(qemu_coroutine_self(), cur_mon); + cmd->fn(args, &ret, &err); + monitor_set_cur(qemu_coroutine_self(), NULL); ++ ++ if (qemu_in_coroutine()) { ++ /* ++ * Yield and reschedule so the main loop stays responsive. ++ * ++ * Move back to iohandler_ctx so that nested event loops for ++ * qemu_aio_context don't start new monitor commands. ++ */ ++ aio_co_schedule(iohandler_get_aio_context(), ++ qemu_coroutine_self()); ++ qemu_coroutine_yield(); ++ } + } else { + /* + * Actual context doesn't match the one the command needs. +@@ -232,7 +254,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + .errp = &err, + .co = qemu_coroutine_self(), + }; +- aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh, ++ aio_bh_schedule_oneshot(iohandler_get_aio_context(), do_qmp_dispatch_bh, + &data); + qemu_coroutine_yield(); + } +diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out +index 329977d9b9..a37bf446e9 100644 +--- a/tests/qemu-iotests/060.out ++++ b/tests/qemu-iotests/060.out +@@ -421,8 +421,8 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "none0", "msg": "Preventing invalid write on metadata (overlaps with refcount table)", "offset": 65536, "node-name": "drive", "fatal": true, "size": 65536}} + write failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Testing incoming inactive corrupted image === + +@@ -432,8 +432,8 @@ QMP_VERSION + qcow2: Image is corrupt: L2 table offset 0x2a2a2a00 unaligned (L1 index: 0); further non-fatal corruption events will be suppressed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "BLOCK_IMAGE_CORRUPTED", "data": {"device": "", "msg": "L2 table offset 0x2a2a2a00 unaligned (L1 index: 0)", "node-name": "drive", "fatal": false}} + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + corrupt: false + *** done +diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out +index bca0c02f5c..a2923b05c2 100644 +--- a/tests/qemu-iotests/071.out ++++ b/tests/qemu-iotests/071.out +@@ -45,8 +45,8 @@ QMP_VERSION + {"return": {}} + read failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Testing blkverify on existing block device === +@@ -84,9 +84,9 @@ wrote 512/512 bytes at offset 0 + {"return": ""} + read failed: Input/output error + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + QEMU_PROG: Failed to flush the L2 table cache: Input/output error + QEMU_PROG: Failed to flush the refcount block cache: Input/output error ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out +index 615c083549..aba85ea564 100644 +--- a/tests/qemu-iotests/081.out ++++ b/tests/qemu-iotests/081.out +@@ -35,8 +35,8 @@ QMP_VERSION + read 10485760/10485760 bytes at offset 0 + 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + == using quorum rewrite corrupted mode == +@@ -67,8 +67,8 @@ QMP_VERSION + read 10485760/10485760 bytes at offset 0 + 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + -- checking that the image has been corrected -- + read 10485760/10485760 bytes at offset 0 +@@ -106,8 +106,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION +@@ -115,8 +115,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "Cannot add a child to a quorum in blkverify mode"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + == dynamically removing a child from a quorum == +@@ -125,31 +125,31 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "blkverify=on can only be set if there are exactly two files and vote-threshold is 2"}} + {"error": {"class": "GenericError", "desc": "Cannot find device='drive0-quorum' nor node-name='drive0-quorum'"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", 
"data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + Testing: + QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "The number of children cannot be lower than the vote threshold 2"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out +index e1c23a6983..97b6d8036d 100644 +--- a/tests/qemu-iotests/087.out ++++ b/tests/qemu-iotests/087.out +@@ -7,8 +7,8 @@ Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "'node-name' must be specified for the root node"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Duplicate ID === +@@ -18,8 +18,8 @@ QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "node-name=disk is conflicting with a device id"}} + {"error": {"class": "GenericError", "desc": "Duplicate nodes with node-name='test-node'"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === aio=native without O_DIRECT === +@@ -28,8 +28,8 @@ Testing: + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Encrypted image QCow === +@@ -40,8 +40,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"error": {"class": "GenericError", "desc": "Use of AES-CBC encrypted IMGFMT images is no longer supported 
in system emulators"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Encrypted image LUKS === +@@ -52,8 +52,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Missing driver === +@@ -63,7 +63,7 @@ Testing: -S + QMP_VERSION + {"return": {}} + {"error": {"class": "GenericError", "desc": "Parameter 'driver' is missing"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + *** done +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index b5401d788d..b9c876b394 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -173,8 +173,8 @@ OK: Reftable is where we expect it + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} + {"return": {}} + { "execute": "quit" } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 +index e207a555f3..0fb580f9a5 100755 +--- a/tests/qemu-iotests/109 ++++ b/tests/qemu-iotests/109 +@@ -57,13 +57,13 @@ run_qemu() + _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src + _send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return" + +- _send_qemu_cmd $QEMU_HANDLE \ ++ capture_events="$qmp_event" _send_qemu_cmd $QEMU_HANDLE \ + 
"{'execute':'drive-mirror', 'arguments':{ + 'device': 'src', 'target': '$raw_img', $qmp_format + 'mode': 'existing', 'sync': 'full'}}" \ + "return" + +- _send_qemu_cmd $QEMU_HANDLE '' "$qmp_event" ++ capture_events="$qmp_event JOB_STATUS_CHANGE" _wait_event $QEMU_HANDLE "$qmp_event" + if test "$qmp_event" = BLOCK_JOB_ERROR; then + _send_qemu_cmd $QEMU_HANDLE '' '"status": "null"' + fi +diff --git a/tests/qemu-iotests/109.out b/tests/qemu-iotests/109.out +index 965c9a6a0a..3ae8552ff7 100644 +--- a/tests/qemu-iotests/109.out ++++ b/tests/qemu-iotests/109.out +@@ -7,7 +7,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -23,8 +23,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -35,12 +35,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -50,6 +48,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a qcow2 header into raw === +@@ -59,7 +58,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -75,8 +74,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -87,12 +86,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 197120, "offset": 197120, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -102,6 +99,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 197120, "offset": 197120, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a qed header into raw === +@@ -111,7 +109,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -127,8 +125,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -139,12 +137,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -154,6 +150,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vdi header into raw === +@@ -163,7 +160,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -179,8 +176,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -191,12 +188,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 1024, "offset": 1024, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -206,6 +201,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 1024, "offset": 1024, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vmdk header into raw === +@@ -215,7 +211,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -231,8 +227,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -243,12 +239,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 65536, "offset": 65536, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -258,6 +252,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 65536, "offset": 65536, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Writing a vpc header into raw === +@@ -267,7 +262,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -283,8 +278,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -295,12 +290,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -310,6 +303,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image empty.bochs into raw === +@@ -318,7 +312,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -334,8 +328,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -346,12 +340,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2560, "offset": 2560, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -361,6 +353,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2560, "offset": 2560, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image iotest-dirtylog-10G-4M.vhdx into raw === +@@ -369,7 +362,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -385,8 +378,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -397,12 +390,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 31457280, "offset": 31457280, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -412,6 +403,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 31457280, "offset": 31457280, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image parallels-v1 into raw === +@@ -420,7 +412,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -436,8 +428,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -448,12 +440,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 327680, "offset": 327680, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -463,6 +453,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 327680, "offset": 327680, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Copying sample image simple-pattern.cloop into raw === +@@ -471,7 +462,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -487,8 +478,8 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"execute":"query-block-jobs"} + {"return": []} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 512/512 bytes at offset 0 + 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + { 'execute': 'qmp_capabilities' } +@@ -499,12 +490,10 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 2048, "offset": 2048, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -514,6 +503,7 @@ read 512/512 bytes at offset 0 + {"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 2048, "offset": 2048, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + + === Write legitimate MBR into raw === +@@ -522,7 +512,7 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=SIZE + { 'execute': 'qmp_capabilities' } + {"return": {}} + {'execute':'drive-mirror', 'arguments':{ +- 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', ++ 'device': 'src', 'target': 'TEST_DIR/t.IMGFMT', + 'mode': 'existing', 'sync': 'full'}} + WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed raw. + Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted. 
+@@ -530,12 +520,10 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -545,6 +533,7 @@ WARNING: Image format was not specified for 'TEST_DIR/t.raw' and probing guessed + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": 
{"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + { 'execute': 'qmp_capabilities' } + {"return": {}} +@@ -554,12 +543,10 @@ Images are identical. + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "src"}} + {"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"execute":"query-block-jobs"} + {"return": [{"auto-finalize": true, "io-status": "ok", "device": "src", "auto-dismiss": true, "busy": false, "len": 512, "offset": 512, "status": "ready", "paused": false, "speed": 0, "ready": true, "type": "mirror", "actively-synced": false}]} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "src"}} +@@ -569,5 +556,6 @@ Images are identical. 
+ {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "src", "len": 512, "offset": 512, "speed": 0, "type": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "src"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "src"}} ++{"return": {}} + Images are identical. + *** done +diff --git a/tests/qemu-iotests/117.out b/tests/qemu-iotests/117.out +index 735ffd25c6..1cea9e0217 100644 +--- a/tests/qemu-iotests/117.out ++++ b/tests/qemu-iotests/117.out +@@ -18,8 +18,8 @@ wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + No errors were found on the image. 
+ read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/120.out b/tests/qemu-iotests/120.out +index 0744c1f136..35d84a5bc5 100644 +--- a/tests/qemu-iotests/120.out ++++ b/tests/qemu-iotests/120.out +@@ -5,8 +5,8 @@ QMP_VERSION + wrote 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {"return": ""} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + read 65536/65536 bytes at offset 0 +diff --git a/tests/qemu-iotests/127.out b/tests/qemu-iotests/127.out +index 1685c4850a..dd8c4a8aa9 100644 +--- a/tests/qemu-iotests/127.out ++++ b/tests/qemu-iotests/127.out +@@ -28,6 +28,6 @@ wrote 42/42 bytes at offset 0 + { 'execute': 'quit' } + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/140.out b/tests/qemu-iotests/140.out +index 312f76d5da..32866440ae 100644 +--- a/tests/qemu-iotests/140.out ++++ b/tests/qemu-iotests/140.out +@@ -19,6 +19,6 @@ read 65536/65536 bytes at offset 0 + qemu-io: can't open device nbd+unix:///drv?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'drv' not present + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} 
++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/143.out b/tests/qemu-iotests/143.out +index 9ec5888e0e..d6afa32abc 100644 +--- a/tests/qemu-iotests/143.out ++++ b/tests/qemu-iotests/143.out +@@ -10,6 +10,6 @@ server reported: export 'no_such_export' not present + qemu-io: can't open device nbd+unix:///aa--aa1?socket=SOCK_DIR/nbd: Requested export not available + server reported: export 'aa--aa...' not present + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/156.out b/tests/qemu-iotests/156.out +index 4a22f0c41a..07e5e83f5d 100644 +--- a/tests/qemu-iotests/156.out ++++ b/tests/qemu-iotests/156.out +@@ -72,8 +72,8 @@ read 65536/65536 bytes at offset 196608 + {"return": ""} + + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + read 65536/65536 bytes at offset 0 + 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out +index 9d09b60452..45e9153ef3 100644 +--- a/tests/qemu-iotests/176.out ++++ b/tests/qemu-iotests/176.out +@@ -169,8 +169,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -206,8 +206,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": 
false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.1 === + +@@ -218,8 +218,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -256,8 +256,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.2 === + +@@ -268,8 +268,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -306,8 +306,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Test pass bitmap.3 === + +@@ -318,8 +318,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + wrote 196608/196608 bytes at offset 2147287040 + 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + wrote 131072/131072 bytes at offset 2147352576 +@@ -353,6 +353,6 @@ QMP_VERSION + {"return": {}} + 
{"return": {}} + {"return": {"sha256": HASH}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/182.out b/tests/qemu-iotests/182.out +index 57f7265458..83fc1a4797 100644 +--- a/tests/qemu-iotests/182.out ++++ b/tests/qemu-iotests/182.out +@@ -53,6 +53,6 @@ Formatting 'TEST_DIR/t.qcow2.overlay', fmt=qcow2 cluster_size=65536 extended_l2= + {'execute': 'qmp_capabilities'} + {"return": {}} + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/183.out b/tests/qemu-iotests/183.out +index fd9c2e52a5..51aa41c888 100644 +--- a/tests/qemu-iotests/183.out ++++ b/tests/qemu-iotests/183.out +@@ -53,11 +53,11 @@ wrote 65536/65536 bytes at offset 1048576 + === Shut down and check image === + + {"execute":"quit"} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"return": {}} + {"execute":"quit"} +-{"return": {}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + No errors were found on the image. + No errors were found on the image. 
+ wrote 65536/65536 bytes at offset 1048576 +diff --git a/tests/qemu-iotests/184.out b/tests/qemu-iotests/184.out +index 77e5489d65..e8f631f853 100644 +--- a/tests/qemu-iotests/184.out ++++ b/tests/qemu-iotests/184.out +@@ -89,10 +89,6 @@ Testing: + "return": [ + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -104,6 +100,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == property changes in ThrottleGroup == +@@ -169,10 +169,6 @@ Testing: + "iops-total-max": 0 + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -184,6 +180,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == object creation/set errors == +@@ -211,10 +211,6 @@ Testing: + "desc": "bps/iops/max total values and read/write values cannot be used at the same time" + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -226,6 +222,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + == don't specify group == +@@ -247,10 +247,6 @@ Testing: + "desc": "Parameter 'throttle-group' is missing" + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -262,6 +258,10 @@ Testing: + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + *** done +diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 +index 2ae0a85bbf..17489fb91c 100755 +--- a/tests/qemu-iotests/185 ++++ b/tests/qemu-iotests/185 +@@ -344,14 +344,14 @@ wait_for_job_and_quit() { + + sleep 1 + ++ # List of expected events ++ capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' ++ + _send_qemu_cmd $h \ + '{"execute": "quit"}' \ + 'return' + +- # List of expected events +- capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN' + _wait_event $h 'SHUTDOWN' +- QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN + _wait_event $h 'JOB_STATUS_CHANGE' # standby + 
_wait_event $h 'JOB_STATUS_CHANGE' # ready + _wait_event $h 'JOB_STATUS_CHANGE' # standby +diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out +index 7292c26bae..6af0953c4d 100644 +--- a/tests/qemu-iotests/185.out ++++ b/tests/qemu-iotests/185.out +@@ -40,9 +40,16 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "commit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start active commit job and exit qemu === + +@@ -56,9 +63,16 @@ Formatting 
'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "commit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start mirror job and exit qemu === + +@@ -75,9 +89,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": 
TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 4194304, "speed": 65536, "type": "mirror"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start backup job and exit qemu === + +@@ -97,9 +118,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 65536, "speed": 65536, "type": "backup"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + + === Start streaming job and exit qemu === + +@@ -112,9 +140,16 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} + {"return": {}} + { 'execute': 'quit' } +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "paused", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": 
TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "disk"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "disk"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "disk"}} ++{"return": {}} + No errors were found on the image. + + === Start mirror to throttled QSD and exit qemu === +diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out +index ea88777374..c3309e4bc6 100644 +--- a/tests/qemu-iotests/191.out ++++ b/tests/qemu-iotests/191.out +@@ -378,10 +378,6 @@ wrote 65536/65536 bytes at offset 1048576 + ] + } + { 'execute': 'quit' } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -393,6 +389,10 @@ wrote 65536/65536 bytes at offset 1048576 + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -796,10 +796,6 @@ wrote 65536/65536 bytes at offset 1048576 + ] + } + { 'execute': 'quit' } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -811,6 +807,10 @@ wrote 65536/65536 bytes at offset 1048576 + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +diff --git a/tests/qemu-iotests/195.out b/tests/qemu-iotests/195.out +index ec84df5012..91717d302e 100644 +--- a/tests/qemu-iotests/195.out ++++ b/tests/qemu-iotests/195.out +@@ -17,10 +17,6 @@ Testing: -drive 
if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid + "return": { + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -32,6 +28,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,backing.node-name=mid + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + image: TEST_DIR/t.IMGFMT.mid + file format: IMGFMT +@@ -55,10 +55,6 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top + "return": { + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -70,6 +66,10 @@ Testing: -drive if=none,file=TEST_DIR/t.IMGFMT,node-name=top + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + image: TEST_DIR/t.IMGFMT + file format: IMGFMT +diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out +index e5e7f42caa..5f5b42e2dc 100644 +--- a/tests/qemu-iotests/223.out ++++ b/tests/qemu-iotests/223.out +@@ -11,8 +11,8 @@ QMP_VERSION + {"return": {}} + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + + === Write part of the file under active bitmap === +@@ -145,14 +145,14 @@ read 2097152/2097152 bytes at offset 2097152 + + {"execute":"nbd-server-remove", + "arguments":{"name":"n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + 
{"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} + {"execute":"nbd-server-stop"} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} +@@ -267,14 +267,14 @@ read 2097152/2097152 bytes at offset 2097152 + + {"execute":"nbd-server-remove", + "arguments":{"name":"n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"return": {}} + {"execute":"nbd-server-remove", + "arguments":{"name":"n2"}} +-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n2"}} + {"error": {"class": "GenericError", "desc": "Export 'n2' is not found"}} + {"execute":"nbd-server-stop"} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "n3"}} +@@ -282,8 +282,8 @@ read 2097152/2097152 bytes at offset 2097152 + {"execute":"nbd-server-stop"} + {"error": {"class": "GenericError", "desc": "NBD server not running"}} + {"execute":"quit"} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + + === Use qemu-nbd as server === + +diff --git a/tests/qemu-iotests/227.out b/tests/qemu-iotests/227.out +index a947b1a87d..d6a1d4ecb6 100644 +--- a/tests/qemu-iotests/227.out ++++ b/tests/qemu-iotests/227.out +@@ -54,10 +54,6 @@ Testing: -drive driver=null-co,read-zeroes=on,if=virtio + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -69,6 +65,10 @@ 
Testing: -drive driver=null-co,read-zeroes=on,if=virtio + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -drive if=none === +@@ -124,10 +124,6 @@ Testing: -drive driver=null-co,if=none + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -139,6 +135,10 @@ Testing: -drive driver=null-co,if=none + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -blockdev === +@@ -155,10 +155,6 @@ Testing: -blockdev driver=null-co,node-name=null + "return": [ + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -170,6 +166,10 @@ Testing: -blockdev driver=null-co,node-name=null + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + + === blockstats with -blockdev and -device === +@@ -226,10 +226,6 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b + } + ] + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -241,5 +237,9 @@ Testing: -blockdev driver=null-co,read-zeroes=on,node-name=null -device virtio-b + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + *** done +diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out +index e909e83994..7d252e7fe4 100644 +--- a/tests/qemu-iotests/247.out ++++ b/tests/qemu-iotests/247.out +@@ -17,6 +17,6 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 134217728, "speed": 0, "type": "commit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": 
{"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + *** done +diff --git a/tests/qemu-iotests/273.out b/tests/qemu-iotests/273.out +index 6a74a8138b..71843f02de 100644 +--- a/tests/qemu-iotests/273.out ++++ b/tests/qemu-iotests/273.out +@@ -282,10 +282,6 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + ] + } + } +-{ +- "return": { +- } +-} + { + "timestamp": { + "seconds": TIMESTAMP, +@@ -297,5 +293,9 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev + "reason": "host-qmp-quit" + } + } ++{ ++ "return": { ++ } ++} + + *** done +diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 +index de12b2b1b9..ea81dc496a 100755 +--- a/tests/qemu-iotests/308 ++++ b/tests/qemu-iotests/308 +@@ -77,6 +77,7 @@ fuse_export_add() + # $1: Export ID + fuse_export_del() + { ++ capture_events="BLOCK_EXPORT_DELETED" \ + _send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-del', + 'arguments': { +@@ -84,8 +85,7 @@ fuse_export_del() + } }" \ + 'return' + +- _send_qemu_cmd $QEMU_HANDLE \ +- '' \ ++ _wait_event $QEMU_HANDLE \ + 'BLOCK_EXPORT_DELETED' + } + +diff --git a/tests/qemu-iotests/308.out b/tests/qemu-iotests/308.out +index d5767133b1..e5e233691d 100644 +--- a/tests/qemu-iotests/308.out ++++ b/tests/qemu-iotests/308.out +@@ -165,9 +165,9 @@ OK: Post-truncate image size is as expected + + === Tear down === + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export-mp"}} ++{"return": {}} + + === Compare copy with original === + Images are identical. 
+@@ -201,9 +201,9 @@ wrote 67108864/67108864 bytes at offset 0 + read 67108864/67108864 bytes at offset 0 + 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + {'execute': 'quit'} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export"}} ++{"return": {}} + read 67108864/67108864 bytes at offset 0 + 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + *** done +diff --git a/tests/qemu-iotests/tests/file-io-error b/tests/qemu-iotests/tests/file-io-error +index 88ee5f670c..fb8db73b31 100755 +--- a/tests/qemu-iotests/tests/file-io-error ++++ b/tests/qemu-iotests/tests/file-io-error +@@ -99,13 +99,12 @@ echo + $QEMU_IO -f file -c 'write 0 64M' "$TEST_DIR/fuse-export" | _filter_qemu_io + echo + +-_send_qemu_cmd $QEMU_HANDLE \ ++capture_events=BLOCK_EXPORT_DELETED _send_qemu_cmd $QEMU_HANDLE \ + "{'execute': 'block-export-del', + 'arguments': {'id': 'exp0'}}" \ + 'return' + +-_send_qemu_cmd $QEMU_HANDLE \ +- '' \ ++_wait_event $QEMU_HANDLE \ + 'BLOCK_EXPORT_DELETED' + + _send_qemu_cmd $QEMU_HANDLE \ +diff --git a/tests/qemu-iotests/tests/iothreads-resize.out b/tests/qemu-iotests/tests/iothreads-resize.out +index 2ca5a9d964..2967ac8f0d 100644 +--- a/tests/qemu-iotests/tests/iothreads-resize.out ++++ b/tests/qemu-iotests/tests/iothreads-resize.out +@@ -3,8 +3,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + QMP_VERSION + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++{"return": {}} + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 128 MiB (134217728 bytes) +diff --git a/tests/qemu-iotests/tests/qsd-jobs.out b/tests/qemu-iotests/tests/qsd-jobs.out +index 
c1bc9b8356..aa6b6d1aef 100644 +--- a/tests/qemu-iotests/tests/qsd-jobs.out ++++ b/tests/qemu-iotests/tests/qsd-jobs.out +@@ -7,8 +7,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file=TEST_DIR/ + QMP_VERSION + {"return": {}} + {"return": {}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "job0", "len": 0, "offset": 0, "speed": 0, "type": "commit"}} ++{"return": {}} + + === Streaming can't get permission on base node === + +@@ -17,6 +17,6 @@ QMP_VERSION + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}} + {"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}} +-{"return": {}} + {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}} ++{"return": {}} + *** done +-- +2.39.3 + diff --git a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch b/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch deleted file mode 100644 index 0bebd2e..0000000 --- a/SOURCES/kvm-multifd-Create-property-multifd-flush-after-each-sec.patch +++ /dev/null @@ -1,127 +0,0 @@ -From 2a5ea92ca0a5dffad54e4d06a683f683996cea9a Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:13:14 +0200 -Subject: [PATCH 05/12] multifd: Create property - multifd-flush-after-each-section -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd 
flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [1/3] 5bf5348e8be5b1d1629b859ce1ddb7aa0d72c0d6 (juan.quintela/c9s-qemu-kvm) - -We used to flush all channels at the end of each RAM section -sent. That is not needed, so preparing to only flush after a full -iteration through all the RAM. - -Default value of the property is false. But we return "true" in -migrate_multifd_flush_after_each_section() until we implement the code -in following patches. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Acked-by: Peter Xu - ---- - -Rename each-iteration to after-each-section -Rename multifd-sync-after-each-section to - multifd-flush-after-each-section -Move to machine-8.0 (peter) - -conflit hw_compat_8_0 and hw_compat_rhel_9_2 - -(cherry picked from commit 77c259a4cb1c9799754b48f570301ebf1de5ded8) ---- - hw/core/machine.c | 2 ++ - migration/migration.h | 12 ++++++++++++ - migration/options.c | 13 +++++++++++++ - migration/options.h | 1 + - 4 files changed, 28 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index 5abdc8c39b..5ea52317b9 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -54,6 +54,8 @@ const char *rhel_old_machine_deprecation = - "machine types for previous major releases are deprecated"; - - GlobalProperty hw_compat_rhel_9_2[] = { -+ /* hw_compat_rhel_9_2 from hw_compat_8_0 */ -+ { "migration", "multifd-flush-after-each-section", "on"}, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ - { "e1000e", "migrate-timadj", "off" }, - /* hw_compat_rhel_9_2 from hw_compat_7_2 */ -diff --git a/migration/migration.h b/migration/migration.h -index 7ccf460aa2..04c78c1fd6 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -411,6 +411,18 @@ struct MigrationState { - */ - bool preempt_pre_7_2; - -+ /* -+ * flush every channel after each section sent. 
-+ * -+ * This assures that we can't mix pages from one iteration through -+ * ram pages with pages for the following iteration. We really -+ * only need to do this flush after we have go through all the -+ * dirty pages. For historical reasons, we do that after each -+ * section. This is suboptimal (we flush too many times). -+ * Default value is false. Setting this property has no effect -+ * until the patch that removes this comment. (since 8.1) -+ */ -+ bool multifd_flush_after_each_section; - /* - * This decides the size of guest memory chunk that will be used - * to track dirty bitmap clearing. The size of memory chunk will -diff --git a/migration/options.c b/migration/options.c -index ccd7ef3907..5b0d080ecb 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -88,6 +88,8 @@ Property migration_properties[] = { - send_section_footer, true), - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), -+ DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -+ multifd_flush_after_each_section, true), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -344,6 +346,17 @@ bool migrate_zero_copy_send(void) - - /* pseudo capabilities */ - -+bool migrate_multifd_flush_after_each_section(void) -+{ -+ MigrationState *s = migrate_get_current(); -+ -+ /* -+ * Until the patch that remove this comment, we always return that -+ * the property is enabled. -+ */ -+ return true || s->multifd_flush_after_each_section; -+} -+ - bool migrate_postcopy(void) - { - return migrate_postcopy_ram() || migrate_dirty_bitmaps(); -diff --git a/migration/options.h b/migration/options.h -index 0fc7be6869..271f49ae5f 100644 ---- a/migration/options.h -+++ b/migration/options.h -@@ -60,6 +60,7 @@ bool migrate_zero_copy_send(void); - * check, but they are not a capability. 
- */ - -+bool migrate_multifd_flush_after_each_section(void); - bool migrate_postcopy(void); - bool migrate_tls(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch b/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch deleted file mode 100644 index abf21e6..0000000 --- a/SOURCES/kvm-multifd-Fix-the-number-of-channels-ready.patch +++ /dev/null @@ -1,58 +0,0 @@ -From af6f2a543c7db6d67d33fd12615a50e57fc3fe66 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 26 Apr 2023 12:20:36 +0200 -Subject: [PATCH 19/21] multifd: Fix the number of channels ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 171: multifd: Fix the number of channels ready -RH-Bugzilla: 2196289 -RH-Acked-by: Peter Xu -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] a5e271ba249d85b27a68d3cff10480ca3a112c5d (LeoBras/centos-qemu-kvm) - -We don't wait in the sem when we are doing a sync_main. Make it wait -there. To make things clearer, we mark the channel ready at the -begining of the thread loop. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Fabiano Rosas -(cherry picked from commit d2026ee117147893f8d80f060cede6d872ecbd7f) -Signed-off-by: Leonardo Bras ---- - migration/multifd.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/migration/multifd.c b/migration/multifd.c -index cce3ad6988..6a59c03dd2 100644 ---- a/migration/multifd.c -+++ b/migration/multifd.c -@@ -635,6 +635,7 @@ int multifd_send_sync_main(QEMUFile *f) - for (i = 0; i < migrate_multifd_channels(); i++) { - MultiFDSendParams *p = &multifd_send_state->params[i]; - -+ qemu_sem_wait(&multifd_send_state->channels_ready); - trace_multifd_send_sync_main_wait(p->id); - qemu_sem_wait(&p->sem_sync); - -@@ -668,6 +669,7 @@ static void *multifd_send_thread(void *opaque) - p->num_packets = 1; - - while (true) { -+ qemu_sem_post(&multifd_send_state->channels_ready); - qemu_sem_wait(&p->sem); - - if (qatomic_read(&multifd_send_state->exiting)) { -@@ -736,7 +738,6 @@ static void *multifd_send_thread(void *opaque) - if (flags & MULTIFD_FLAG_SYNC) { - qemu_sem_post(&p->sem_sync); - } -- qemu_sem_post(&multifd_send_state->channels_ready); - } else if (p->quit) { - qemu_mutex_unlock(&p->mutex); - break; --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch b/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch deleted file mode 100644 index 3f76384..0000000 --- a/SOURCES/kvm-multifd-Only-flush-once-each-full-round-of-memory.patch +++ /dev/null @@ -1,166 +0,0 @@ -From e6f770506091eada46c63ac1c8b934b508e3807f Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 13:36:11 +0200 -Subject: [PATCH 07/12] multifd: Only flush once each full round of memory -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: 
[3/3] 33f76dfc72a2552a42dc7f0fe3923564185a7bf7 (juan.quintela/c9s-qemu-kvm) - -We need to add a new flag to mean to flush at that point. -Notice that we still flush at the end of setup and at the end of -complete stages. - -Signed-off-by: Juan Quintela -Acked-by: Peter Xu - ---- - -Add missing qemu_fflush(), now it passes all tests always. -In the previous version, the check that changes the default value to -false got lost in some rebase. Get it back. - -(cherry picked from commit 294e5a4034e81b3d8db03b4e0f691386f20d6ed3) ---- - migration/migration.h | 3 +-- - migration/options.c | 8 ++------ - migration/ram.c | 28 +++++++++++++++++++++++++++- - 3 files changed, 30 insertions(+), 9 deletions(-) - -diff --git a/migration/migration.h b/migration/migration.h -index 04c78c1fd6..dfec649af8 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -419,8 +419,7 @@ struct MigrationState { - * only need to do this flush after we have go through all the - * dirty pages. For historical reasons, we do that after each - * section. This is suboptimal (we flush too many times). -- * Default value is false. Setting this property has no effect -- * until the patch that removes this comment. (since 8.1) -+ * Default value is false. 
(since 8.1) - */ - bool multifd_flush_after_each_section; - /* -diff --git a/migration/options.c b/migration/options.c -index 5b0d080ecb..e13c7cb8e5 100644 ---- a/migration/options.c -+++ b/migration/options.c -@@ -89,7 +89,7 @@ Property migration_properties[] = { - DEFINE_PROP_BOOL("decompress-error-check", MigrationState, - decompress_error_check, true), - DEFINE_PROP_BOOL("multifd-flush-after-each-section", MigrationState, -- multifd_flush_after_each_section, true), -+ multifd_flush_after_each_section, false), - DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, - clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), - DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, -@@ -350,11 +350,7 @@ bool migrate_multifd_flush_after_each_section(void) - { - MigrationState *s = migrate_get_current(); - -- /* -- * Until the patch that remove this comment, we always return that -- * the property is enabled. -- */ -- return true || s->multifd_flush_after_each_section; -+ return s->multifd_flush_after_each_section; - } - - bool migrate_postcopy(void) -diff --git a/migration/ram.c b/migration/ram.c -index 1e2414d681..e9dcda8b9d 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -86,6 +86,7 @@ - #define RAM_SAVE_FLAG_XBZRLE 0x40 - /* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ - #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 -+#define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 - /* We can't use any flag that is bigger than 0x200 */ - - int (*xbzrle_encode_buffer_func)(uint8_t *, uint8_t *, int, -@@ -1581,6 +1582,7 @@ retry: - * associated with the search process. 
- * - * Returns: -+ * <0: An error happened - * PAGE_ALL_CLEAN: no dirty page found, give up - * PAGE_TRY_AGAIN: no dirty page found, retry for next block - * PAGE_DIRTY_FOUND: dirty page found -@@ -1608,6 +1610,15 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) - pss->page = 0; - pss->block = QLIST_NEXT_RCU(pss->block, next); - if (!pss->block) { -+ if (!migrate_multifd_flush_after_each_section()) { -+ QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; -+ int ret = multifd_send_sync_main(f); -+ if (ret < 0) { -+ return ret; -+ } -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ qemu_fflush(f); -+ } - /* - * If memory migration starts over, we will meet a dirtied page - * which may still exists in compression threads's ring, so we -@@ -2600,6 +2611,9 @@ static int ram_find_and_save_block(RAMState *rs) - break; - } else if (res == PAGE_TRY_AGAIN) { - continue; -+ } else if (res < 0) { -+ pages = res; -+ break; - } - } - } -@@ -3286,6 +3300,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } -+ - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -3471,6 +3489,9 @@ static int ram_save_complete(QEMUFile *f, void *opaque) - return ret; - } - -+ if (!migrate_multifd_flush_after_each_section()) { -+ qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); -+ } - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - qemu_fflush(f); - -@@ -4152,7 +4173,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - } - decompress_data_with_multi_threads(f, page_buffer, len); - break; -- -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { -@@ -4426,6 +4449,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - } - break; -+ case RAM_SAVE_FLAG_MULTIFD_FLUSH: -+ multifd_recv_sync_main(); -+ break; - case 
RAM_SAVE_FLAG_EOS: - /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { --- -2.39.3 - diff --git a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch b/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch deleted file mode 100644 index 779841f..0000000 --- a/SOURCES/kvm-multifd-Protect-multifd_send_sync_main-calls.patch +++ /dev/null @@ -1,78 +0,0 @@ -From c4bfb4900b95e13bef2d86b83c33786c7c4f6289 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Tue, 21 Jun 2022 12:21:32 +0200 -Subject: [PATCH 06/12] multifd: Protect multifd_send_sync_main() calls -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: quintela1 -RH-MergeRequest: 186: Multifd flushes its channels 10 times per second -RH-Bugzilla: 2196295 -RH-Acked-by: Peter Xu -RH-Acked-by: Leonardo Brás -RH-Commit: [2/3] a91adf59c6b2f39bf4a308f566b00e39cae6e0ae (juan.quintela/c9s-qemu-kvm) - -We only need to do that on the ram_save_iterate() call on sending and -on destination when we get a RAM_SAVE_FLAG_EOS. - -In setup() and complete() we need to synch in both new and old cases, -so don't add a check there. - -Signed-off-by: Juan Quintela -Reviewed-by: Dr. David Alan Gilbert -Acked-by: Peter Xu - ---- - -Remove the wrappers that we take out on patch 5. 
- -(cherry picked from commit b05292c237030343516d073b1a1e5f49ffc017a8) ---- - migration/ram.c | 16 +++++++++++----- - 1 file changed, 11 insertions(+), 5 deletions(-) - -diff --git a/migration/ram.c b/migration/ram.c -index 01356f60a4..1e2414d681 100644 ---- a/migration/ram.c -+++ b/migration/ram.c -@@ -3394,9 +3394,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) - out: - if (ret >= 0 - && migration_is_setup_or_active(migrate_get_current()->state)) { -- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -- if (ret < 0) { -- return ret; -+ if (migrate_multifd_flush_after_each_section()) { -+ ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); -+ if (ret < 0) { -+ return ret; -+ } - } - - qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -@@ -4153,7 +4155,9 @@ int ram_load_postcopy(QEMUFile *f, int channel) - - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - error_report("Unknown combination of migration flags: 0x%x" -@@ -4424,7 +4428,9 @@ static int ram_load_precopy(QEMUFile *f) - break; - case RAM_SAVE_FLAG_EOS: - /* normal exit */ -- multifd_recv_sync_main(); -+ if (migrate_multifd_flush_after_each_section()) { -+ multifd_recv_sync_main(); -+ } - break; - default: - if (flags & RAM_SAVE_FLAG_HOOK) { --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch b/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch deleted file mode 100644 index 214b6dd..0000000 --- a/SOURCES/kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch +++ /dev/null @@ -1,159 +0,0 @@ -From 639f65d2cd4c6627a1d22c4b418b41400fe40154 Mon Sep 17 00:00:00 2001 -From: Kevin Wolf -Date: Wed, 17 May 2023 17:28:33 +0200 -Subject: [PATCH 03/21] nbd/server: Fix drained_poll to wake coroutine in right - AioContext - -RH-Author: Kevin Wolf -RH-MergeRequest: 166: 
block/graph-lock: Disable locking for now -RH-Bugzilla: 2186725 -RH-Acked-by: Eric Blake -RH-Acked-by: Emanuele Giuseppe Esposito -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/4] 177092e61360c2feb04377890b32fdeb2d1cfefc (kmwolf/centos-qemu-kvm) - -nbd_drained_poll() generally runs in the main thread, not whatever -iothread the NBD server coroutine is meant to run in, so it can't -directly reenter the coroutines to wake them up. - -The code seems to have the right intention, it specifies the correct -AioContext when it calls qemu_aio_coroutine_enter(). However, this -functions doesn't schedule the coroutine to run in that AioContext, but -it assumes it is already called in the home thread of the AioContext. - -To fix this, add a new thread-safe qio_channel_wake_read() that can be -called in the main thread to wake up the coroutine in its AioContext, -and use this in nbd_drained_poll(). - -Cc: qemu-stable@nongnu.org -Signed-off-by: Kevin Wolf -Message-Id: <20230517152834.277483-3-kwolf@redhat.com> -Reviewed-by: Eric Blake -Signed-off-by: Kevin Wolf -(cherry picked from commit 7c1f51bf38de8cea4ed5030467646c37b46edeb7) -Signed-off-by: Kevin Wolf ---- - include/io/channel.h | 10 ++++++++++ - io/channel.c | 33 +++++++++++++++++++++++++++------ - nbd/server.c | 3 +-- - 3 files changed, 38 insertions(+), 8 deletions(-) - -diff --git a/include/io/channel.h b/include/io/channel.h -index 153fbd2904..2b905423a9 100644 ---- a/include/io/channel.h -+++ b/include/io/channel.h -@@ -757,6 +757,16 @@ void qio_channel_detach_aio_context(QIOChannel *ioc); - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition); - -+/** -+ * qio_channel_wake_read: -+ * @ioc: the channel object -+ * -+ * If qio_channel_yield() is currently waiting for the channel to become -+ * readable, interrupt it and reenter immediately. This function is safe to call -+ * from any thread. 
-+ */ -+void qio_channel_wake_read(QIOChannel *ioc); -+ - /** - * qio_channel_wait: - * @ioc: the channel object -diff --git a/io/channel.c b/io/channel.c -index a8c7f11649..3c9b7beb65 100644 ---- a/io/channel.c -+++ b/io/channel.c -@@ -19,6 +19,7 @@ - */ - - #include "qemu/osdep.h" -+#include "block/aio-wait.h" - #include "io/channel.h" - #include "qapi/error.h" - #include "qemu/main-loop.h" -@@ -514,7 +515,11 @@ int qio_channel_flush(QIOChannel *ioc, - static void qio_channel_restart_read(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->read_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -525,7 +530,11 @@ static void qio_channel_restart_read(void *opaque) - static void qio_channel_restart_write(void *opaque) - { - QIOChannel *ioc = opaque; -- Coroutine *co = ioc->write_coroutine; -+ Coroutine *co = qatomic_xchg(&ioc->write_coroutine, NULL); -+ -+ if (!co) { -+ return; -+ } - - /* Assert that aio_co_wake() reenters the coroutine directly */ - assert(qemu_get_current_aio_context() == -@@ -568,7 +577,11 @@ void qio_channel_detach_aio_context(QIOChannel *ioc) - void coroutine_fn qio_channel_yield(QIOChannel *ioc, - GIOCondition condition) - { -+ AioContext *ioc_ctx = ioc->ctx ?: qemu_get_aio_context(); -+ - assert(qemu_in_coroutine()); -+ assert(in_aio_context_home_thread(ioc_ctx)); -+ - if (condition == G_IO_IN) { - assert(!ioc->read_coroutine); - ioc->read_coroutine = qemu_coroutine_self(); -@@ -580,18 +593,26 @@ void coroutine_fn qio_channel_yield(QIOChannel *ioc, - } - qio_channel_set_aio_fd_handlers(ioc); - qemu_coroutine_yield(); -+ assert(in_aio_context_home_thread(ioc_ctx)); - - /* Allow interrupting the operation by reentering the coroutine other than - * through the aio_fd_handlers. 
*/ -- if (condition == G_IO_IN && ioc->read_coroutine) { -- ioc->read_coroutine = NULL; -+ if (condition == G_IO_IN) { -+ assert(ioc->read_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); -- } else if (condition == G_IO_OUT && ioc->write_coroutine) { -- ioc->write_coroutine = NULL; -+ } else if (condition == G_IO_OUT) { -+ assert(ioc->write_coroutine == NULL); - qio_channel_set_aio_fd_handlers(ioc); - } - } - -+void qio_channel_wake_read(QIOChannel *ioc) -+{ -+ Coroutine *co = qatomic_xchg(&ioc->read_coroutine, NULL); -+ if (co) { -+ aio_co_wake(co); -+ } -+} - - static gboolean qio_channel_wait_complete(QIOChannel *ioc, - GIOCondition condition, -diff --git a/nbd/server.c b/nbd/server.c -index 3d8d0d81df..ea47522e8f 100644 ---- a/nbd/server.c -+++ b/nbd/server.c -@@ -1599,8 +1599,7 @@ static bool nbd_drained_poll(void *opaque) - * enter it here so we don't depend on the client to wake it up. - */ - if (client->recv_coroutine != NULL && client->read_yielding) { -- qemu_aio_coroutine_enter(exp->common.ctx, -- client->recv_coroutine); -+ qio_channel_wake_read(client->ioc); - } - - return true; --- -2.39.3 - diff --git a/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch b/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch new file mode 100644 index 0000000..7298992 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Fix-race-in-draining-the-export.patch @@ -0,0 +1,95 @@ +From dc4dd11233522d8195782a2196aaae44bd924575 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 14 Mar 2024 17:58:24 +0100 +Subject: [PATCH 2/4] nbd/server: Fix race in draining the export + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] 036e1b171986099904dee468c59d77437e3d3d61 (kmwolf/centos-qemu-kvm) + +When draining an NBD export, nbd_drained_begin() first sets +client->quiescing so that 
nbd_client_receive_next_request() won't start +any new request coroutines. Then nbd_drained_poll() tries to make sure +that we wait for any existing request coroutines by checking that +client->nb_requests has become 0. + +However, there is a small window between creating a new request +coroutine and increasing client->nb_requests. If a coroutine is in this +state, it won't be waited for and drain returns too early. + +In the context of switching to a different AioContext, this means that +blk_aio_attached() will see client->recv_coroutine != NULL and fail its +assertion. + +Fix this by increasing client->nb_requests immediately when starting the +coroutine. Doing this after the checks if we should create a new +coroutine is okay because client->lock is held. + +Cc: qemu-stable@nongnu.org +Fixes: fd6afc501a01 ("nbd/server: Use drained block ops to quiesce the server") +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9c707525cbb1dd1e56876e45c70c0c08f2876d41) +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 941832f178..c3484cc1eb 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3007,8 +3007,8 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + /* Owns a reference to the NBDClient passed as opaque.
*/ + static coroutine_fn void nbd_trip(void *opaque) + { +- NBDClient *client = opaque; +- NBDRequestData *req = NULL; ++ NBDRequestData *req = opaque; ++ NBDClient *client = req->client; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; +@@ -3037,8 +3037,6 @@ static coroutine_fn void nbd_trip(void *opaque) + goto done; + } + +- req = nbd_request_get(client); +- + /* + * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has + * set client->quiescing but by the time we get back nbd_drained_end() may +@@ -3112,9 +3110,7 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + done: +- if (req) { +- nbd_request_put(req); +- } ++ nbd_request_put(req); + + qemu_mutex_unlock(&client->lock); + +@@ -3143,10 +3139,13 @@ disconnect: + */ + static void nbd_client_receive_next_request(NBDClient *client) + { ++ NBDRequestData *req; ++ + if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && + !client->quiescing) { + nbd_client_get(client); +- client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); ++ req = nbd_request_get(client); ++ client->recv_coroutine = qemu_coroutine_create(nbd_trip, req); + aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch new file mode 100644 index 0000000..339e234 --- /dev/null +++ b/SOURCES/kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch @@ -0,0 +1,53 @@ +From cd7788a857a6099206c4063e3ef69cb9e4aebcbc Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:50 -0500 +Subject: [PATCH 070/101] nbd/server: avoid per-NBDRequest nbd_client_get/put() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/26] 
5acb090ac4adf4260cd9e9c5605a27012b2a33aa (kmwolf/centos-qemu-kvm) + +nbd_trip() processes a single NBD request from start to finish and holds +an NBDClient reference throughout. NBDRequest does not outlive the scope +of nbd_trip(). Therefore it is unnecessary to ref/unref NBDClient for +each NBDRequest. + +Removing these nbd_client_get()/nbd_client_put() calls will make +thread-safety easier in the commits that follow. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-ID: <20231221192452.1785567-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 895cf0a752..0b09ccc8dc 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1557,7 +1557,6 @@ static NBDRequestData *nbd_request_get(NBDClient *client) + client->nb_requests++; + + req = g_new0(NBDRequestData, 1); +- nbd_client_get(client); + req->client = client; + return req; + } +@@ -1578,8 +1577,6 @@ static void nbd_request_put(NBDRequestData *req) + } + + nbd_client_receive_next_request(client); +- +- nbd_client_put(client); + } + + static void blk_aio_attached(AioContext *ctx, void *opaque) +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch new file mode 100644 index 0000000..e0d763d --- /dev/null +++ b/SOURCES/kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch @@ -0,0 +1,373 @@ +From bb0a6afff7f23a3ddb460dc1b2e70c06565f8a3f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:52 -0500 +Subject: [PATCH 072/101] nbd/server: introduce NBDClient->lock to protect + fields + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/26] 49b64adaaf8b1c30f339d1ecc8ea89fb9db63f1c (kmwolf/centos-qemu-kvm) + 
+NBDClient has a number of fields that are accessed by both the export +AioContext and the main loop thread. When the AioContext lock is removed +these fields will need another form of protection. + +Add NBDClient->lock and protect fields that are accessed by both +threads. Also add assertions where possible and otherwise add doc +comments stating assumptions about which thread and lock holding. + +Note this patch moves the client->recv_coroutine assertion from +nbd_co_receive_request() to nbd_trip() where client->lock is held. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231221192452.1785567-7-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 144 +++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 111 insertions(+), 33 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e91e2e0903..941832f178 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -125,23 +125,25 @@ struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); + ++ QemuMutex lock; ++ + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +- Coroutine *recv_coroutine; ++ Coroutine *recv_coroutine; /* protected by lock */ + + CoMutex send_lock; + Coroutine *send_coroutine; + +- bool read_yielding; +- bool quiescing; ++ bool read_yielding; /* protected by lock */ ++ bool quiescing; /* protected by lock */ + + QTAILQ_ENTRY(NBDClient) next; +- int nb_requests; +- bool closing; ++ int nb_requests; /* protected by lock */ ++ bool closing; /* protected by lock */ + + uint32_t check_align; /* If non-zero, check for aligned client requests */ + +@@ -1415,11 +1417,18 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp) + + len = qio_channel_readv(client->ioc, &iov, 1, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { +- 
client->read_yielding = true; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->read_yielding = true; ++ ++ /* Prompt main loop thread to re-run nbd_drained_poll() */ ++ aio_wait_kick(); ++ } + qio_channel_yield(client->ioc, G_IO_IN); +- client->read_yielding = false; +- if (client->quiescing) { +- return -EAGAIN; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->read_yielding = false; ++ if (client->quiescing) { ++ return -EAGAIN; ++ } + } + continue; + } else if (len < 0) { +@@ -1528,6 +1537,7 @@ void nbd_client_put(NBDClient *client) + blk_exp_unref(&client->exp->common); + } + g_free(client->contexts.bitmaps); ++ qemu_mutex_destroy(&client->lock); + g_free(client); + } + } +@@ -1561,11 +1571,13 @@ static void client_close(NBDClient *client, bool negotiated) + { + assert(qemu_in_main_thread()); + +- if (client->closing) { +- return; +- } ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ if (client->closing) { ++ return; ++ } + +- client->closing = true; ++ client->closing = true; ++ } + + /* Force requests to finish. They will drop their own references, + * then we'll close the socket and free the NBDClient. 
+@@ -1579,6 +1591,7 @@ static void client_close(NBDClient *client, bool negotiated) + } + } + ++/* Runs in export AioContext with client->lock held */ + static NBDRequestData *nbd_request_get(NBDClient *client) + { + NBDRequestData *req; +@@ -1591,6 +1604,7 @@ static NBDRequestData *nbd_request_get(NBDClient *client) + return req; + } + ++/* Runs in export AioContext with client->lock held */ + static void nbd_request_put(NBDRequestData *req) + { + NBDClient *client = req->client; +@@ -1614,14 +1628,18 @@ static void blk_aio_attached(AioContext *ctx, void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + trace_nbd_blk_aio_attached(exp->name, ctx); + + exp->common.ctx = ctx; + + QTAILQ_FOREACH(client, &exp->clients, next) { +- assert(client->nb_requests == 0); +- assert(client->recv_coroutine == NULL); +- assert(client->send_coroutine == NULL); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ assert(client->nb_requests == 0); ++ assert(client->recv_coroutine == NULL); ++ assert(client->send_coroutine == NULL); ++ } + } + } + +@@ -1629,6 +1647,8 @@ static void blk_aio_detach(void *opaque) + { + NBDExport *exp = opaque; + ++ assert(qemu_in_main_thread()); ++ + trace_nbd_blk_aio_detach(exp->name, exp->common.ctx); + + exp->common.ctx = NULL; +@@ -1639,8 +1659,12 @@ static void nbd_drained_begin(void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- client->quiescing = true; ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->quiescing = true; ++ } + } + } + +@@ -1649,28 +1673,48 @@ static void nbd_drained_end(void *opaque) + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- client->quiescing = false; +- nbd_client_receive_next_request(client); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ client->quiescing = false; ++ 
nbd_client_receive_next_request(client); ++ } + } + } + ++/* Runs in export AioContext */ ++static void nbd_wake_read_bh(void *opaque) ++{ ++ NBDClient *client = opaque; ++ qio_channel_wake_read(client->ioc); ++} ++ + static bool nbd_drained_poll(void *opaque) + { + NBDExport *exp = opaque; + NBDClient *client; + ++ assert(qemu_in_main_thread()); ++ + QTAILQ_FOREACH(client, &exp->clients, next) { +- if (client->nb_requests != 0) { +- /* +- * If there's a coroutine waiting for a request on nbd_read_eof() +- * enter it here so we don't depend on the client to wake it up. +- */ +- if (client->recv_coroutine != NULL && client->read_yielding) { +- qio_channel_wake_read(client->ioc); +- } ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ if (client->nb_requests != 0) { ++ /* ++ * If there's a coroutine waiting for a request on nbd_read_eof() ++ * enter it here so we don't depend on the client to wake it up. ++ * ++ * Schedule a BH in the export AioContext to avoid missing the ++ * wake up due to the race between qio_channel_wake_read() and ++ * qio_channel_yield(). 
++ */ ++ if (client->recv_coroutine != NULL && client->read_yielding) { ++ aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp), ++ nbd_wake_read_bh, client); ++ } + +- return true; ++ return true; ++ } + } + } + +@@ -1681,6 +1725,8 @@ static void nbd_eject_notifier(Notifier *n, void *data) + { + NBDExport *exp = container_of(n, NBDExport, eject_notifier); + ++ assert(qemu_in_main_thread()); ++ + blk_exp_request_shutdown(&exp->common); + } + +@@ -2566,7 +2612,6 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, + int ret; + + g_assert(qemu_in_coroutine()); +- assert(client->recv_coroutine == qemu_coroutine_self()); + ret = nbd_receive_request(client, request, errp); + if (ret < 0) { + return ret; +@@ -2975,6 +3020,9 @@ static coroutine_fn void nbd_trip(void *opaque) + */ + + trace_nbd_trip(); ++ ++ qemu_mutex_lock(&client->lock); ++ + if (client->closing) { + goto done; + } +@@ -2990,7 +3038,21 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + req = nbd_request_get(client); +- ret = nbd_co_receive_request(req, &request, &local_err); ++ ++ /* ++ * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has ++ * set client->quiescing but by the time we get back nbd_drained_end() may ++ * have already cleared client->quiescing. In that case we try again ++ * because nothing else will spawn an nbd_trip() coroutine until we set ++ * client->recv_coroutine = NULL further down. 
++ */ ++ do { ++ assert(client->recv_coroutine == qemu_coroutine_self()); ++ qemu_mutex_unlock(&client->lock); ++ ret = nbd_co_receive_request(req, &request, &local_err); ++ qemu_mutex_lock(&client->lock); ++ } while (ret == -EAGAIN && !client->quiescing); ++ + client->recv_coroutine = NULL; + + if (client->closing) { +@@ -3002,15 +3064,16 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + if (ret == -EAGAIN) { +- assert(client->quiescing); + goto done; + } + + nbd_client_receive_next_request(client); ++ + if (ret == -EIO) { + goto disconnect; + } + ++ qemu_mutex_unlock(&client->lock); + qio_channel_set_cork(client->ioc, true); + + if (ret < 0) { +@@ -3030,6 +3093,10 @@ static coroutine_fn void nbd_trip(void *opaque) + g_free(request.contexts->bitmaps); + g_free(request.contexts); + } ++ ++ qio_channel_set_cork(client->ioc, false); ++ qemu_mutex_lock(&client->lock); ++ + if (ret < 0) { + error_prepend(&local_err, "Failed to send reply: "); + goto disconnect; +@@ -3044,11 +3111,13 @@ static coroutine_fn void nbd_trip(void *opaque) + goto disconnect; + } + +- qio_channel_set_cork(client->ioc, false); + done: + if (req) { + nbd_request_put(req); + } ++ ++ qemu_mutex_unlock(&client->lock); ++ + if (!nbd_client_put_nonzero(client)) { + aio_co_reschedule_self(qemu_get_aio_context()); + nbd_client_put(client); +@@ -3059,13 +3128,19 @@ disconnect: + if (local_err) { + error_reportf_err(local_err, "Disconnect client, due to: "); + } ++ + nbd_request_put(req); ++ qemu_mutex_unlock(&client->lock); + + aio_co_reschedule_self(qemu_get_aio_context()); + client_close(client, true); + nbd_client_put(client); + } + ++/* ++ * Runs in export AioContext and main loop thread. Caller must hold ++ * client->lock. 
++ */ + static void nbd_client_receive_next_request(NBDClient *client) + { + if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && +@@ -3091,7 +3166,9 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + return; + } + +- nbd_client_receive_next_request(client); ++ WITH_QEMU_LOCK_GUARD(&client->lock) { ++ nbd_client_receive_next_request(client); ++ } + } + + /* +@@ -3108,6 +3185,7 @@ void nbd_client_new(QIOChannelSocket *sioc, + Coroutine *co; + + client = g_new0(NBDClient, 1); ++ qemu_mutex_init(&client->lock); + client->refcount = 1; + client->tlscreds = tlscreds; + if (tlscreds) { +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch new file mode 100644 index 0000000..3ca11a9 --- /dev/null +++ b/SOURCES/kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch @@ -0,0 +1,176 @@ +From 8b60d72532b6511b41d82d591fb4f509314ef15f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 21 Dec 2023 14:24:51 -0500 +Subject: [PATCH 071/101] nbd/server: only traverse NBDExport->clients from + main loop thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/26] e7794a3a5c363c7508ee505c4ba03d9ef8862ca9 (kmwolf/centos-qemu-kvm) + +The NBD clients list is currently accessed from both the export +AioContext and the main loop thread. When the AioContext lock is removed +there will be nothing protecting the clients list. + +Adding a lock around the clients list is tricky because NBDClient +structs are refcounted and may be freed from the export AioContext or +the main loop thread. nbd_export_request_shutdown() -> client_close() -> +nbd_client_put() is also tricky because the list lock would be held +while indirectly dropping references to NBDClients.
+ +A simpler approach is to only allow nbd_client_put() and client_close() +calls from the main loop thread. Then the NBD clients list is only +accessed from the main loop thread and no fancy locking is needed. + +nbd_trip() just needs to reschedule itself in the main loop AioContext +before calling nbd_client_put() and client_close(). This costs more CPU +cycles per NBD request so add nbd_client_put_nonzero() to optimize the +common case where more references to NBDClient remain. + +Note that nbd_client_get() can still be called from either thread, so +make NBDClient->refcount atomic. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231221192452.1785567-6-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 61 +++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 51 insertions(+), 10 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 0b09ccc8dc..e91e2e0903 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -122,7 +122,7 @@ struct NBDMetaContexts { + }; + + struct NBDClient { +- int refcount; ++ int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); + + NBDExport *exp; +@@ -1501,14 +1501,17 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque + + #define MAX_NBD_REQUESTS 16 + ++/* Runs in export AioContext and main loop thread */ + void nbd_client_get(NBDClient *client) + { +- client->refcount++; ++ qatomic_inc(&client->refcount); + } + + void nbd_client_put(NBDClient *client) + { +- if (--client->refcount == 0) { ++ assert(qemu_in_main_thread()); ++ ++ if (qatomic_fetch_dec(&client->refcount) == 1) { + /* The last reference should be dropped by client->close, + * which is called by client_close. + */ +@@ -1529,8 +1532,35 @@ void nbd_client_put(NBDClient *client) + } + } + ++/* ++ * Tries to release the reference to @client, but only if other references ++ * remain. 
This is an optimization for the common case where we want to avoid ++ * the expense of scheduling nbd_client_put() in the main loop thread. ++ * ++ * Returns true upon success or false if the reference was not released because ++ * it is the last reference. ++ */ ++static bool nbd_client_put_nonzero(NBDClient *client) ++{ ++ int old = qatomic_read(&client->refcount); ++ int expected; ++ ++ do { ++ if (old == 1) { ++ return false; ++ } ++ ++ expected = old; ++ old = qatomic_cmpxchg(&client->refcount, expected, expected - 1); ++ } while (old != expected); ++ ++ return true; ++} ++ + static void client_close(NBDClient *client, bool negotiated) + { ++ assert(qemu_in_main_thread()); ++ + if (client->closing) { + return; + } +@@ -2933,15 +2963,20 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + static coroutine_fn void nbd_trip(void *opaque) + { + NBDClient *client = opaque; +- NBDRequestData *req; ++ NBDRequestData *req = NULL; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; + ++ /* ++ * Note that nbd_client_put() and client_close() must be called from the ++ * main loop thread. Use aio_co_reschedule_self() to switch AioContext ++ * before calling these functions. ++ */ ++ + trace_nbd_trip(); + if (client->closing) { +- nbd_client_put(client); +- return; ++ goto done; + } + + if (client->quiescing) { +@@ -2949,10 +2984,9 @@ static coroutine_fn void nbd_trip(void *opaque) + * We're switching between AIO contexts. Don't attempt to receive a new + * request and kick the main context which may be waiting for us. 
+ */ +- nbd_client_put(client); + client->recv_coroutine = NULL; + aio_wait_kick(); +- return; ++ goto done; + } + + req = nbd_request_get(client); +@@ -3012,8 +3046,13 @@ static coroutine_fn void nbd_trip(void *opaque) + + qio_channel_set_cork(client->ioc, false); + done: +- nbd_request_put(req); +- nbd_client_put(client); ++ if (req) { ++ nbd_request_put(req); ++ } ++ if (!nbd_client_put_nonzero(client)) { ++ aio_co_reschedule_self(qemu_get_aio_context()); ++ nbd_client_put(client); ++ } + return; + + disconnect: +@@ -3021,6 +3060,8 @@ disconnect: + error_reportf_err(local_err, "Disconnect client, due to: "); + } + nbd_request_put(req); ++ ++ aio_co_reschedule_self(qemu_get_aio_context()); + client_close(client, true); + nbd_client_put(client); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch b/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch deleted file mode 100644 index 20b9c04..0000000 --- a/SOURCES/kvm-net-socket-move-fd-type-checking-to-its-own-function.patch +++ /dev/null @@ -1,78 +0,0 @@ -From d6b3f9e4b388b8d621761104ddf075d6087f6d6c Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:47 +0200 -Subject: [PATCH 09/12] net: socket: move fd type checking to its own function - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [2/3] 9726f0ae81ac209b5db33dc7767f652867d8ca0a (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 23455ae341656ca867ee4a171826b9d280d6acb5) ---- - net/socket.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 24dcaa55bc..6b1f0fec3a 
100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -446,16 +446,32 @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, - return s; - } - -+static int net_socket_fd_check(int fd, Error **errp) -+{ -+ int so_type, optlen = sizeof(so_type); -+ -+ if (getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -+ (socklen_t *)&optlen) < 0) { -+ error_setg(errp, "can't get socket option SO_TYPE"); -+ return -1; -+ } -+ if (so_type != SOCK_DGRAM && so_type != SOCK_STREAM) { -+ error_setg(errp, "socket type=%d for fd=%d must be either" -+ " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -+ return -1; -+ } -+ return so_type; -+} -+ - static NetSocketState *net_socket_fd_init(NetClientState *peer, - const char *model, const char *name, - int fd, int is_connected, - const char *mc, Error **errp) - { -- int so_type = -1, optlen=sizeof(so_type); -+ int so_type; - -- if(getsockopt(fd, SOL_SOCKET, SO_TYPE, (char *)&so_type, -- (socklen_t *)&optlen)< 0) { -- error_setg(errp, "can't get socket option SO_TYPE"); -+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { - close(fd); - return NULL; - } -@@ -465,10 +481,6 @@ static NetSocketState *net_socket_fd_init(NetClientState *peer, - mc, errp); - case SOCK_STREAM: - return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- default: -- error_setg(errp, "socket type=%d for fd=%d must be either" -- " SOCK_DGRAM or SOCK_STREAM", so_type, fd); -- close(fd); - } - return NULL; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch b/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch deleted file mode 100644 index 269da29..0000000 --- a/SOURCES/kvm-net-socket-prepare-to-cleanup-net_init_socket.patch +++ /dev/null @@ -1,60 +0,0 @@ -From a467540e49e76c5961d86e3f47d3f8fcad8cef09 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 9 Jun 2023 09:27:46 +0200 -Subject: [PATCH 08/12] net: socket: prepare to cleanup net_init_socket() - -RH-Author: 
Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [1/3] 3e4f8370586ae1ac2474fef971a239edb31eeb67 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Use directly net_socket_fd_init_stream() and net_socket_fd_init_dgram() -when the socket type is already known. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 006c3fa74c3edb978ff46d2851699e9a95609da5) ---- - net/socket.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index ba6e5b0b00..24dcaa55bc 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -587,7 +587,7 @@ static int net_socket_connect_init(NetClientState *peer, - break; - } - } -- s = net_socket_fd_init(peer, model, name, fd, connected, NULL, errp); -+ s = net_socket_fd_init_stream(peer, model, name, fd, connected); - if (!s) { - return -1; - } -@@ -629,7 +629,7 @@ static int net_socket_mcast_init(NetClientState *peer, - return -1; - } - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } -@@ -683,7 +683,7 @@ static int net_socket_udp_init(NetClientState *peer, - } - qemu_socket_set_nonblock(fd); - -- s = net_socket_fd_init(peer, model, name, fd, 0, NULL, errp); -+ s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); - if (!s) { - return -1; - } --- -2.39.3 - diff --git a/SOURCES/kvm-net-socket-remove-net_init_socket.patch b/SOURCES/kvm-net-socket-remove-net_init_socket.patch deleted file mode 100644 index 98c96f2..0000000 --- a/SOURCES/kvm-net-socket-remove-net_init_socket.patch +++ /dev/null @@ -1,102 +0,0 @@ -From ecb4f97895849c562112b76a30ddc2037e8df79e Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 
9 Jun 2023 09:27:48 +0200 -Subject: [PATCH 10/12] net: socket: remove net_init_socket() - -RH-Author: Laurent Vivier -RH-MergeRequest: 187: net: socket: do not close file descriptor if it's not a socket -RH-Jira: RHEL-582 -RH-Acked-by: Stefano Brivio -RH-Acked-by: Jason Wang -RH-Acked-by: David Gibson (Red Hat) -RH-Commit: [3/3] e1d7939f5df4a77c2fff62d1ae4899a7a3615ad9 (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-582 - -Move the file descriptor type checking before doing anything with it. -If it's not usable, don't close it as it could be in use by another -part of QEMU, only fail and report an error. - -Reviewed-by: David Gibson -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit b6aeee02980e193f744f74c48fd900940feb2799) ---- - net/socket.c | 43 +++++++++++++++++-------------------------- - 1 file changed, 17 insertions(+), 26 deletions(-) - -diff --git a/net/socket.c b/net/socket.c -index 6b1f0fec3a..8e3702e1f3 100644 ---- a/net/socket.c -+++ b/net/socket.c -@@ -463,28 +463,6 @@ static int net_socket_fd_check(int fd, Error **errp) - return so_type; - } - --static NetSocketState *net_socket_fd_init(NetClientState *peer, -- const char *model, const char *name, -- int fd, int is_connected, -- const char *mc, Error **errp) --{ -- int so_type; -- -- so_type = net_socket_fd_check(fd, errp); -- if (so_type < 0) { -- close(fd); -- return NULL; -- } -- switch(so_type) { -- case SOCK_DGRAM: -- return net_socket_fd_init_dgram(peer, model, name, fd, is_connected, -- mc, errp); -- case SOCK_STREAM: -- return net_socket_fd_init_stream(peer, model, name, fd, is_connected); -- } -- return NULL; --} -- - static void net_socket_accept(void *opaque) - { - NetSocketState *s = opaque; -@@ -728,21 +706,34 @@ int net_init_socket(const Netdev *netdev, const char *name, - } - - if (sock->fd) { -- int fd, ret; -+ int fd, ret, so_type; - - fd = monitor_fd_param(monitor_cur(), sock->fd, errp); - if (fd == -1) { - return -1; - } 
-+ so_type = net_socket_fd_check(fd, errp); -+ if (so_type < 0) { -+ return -1; -+ } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); - return -1; - } -- if (!net_socket_fd_init(peer, "socket", name, fd, 1, sock->mcast, -- errp)) { -- return -1; -+ switch (so_type) { -+ case SOCK_DGRAM: -+ if (!net_socket_fd_init_dgram(peer, "socket", name, fd, 1, -+ sock->mcast, errp)) { -+ return -1; -+ } -+ break; -+ case SOCK_STREAM: -+ if (!net_socket_fd_init_stream(peer, "socket", name, fd, 1)) { -+ return -1; -+ } -+ break; - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch b/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch deleted file mode 100644 index 66d68f1..0000000 --- a/SOURCES/kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001 -From: Gavin Shan -Date: Tue, 27 Jun 2023 20:20:09 +1000 -Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Gavin Shan -RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines -RH-Bugzilla: 2171363 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Eric Auger -RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9 - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363 - -For some architectures like ARM64, multiple CPUs in one cluster can be -associated with different NUMA nodes, which is irregular configuration -because we shouldn't have this in baremetal environment. The irregular -configuration causes Linux guest to misbehave, as the following warning -messages indicate. 
- - -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \ - -numa node,nodeid=0,cpus=0-1,memdev=ram0 \ - -numa node,nodeid=1,cpus=2-3,memdev=ram1 \ - -numa node,nodeid=2,cpus=4-5,memdev=ram2 \ - - ------------[ cut here ]------------ - WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910 - Modules linked in: - CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1 - pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) - pc : build_sched_domains+0x284/0x910 - lr : build_sched_domains+0x184/0x910 - sp : ffff80000804bd50 - x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000 - x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840 - x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508 - x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014 - x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e - x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0 - x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041 - x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001 - x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002 - x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001 - Call trace: - build_sched_domains+0x284/0x910 - sched_init_domains+0xac/0xe0 - sched_init_smp+0x48/0xc8 - kernel_init_freeable+0x140/0x1ac - kernel_init+0x28/0x140 - ret_from_fork+0x10/0x20 - -Improve the situation to warn when multiple CPUs in one cluster have -been associated with different NUMA nodes. However, one NUMA node is -allowed to be associated with different clusters. 
- -Signed-off-by: Gavin Shan -Acked-by: Philippe Mathieu-Daudé -Acked-by: Igor Mammedov -Message-Id: <20230509002739.18388-2-gshan@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e) -Signed-off-by: Gavin Shan ---- - hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++ - include/hw/boards.h | 1 + - 2 files changed, 43 insertions(+) - -diff --git a/hw/core/machine.c b/hw/core/machine.c -index c28702b690..5abdc8c39b 100644 ---- a/hw/core/machine.c -+++ b/hw/core/machine.c -@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine) - g_string_free(s, true); - } - -+static void validate_cpu_cluster_to_numa_boundary(MachineState *ms) -+{ -+ MachineClass *mc = MACHINE_GET_CLASS(ms); -+ NumaState *state = ms->numa_state; -+ const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms); -+ const CPUArchId *cpus = possible_cpus->cpus; -+ int i, j; -+ -+ if (state->num_nodes <= 1 || possible_cpus->len <= 1) { -+ return; -+ } -+ -+ /* -+ * The Linux scheduling domain can't be parsed when the multiple CPUs -+ * in one cluster have been associated with different NUMA nodes. However, -+ * it's fine to associate one NUMA node with CPUs in different clusters. -+ */ -+ for (i = 0; i < possible_cpus->len; i++) { -+ for (j = i + 1; j < possible_cpus->len; j++) { -+ if (cpus[i].props.has_socket_id && -+ cpus[i].props.has_cluster_id && -+ cpus[i].props.has_node_id && -+ cpus[j].props.has_socket_id && -+ cpus[j].props.has_cluster_id && -+ cpus[j].props.has_node_id && -+ cpus[i].props.socket_id == cpus[j].props.socket_id && -+ cpus[i].props.cluster_id == cpus[j].props.cluster_id && -+ cpus[i].props.node_id != cpus[j].props.node_id) { -+ warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64 -+ " have been associated with node-%" PRId64 " and node-%" PRId64 -+ " respectively. 
It can cause OSes like Linux to" -+ " misbehave", i, j, cpus[i].props.socket_id, -+ cpus[i].props.cluster_id, cpus[i].props.node_id, -+ cpus[j].props.node_id); -+ } -+ } -+ } -+} -+ - MemoryRegion *machine_consume_memdev(MachineState *machine, - HostMemoryBackend *backend) - { -@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error * - numa_complete_configuration(machine); - if (machine->numa_state->num_nodes) { - machine_numa_finish_cpu_init(machine); -+ if (machine_class->cpu_cluster_has_numa_boundary) { -+ validate_cpu_cluster_to_numa_boundary(machine); -+ } - } - } - -diff --git a/include/hw/boards.h b/include/hw/boards.h -index 5f08bd7550..3628671228 100644 ---- a/include/hw/boards.h -+++ b/include/hw/boards.h -@@ -275,6 +275,7 @@ struct MachineClass { - bool nvdimm_supported; - bool numa_mem_supported; - bool auto_enable_numa; -+ bool cpu_cluster_has_numa_boundary; - SMPCompatProps smp_props; - const char *default_ram_id; - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch deleted file mode 100644 index 312af68..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:03 +0200 -Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" - instruction -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) - -start.S currently cannot be compiled with Clang 16 and 
binutils 2.40: - - ld: start.o(.text+0x8): misaligned symbol `__bss_start' (0xc1e5) for - relocation R_390_PC32DBL - -According to the built-in linker script of ld, the symbol __bss_start -can actually point *before* the .bss section and does not need to have -any alignment, so in certain situations (like when using the internal -assembler of Clang), the __bss_start symbol can indeed be unaligned -and thus it is not suitable for being used with the "larl" instruction -that needs an address that is at least aligned to halfwords. -The problem went unnoticed so far since binutils <= 2.39 did not -check the alignment, but starting with binutils 2.40, such unaligned -addresses are now refused. - -Fix it by loading the address indirectly instead. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2216662 -Reported-by: Miroslav Rezanina -Suggested-by: Andreas Krebbel -Message-Id: <20230629104821.194859-8-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Signed-off-by: Thomas Huth -(cherry picked from commit 7cd50cbe4ca3e2860b31b06ec92c17c54bd82d48) ---- - pc-bios/s390-ccw/start.S | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index abd6fe6639..22c1c296df 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -19,7 +19,8 @@ _start: - larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ -- larl %r2,__bss_start -+ larl %r2,bss_start_literal /* __bss_start might be unaligned ... */ -+ lg %r2,0(%r2) /* ... so load it indirectly */ - larl %r3,_end - slgr %r3,%r2 /* get sizeof bss */ - ltgr %r3,%r3 /* bss empty? 
*/ -@@ -45,7 +46,6 @@ done: - memsetxc: - xc 0(1,%r1),0(%r1) - -- - /* - * void disabled_wait(void) - * -@@ -113,6 +113,8 @@ io_new_code: - br %r14 - - .align 8 -+bss_start_literal: -+ .quad __bss_start - disabled_wait_psw: - .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch deleted file mode 100644 index bd13187..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:00 +0200 -Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) - -start.S is currently indented with a mixture of spaces and tabs, which -is quite ugly. QEMU coding style says indentation should be 4 spaces, -and this is also what we are using in the assembler files in the -tests/tcg/s390x/ folder already, so let's adjust start.S accordingly. 
- -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-2-thuth@redhat.com> -Reviewed-by: Claudio Imbrenda -Reviewed-by: Eric Farman -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit f52420fa4fd9f519dc42c20d2616aba4149adc25) ---- - pc-bios/s390-ccw/start.S | 136 +++++++++++++++++++-------------------- - 1 file changed, 68 insertions(+), 68 deletions(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index 6072906df4..d29de09cc6 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,37 +10,37 @@ - * directory. - */ - -- .globl _start -+ .globl _start - _start: - -- larl %r15, stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + 0x8000 /* Set up stack */ - -- /* clear bss */ -- larl %r2, __bss_start -- larl %r3, _end -- slgr %r3, %r2 /* get sizeof bss */ -- ltgr %r3,%r3 /* bss empty? */ -- jz done -- aghi %r3,-1 -- srlg %r4,%r3,8 /* how many 256 byte chunks? */ -- ltgr %r4,%r4 -- lgr %r1,%r2 -- jz remainder -+ /* clear bss */ -+ larl %r2,__bss_start -+ larl %r3,_end -+ slgr %r3,%r2 /* get sizeof bss */ -+ ltgr %r3,%r3 /* bss empty? */ -+ jz done -+ aghi %r3,-1 -+ srlg %r4,%r3,8 /* how many 256 byte chunks? */ -+ ltgr %r4,%r4 -+ lgr %r1,%r2 -+ jz remainder - loop: -- xc 0(256,%r1),0(%r1) -- la %r1,256(%r1) -- brctg %r4,loop -+ xc 0(256,%r1),0(%r1) -+ la %r1,256(%r1) -+ brctg %r4,loop - remainder: -- larl %r2,memsetxc -- ex %r3,0(%r2) -+ larl %r2,memsetxc -+ ex %r3,0(%r2) - done: -- /* set up a pgm exception disabled wait psw */ -- larl %r2, disabled_wait_psw -- mvc 0x01d0(16), 0(%r2) -- j main /* And call C */ -+ /* set up a pgm exception disabled wait psw */ -+ larl %r2,disabled_wait_psw -+ mvc 0x01d0(16),0(%r2) -+ j main /* And call C */ - - memsetxc: -- xc 0(1,%r1),0(%r1) -+ xc 0(1,%r1),0(%r1) - - - /* -@@ -48,11 +48,11 @@ memsetxc: - * - * stops the current guest cpu. 
- */ -- .globl disabled_wait -+ .globl disabled_wait - disabled_wait: -- larl %r1,disabled_wait_psw -- lpswe 0(%r1) --1: j 1b -+ larl %r1,disabled_wait_psw -+ lpswe 0(%r1) -+1: j 1b - - - /* -@@ -60,61 +60,61 @@ disabled_wait: - * - * eats one sclp interrupt - */ -- .globl consume_sclp_int -+ .globl consume_sclp_int - consume_sclp_int: -- /* enable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- oi 6(%r15),0x2 -- lctlg %c0,%c0,0(%r15) -- /* prepare external call handler */ -- larl %r1, external_new_code -- stg %r1, 0x1b8 -- larl %r1, external_new_mask -- mvc 0x1b0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ oi 6(%r15),0x2 -+ lctlg %c0,%c0,0(%r15) -+ /* prepare external call handler */ -+ larl %r1,external_new_code -+ stg %r1,0x1b8 -+ larl %r1,external_new_mask -+ mvc 0x1b0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - /* - * void consume_io_int(void) - * - * eats one I/O interrupt - */ -- .globl consume_io_int -+ .globl consume_io_int - consume_io_int: -- /* enable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- oi 4(%r15), 0xff -- lctlg %c6,%c6,0(%r15) -- /* prepare i/o call handler */ -- larl %r1, io_new_code -- stg %r1, 0x1f8 -- larl %r1, io_new_mask -- mvc 0x1f0(8),0(%r1) -- /* load enabled wait PSW */ -- larl %r1, enabled_wait_psw -- lpswe 0(%r1) -+ /* enable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ oi 4(%r15), 0xff -+ lctlg %c6,%c6,0(%r15) -+ /* prepare i/o call handler */ -+ larl %r1,io_new_code -+ stg %r1,0x1f8 -+ larl %r1,io_new_mask -+ mvc 0x1f0(8),0(%r1) -+ /* load enabled wait PSW */ -+ larl %r1,enabled_wait_psw -+ lpswe 0(%r1) - - external_new_code: -- /* disable service interrupts in cr0 */ -- stctg %c0,%c0,0(%r15) -- ni 6(%r15),0xfd -- lctlg %c0,%c0,0(%r15) -- br %r14 -+ /* disable service interrupts in cr0 */ -+ stctg %c0,%c0,0(%r15) -+ ni 6(%r15),0xfd -+ lctlg 
%c0,%c0,0(%r15) -+ br %r14 - - io_new_code: -- /* disable I/O interrupts in cr6 */ -- stctg %c6,%c6,0(%r15) -- ni 4(%r15), 0x00 -- lctlg %c6,%c6,0(%r15) -- br %r14 -+ /* disable I/O interrupts in cr6 */ -+ stctg %c6,%c6,0(%r15) -+ ni 4(%r15),0x00 -+ lctlg %c6,%c6,0(%r15) -+ br %r14 - -- .align 8 -+ .align 8 - disabled_wait_psw: -- .quad 0x0002000180000000,0x0000000000000000 -+ .quad 0x0002000180000000,0x0000000000000000 - enabled_wait_psw: -- .quad 0x0302000180000000,0x0000000000000000 -+ .quad 0x0302000180000000,0x0000000000000000 - external_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 - io_new_mask: -- .quad 0x0000000180000000 -+ .quad 0x0000000180000000 --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch deleted file mode 100644 index 907fe43..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Thu, 22 Jun 2023 15:08:22 +0200 -Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to - silence linker warning -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) - -Recent versions of ld complain when linking the s390-ccw bios: - - /usr/bin/ld: warning: start.o: missing .note.GNU-stack section implies - executable stack - /usr/bin/ld: NOTE: This behaviour is deprecated and will be removed in - a future version of the linker - -We can silence the warning by telling the linker to mark the stack -as not executable. 
- -Message-Id: <20230622130822.396793-1-thuth@redhat.com> -Acked-by: Christian Borntraeger -Signed-off-by: Thomas Huth -(cherry picked from commit 442ef32ee5b6059a8f247fb2def9d449578d0a89) ---- - pc-bios/s390-ccw/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile -index 10e8f5cb63..2a590af4a9 100644 ---- a/pc-bios/s390-ccw/Makefile -+++ b/pc-bios/s390-ccw/Makefile -@@ -53,7 +53,7 @@ config-cc.mak: Makefile - $(call cc-option,-march=z900,-march=z10)) 3> config-cc.mak - -include config-cc.mak - --LDFLAGS += -Wl,-pie -nostdlib -+LDFLAGS += -Wl,-pie -nostdlib -z noexecstack - - build-all: s390-ccw.img s390-netboot.img - --- -2.39.3 - diff --git a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch deleted file mode 100644 index 0c4ce6f..0000000 --- a/SOURCES/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +++ /dev/null @@ -1,59 +0,0 @@ -From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 -From: Thomas Huth -Date: Tue, 27 Jun 2023 09:47:01 +0200 -Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame - in start.S -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 -RH-Bugzilla: 2220866 -RH-Acked-by: Cédric Le Goater -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) - -Providing the space of a stack frame is the duty of the caller, -so we should reserve 160 bytes before jumping into the main function. -Otherwise the main() function might write past the stack array. 
- -While we're at it, add a proper STACK_SIZE macro for the stack size -instead of using magic numbers (this is also required for the following -patch). - -Reviewed-by: Christian Borntraeger -Reviewed-by: Cédric Le Goater -Message-Id: <20230627074703.99608-3-thuth@redhat.com> -Reviewed-by: Eric Farman -Reviewed-by: Claudio Imbrenda -Reviewed-by: Marc Hartmayer -Reviewed-by: Philippe Mathieu-Daudé -Signed-off-by: Thomas Huth -(cherry picked from commit 74fe98ee7fb3344dbd085d1fa32c0dc2fc2c831f) ---- - pc-bios/s390-ccw/start.S | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/pc-bios/s390-ccw/start.S b/pc-bios/s390-ccw/start.S -index d29de09cc6..abd6fe6639 100644 ---- a/pc-bios/s390-ccw/start.S -+++ b/pc-bios/s390-ccw/start.S -@@ -10,10 +10,13 @@ - * directory. - */ - -+#define STACK_SIZE 0x8000 -+#define STACK_FRAME_SIZE 160 -+ - .globl _start - _start: - -- larl %r15,stack + 0x8000 /* Set up stack */ -+ larl %r15,stack + STACK_SIZE - STACK_FRAME_SIZE /* Set up stack */ - - /* clear bss */ - larl %r2,__bss_start --- -2.39.3 - diff --git a/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch b/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch new file mode 100644 index 0000000..f37f65f --- /dev/null +++ b/SOURCES/kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch @@ -0,0 +1,115 @@ +From 9ca64f73238d9f1b9f13d8e941ba42771a992afb Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 14:06:05 +0100 +Subject: [PATCH 20/20] pc/q35: set SMBIOS entry point type to 'auto' by + default + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [18/18] c7fc6ac7350bca3ff99e58620710a86218385781 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + Use smbios-entry-point-type='auto' for newer machine types as a workaround + for Windows not detecting 
SMBIOS tables. Which makes QEMU pick SMBIOS tables + based on configuration (with 2.x preferred and fallback to 3.x if the former + isn't compatible with configuration) + + Default compat setting of smbios-entry-point-type after series + for pc/q35 machines: + * 9.0-newer: 'auto' + * 8.1-8.2: '64' + * 8.0-older: '32' + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2008 + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: hw/i386/pc_piix.c hw/i386/pc_q35.c + due to RHEL machine types + +RHEL-only parts: + Fix RHEL 'pc' machine types at SMBIOS 2.X + for the latest RHEL 'q35' machine type use version autoselect + which propagates to RHEL 9.4 q35 machine type while RHEL 9.2 q35 and older + are kept at SMBIOS_ENTRY_POINT_TYPE_32 (see: pc_q35_machine_rhel920_options) + +Signed-off-by: Igor Mammedov +--- + hw/i386/pc.c | 2 +- + hw/i386/pc_piix.c | 7 +++++++ + hw/i386/pc_q35.c | 5 +++++ + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index ae6777fc1a..d6f267b220 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -2004,7 +2004,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->nvdimm_supported = true; + mc->smp_props.dies_supported = true; + mc->default_ram_id = "pc.ram"; +- pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; + + object_class_property_add(oc, PC_MACHINE_MAX_RAM_BELOW_4G, "size", + pc_machine_get_max_ram_below_4g, pc_machine_set_max_ram_below_4g, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 7344b35cf1..54d1c58bce 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -539,9 +539,14 @@ static void pc_i440fx_machine_options(MachineClass *m) + + static void pc_i440fx_8_2_machine_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ + pc_i440fx_machine_options(m); + m->alias = "pc"; + m->is_default = true; ++ ++ /* For
pc-i440fx-8.2 and 8.1, use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; + } + + DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, +@@ -982,6 +987,8 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ /* there aren't new PC machine types in RHEL9, keep it at SMBIOS 2.X */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9a22ff5dd6..cd5fb7380e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -377,8 +377,11 @@ static void pc_q35_machine_options(MachineClass *m) + + static void pc_q35_8_2_machine_options(MachineClass *m) + { ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_q35_machine_options(m); + m->alias = "q35"; ++ /* For pc-q35-8.2 and 8.1, use SMBIOS 3.X by default */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_64; + } + + DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, +@@ -712,6 +715,8 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + m->alias = "q35"; + m->max_cpus = 710; + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ /* use SMBIOS version autoselect by default for the latest RHEL machine */ ++ pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_AUTO; + } + + static void pc_q35_init_rhel940(MachineState *machine) +-- +2.39.3 + diff --git a/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch b/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch new file mode 100644 index 0000000..9d3fd29 --- /dev/null +++ b/SOURCES/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch @@ -0,0 +1,44 @@ +From 03cad16743d9a4d377af66611d030eca9eda326d Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 19 Mar 2024 11:42:04 +0100 +Subject: [PATCH 4/4] pc: smbios: fixup manufacturer/product/version to match +
downstream + +RH-Author: Igor Mammedov +RH-MergeRequest: 232: pc: smbios: fixup manufacturer/product/version to match downstream +RH-Jira: RHEL-21705 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Ani Sinha +RH-Commit: [1/1] 9235f64acb5ff46360282e889d0aedcb13374ac1 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit [1] discarded RHEL only change that customizes +SMBIOS values for manufacturer/product/version for pc/q35 +machine types. +Fix it up by reverting back to ("Red Hat", "KVM", mc->desc) +tuple. + +1) +Fixes: 208239eb2 (hw/i386/pc: Defer smbios_set_defaults() to machine_done) +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 58429bb78d..d6a24177e2 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version); +-- +2.39.3 + diff --git a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch b/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch deleted file mode 100644 index 1ec1c82..0000000 --- a/SOURCES/kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 2732b6c5ef249d3ec9affca66768cc2fc476ff7c Mon Sep 17 00:00:00 2001 -From: Leonardo Bras -Date: Thu, 6 Jul 2023 01:55:47 -0300 -Subject: [PATCH 11/12] pcie: Add hotplug detect state register to cmask -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Leonardo Brás -RH-MergeRequest: 188: pcie: Add hotplug detect state register to cmask -RH-Bugzilla: 2215819 -RH-Acked-by: Peter Xu -RH-Acked-by: quintela1 -RH-Commit: 
[1/1] a125fa337711bddbc957c399044393e82272b143 (LeoBras/centos-qemu-kvm) - -When trying to migrate a machine type pc-q35-6.0 or lower, with this -cmdline options, - --device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ --device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 - -the following bug happens after all ram pages were sent: - -qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 -qemu-kvm: Failed to load PCIDevice:config -qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj -qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' -qemu-kvm: load of migration failed: Invalid argument - -This happens on pc-q35-6.0 or lower because of: -{ "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } - -In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), -which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI -hotplug for the guest. After a while the guest will deal with this hotplug -and qemu will clear the above bit. - -Then, during migration, get_pci_config_device() will compare the -configs of both the freshly created device and the one that is being -received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit -and cause the bug to reproduce. - -To avoid this fake incompatibility, there are tree fields in PCIDevice that -can help: - -- wmask: Used to implement R/W bytes, and -- w1cmask: Used to implement RW1C(Write 1 to Clear) bytes -- cmask: Used to enable config checks on load. - -According to PCI Express® Base Specification Revision 5.0 Version 1.0, -table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is -listed as RO (read-only), so it only makes sense to make use of the cmask -field. 
- -So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on -get_pci_config_device() does not abort the migration. - -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 -Signed-off-by: Leonardo Bras -Message-Id: <20230706045546.593605-3-leobras@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Juan Quintela -(cherry picked from commit 625b370c45f4acd155ee625d61c0057d770a5b5e) -Signed-off-by: Leonardo Bras ---- - hw/pci/pcie.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c -index b8c24cf45f..8bc4a4ee57 100644 ---- a/hw/pci/pcie.c -+++ b/hw/pci/pcie.c -@@ -659,6 +659,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) - pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, - PCI_EXP_HP_EV_SUPPORTED); - -+ /* Avoid migration abortion when this device hot-removed by guest */ -+ pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, -+ PCI_EXP_SLTSTA_PDS); -+ - dev->exp.hpev_notified = false; - - qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), --- -2.39.3 - diff --git a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch b/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch deleted file mode 100644 index 0421e33..0000000 --- a/SOURCES/kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch +++ /dev/null @@ -1,42 +0,0 @@ -From ab9b8620c62540f3267d005c198920671ef9abc3 Mon Sep 17 00:00:00 2001 -From: Paolo Bonzini -Date: Fri, 3 Mar 2023 11:15:28 +0100 -Subject: [PATCH 06/56] postcopy-ram: do not use qatomic_mb_read -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [5/50] 534c0e13362dfc994fa90c79bfb5ed6ee8c27dfc (peterx/qemu-kvm) - -It 
does not even pair with a qatomic_mb_set(), so it is clearer to use -load-acquire in this case; they are synonyms. - -Signed-off-by: Paolo Bonzini -(cherry picked from commit 4592eaf38755a28300d113cd128f65b5b38495f2) -Signed-off-by: Peter Xu ---- - migration/postcopy-ram.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c -index bbb8af61ae..d7b48dd920 100644 ---- a/migration/postcopy-ram.c -+++ b/migration/postcopy-ram.c -@@ -1526,7 +1526,7 @@ static PostcopyState incoming_postcopy_state; - - PostcopyState postcopy_state_get(void) - { -- return qatomic_mb_read(&incoming_postcopy_state); -+ return qatomic_load_acquire(&incoming_postcopy_state); - } - - /* Set the state and return the old state */ --- -2.39.1 - diff --git a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch b/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch deleted file mode 100644 index abaadf8..0000000 --- a/SOURCES/kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch +++ /dev/null @@ -1,79 +0,0 @@ -From 99f27e14856c528f442b628e8f4a7881e6e63179 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Tue, 30 May 2023 09:19:41 +0200 -Subject: [PATCH 4/5] qapi: add '@fdset' feature for - BlockdevOptionsVirtioBlkVhostVdpa - -RH-Author: Stefano Garzarella -RH-MergeRequest: 169: block/blkio: support fd passing for virtio-blk-vhost-vdpa driver -RH-Bugzilla: 2180076 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/2] abee2a542e41f9eaa17dd204b74778e232d1eb60 (sgarzarella/qemu-kvm-c-9-s) - -The virtio-blk-vhost-vdpa driver in libblkio 1.3.0 supports the fd -passing through the new 'fd' property. - -Since now we are using qemu_open() on '@path' if the virtio-blk driver -supports the fd passing, let's announce it. -In this way, the management layer can pass the file descriptor of an -already opened vhost-vdpa character device. 
This is useful especially -when the device can only be accessed with certain privileges. - -Add the '@fdset' feature only when the virtio-blk-vhost-vdpa driver -in libblkio supports it. - -Suggested-by: Markus Armbruster -Reviewed-by: Stefan Hajnoczi -Signed-off-by: Stefano Garzarella -Message-id: 20230530071941.8954-3-sgarzare@redhat.com -Signed-off-by: Stefan Hajnoczi -(cherry picked from commit 98b126f5e3228a346c774e569e26689943b401dd) -- changed doc indentantion since QAPI parser failed downstream because - we don't have commit 08349786c84306863a3b659c8a9b28bb74c405c6 - downstream. It relaxed the indentation rules. -Signed-off-by: Stefano Garzarella ---- - meson.build | 4 ++++ - qapi/block-core.json | 6 ++++++ - 2 files changed, 10 insertions(+) - -diff --git a/meson.build b/meson.build -index d964e741e7..a18cc64531 100644 ---- a/meson.build -+++ b/meson.build -@@ -1843,6 +1843,10 @@ config_host_data.set('CONFIG_LZO', lzo.found()) - config_host_data.set('CONFIG_MPATH', mpathpersist.found()) - config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api) - config_host_data.set('CONFIG_BLKIO', blkio.found()) -+if blkio.found() -+ config_host_data.set('CONFIG_BLKIO_VHOST_VDPA_FD', -+ blkio.version().version_compare('>=1.3.0')) -+endif - config_host_data.set('CONFIG_CURL', curl.found()) - config_host_data.set('CONFIG_CURSES', curses.found()) - config_host_data.set('CONFIG_GBM', gbm.found()) -diff --git a/qapi/block-core.json b/qapi/block-core.json -index c05ad0c07e..81b48a8d3b 100644 ---- a/qapi/block-core.json -+++ b/qapi/block-core.json -@@ -3841,10 +3841,16 @@ - # - # @path: path to the vhost-vdpa character device. 
- # -+# Features: -+# @fdset: Member @path supports the special "/dev/fdset/N" path -+# (since 8.1) -+# - # Since: 7.2 - ## - { 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa', - 'data': { 'path': 'str' }, -+ 'features': [ { 'name' :'fdset', -+ 'if': 'CONFIG_BLKIO_VHOST_VDPA_FD' } ], - 'if': 'CONFIG_BLKIO' } - - ## --- -2.39.3 - diff --git a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch b/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch deleted file mode 100644 index a95895b..0000000 --- a/SOURCES/kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch +++ /dev/null @@ -1,50 +0,0 @@ -From cbf9c74ef46d71c015b9de53f4514941dca8a035 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:19:37 -0400 -Subject: [PATCH 10/14] qapi, i386/sev: Change the reduced-phys-bits value from - 5 to 1 - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/4] 4243578db33f89461e60b745eb96fee402218c9f (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 798a818f50a9bfc01e8b5943090de458863b897b -Author: Tom Lendacky -Date: Fri Sep 30 10:14:27 2022 -0500 - - qapi, i386/sev: Change the reduced-phys-bits value from 5 to 1 - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. Change the query-sev-capabilities json comment to use 1. - - Fixes: 31dd67f684 ("sev/i386: qmp: add query-sev-capabilities command") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. 
David Alan Gilbert - Message-Id: - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qapi/misc-target.json | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/qapi/misc-target.json b/qapi/misc-target.json -index de91054523..bf04042f45 100644 ---- a/qapi/misc-target.json -+++ b/qapi/misc-target.json -@@ -172,7 +172,7 @@ - # -> { "execute": "query-sev-capabilities" } - # <- { "return": { "pdh": "8CCDD8DDD", "cert-chain": "888CCCDDDEE", - # "cpu0-id": "2lvmGwo+...61iEinw==", --# "cbitpos": 47, "reduced-phys-bits": 5}} -+# "cbitpos": 47, "reduced-phys-bits": 1}} - # - ## - { 'command': 'query-sev-capabilities', 'returns': 'SevCapability', --- -2.39.3 - diff --git a/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch new file mode 100644 index 0000000..b31142e --- /dev/null +++ b/SOURCES/kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch @@ -0,0 +1,167 @@ +From f1e82fe5076b4030d385dfa49b8284899386114d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:54 -0500 +Subject: [PATCH 08/22] qdev: add IOThreadVirtQueueMappingList property type + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [4/17] 817aa1339da8ed3814730473342ba045e66d5b51 (stefanha/centos-stream-qemu-kvm) + +virtio-blk and virtio-scsi devices will need a way to specify the +mapping between IOThreads and virtqueues. At the moment all virtqueues +are assigned to a single IOThread or the main loop. This single thread +can be a CPU bottleneck, so it is necessary to allow finer-grained +assignment to spread the load. + +Introduce DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST() so devices can take a +parameter that maps virtqueues to IOThreads. 
The command-line syntax for +this new property is as follows: + + --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1,2]},...]}' + +IOThreads are specified by name and virtqueues are specified by 0-based +index. + +It will be common to simply assign virtqueues round-robin across a set +of IOThreads. A convenient syntax that does not require specifying +individual virtqueue indices is available: + + --device '{"driver":"foo","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"},...]}' + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit cf03a152c5d749fd0083bfe540df9524f1d2ff1d) +Signed-off-by: Stefan Hajnoczi +--- + hw/core/qdev-properties-system.c | 46 +++++++++++++++++++++++++++++ + include/hw/qdev-properties-system.h | 5 ++++ + qapi/virtio.json | 29 ++++++++++++++++++ + 3 files changed, 80 insertions(+) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 73cced4626..1a396521d5 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -18,6 +18,7 @@ + #include "qapi/qapi-types-block.h" + #include "qapi/qapi-types-machine.h" + #include "qapi/qapi-types-migration.h" ++#include "qapi/qapi-visit-virtio.h" + #include "qapi/qmp/qerror.h" + #include "qemu/ctype.h" + #include "qemu/cutils.h" +@@ -1160,3 +1161,48 @@ const PropertyInfo qdev_prop_cpus390entitlement = { + .set = qdev_propinfo_set_enum, + .set_default_value = qdev_propinfo_set_default_value_enum, + }; ++ ++/* --- IOThreadVirtQueueMappingList --- */ ++ ++static void get_iothread_vq_mapping_list(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ ++ visit_type_IOThreadVirtQueueMappingList(v, name, prop_ptr, errp); ++} ++ ++static void 
set_iothread_vq_mapping_list(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ IOThreadVirtQueueMappingList *list; ++ ++ if (!visit_type_IOThreadVirtQueueMappingList(v, name, &list, errp)) { ++ return; ++ } ++ ++ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); ++ *prop_ptr = list; ++} ++ ++static void release_iothread_vq_mapping_list(Object *obj, ++ const char *name, void *opaque) ++{ ++ IOThreadVirtQueueMappingList **prop_ptr = ++ object_field_prop_ptr(obj, opaque); ++ ++ qapi_free_IOThreadVirtQueueMappingList(*prop_ptr); ++ *prop_ptr = NULL; ++} ++ ++const PropertyInfo qdev_prop_iothread_vq_mapping_list = { ++ .name = "IOThreadVirtQueueMappingList", ++ .description = "IOThread virtqueue mapping list [{\"iothread\":\"\", " ++ "\"vqs\":[1,2,3,...]},...]", ++ .get = get_iothread_vq_mapping_list, ++ .set = set_iothread_vq_mapping_list, ++ .release = release_iothread_vq_mapping_list, ++}; +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 91f7a2452d..06c359c190 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -24,6 +24,7 @@ extern const PropertyInfo qdev_prop_off_auto_pcibar; + extern const PropertyInfo qdev_prop_pcie_link_speed; + extern const PropertyInfo qdev_prop_pcie_link_width; + extern const PropertyInfo qdev_prop_cpus390entitlement; ++extern const PropertyInfo qdev_prop_iothread_vq_mapping_list; + + #define DEFINE_PROP_PCI_DEVFN(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_pci_devfn, int32_t) +@@ -82,4 +83,8 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_cpus390entitlement, \ + CpuS390Entitlement) + ++#define DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST(_name, _state, _field) \ ++ DEFINE_PROP(_name, _state, _field, qdev_prop_iothread_vq_mapping_list, \ ++ IOThreadVirtQueueMappingList 
*) ++ + #endif +diff --git a/qapi/virtio.json b/qapi/virtio.json +index e6dcee7b83..19c7c36e36 100644 +--- a/qapi/virtio.json ++++ b/qapi/virtio.json +@@ -928,3 +928,32 @@ + 'data': { 'path': 'str', 'queue': 'uint16', '*index': 'uint16' }, + 'returns': 'VirtioQueueElement', + 'features': [ 'unstable' ] } ++ ++## ++# @IOThreadVirtQueueMapping: ++# ++# Describes the subset of virtqueues assigned to an IOThread. ++# ++# @iothread: the id of IOThread object ++# ++# @vqs: an optional array of virtqueue indices that will be handled by this ++# IOThread. When absent, virtqueues are assigned round-robin across all ++# IOThreadVirtQueueMappings provided. Either all IOThreadVirtQueueMappings ++# must have @vqs or none of them must have it. ++# ++# Since: 9.0 ++## ++ ++{ 'struct': 'IOThreadVirtQueueMapping', ++ 'data': { 'iothread': 'str', '*vqs': ['uint16'] } } ++ ++## ++# @DummyVirtioForceArrays: ++# ++# Not used by QMP; hack to let us use IOThreadVirtQueueMappingList internally ++# ++# Since: 9.0 ++## ++ ++{ 'struct': 'DummyVirtioForceArrays', ++ 'data': { 'unused-iothread-vq-mapping': ['IOThreadVirtQueueMapping'] } } +-- +2.39.3 + diff --git a/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch new file mode 100644 index 0000000..94bb716 --- /dev/null +++ b/SOURCES/kvm-qdev-properties-alias-all-object-class-properties.patch @@ -0,0 +1,85 @@ +From 4251aab5b2beb68d1800cd4a329361ff6f57c430 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:52 -0500 +Subject: [PATCH 07/22] qdev-properties: alias all object class properties + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [3/17] bc5d0aafe4645dacf9277904a2b20760d6e676e1 (stefanha/centos-stream-qemu-kvm) + +qdev_alias_all_properties() aliases a DeviceState's qdev 
properties onto +an Object. This is used for VirtioPCIProxy types so that --device +virtio-blk-pci has properties of its embedded --device virtio-blk-device +object. + +Currently this function is implemented using qdev properties. Change the +function to use QOM object class properties instead. This works because +qdev properties create QOM object class properties, but it also catches +any QOM object class-only properties that have no qdev properties. + +This change ensures that properties of devices are shown with --device +foo,\? even if they are QOM object class properties. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 350147a871a545ab56b4a1062c8485635d9ffc24) +Signed-off-by: Stefan Hajnoczi +--- + hw/core/qdev-properties.c | 18 ++++++++++-------- + include/hw/qdev-properties.h | 4 ++-- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 840006e953..7d6fa726fd 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -1076,16 +1076,18 @@ void device_class_set_props(DeviceClass *dc, Property *props) + void qdev_alias_all_properties(DeviceState *target, Object *source) + { + ObjectClass *class; +- Property *prop; ++ ObjectPropertyIterator iter; ++ ObjectProperty *prop; + + class = object_get_class(OBJECT(target)); +- do { +- DeviceClass *dc = DEVICE_CLASS(class); + +- for (prop = dc->props_; prop && prop->name; prop++) { +- object_property_add_alias(source, prop->name, +- OBJECT(target), prop->name); ++ object_class_property_iter_init(&iter, class); ++ while ((prop = object_property_iter_next(&iter))) { ++ if (object_property_find(source, prop->name)) { ++ continue; /* skip duplicate properties */ + } +- class = object_class_get_parent(class); +- } while (class != object_class_by_name(TYPE_DEVICE)); ++ ++ object_property_add_alias(source, 
prop->name, ++ OBJECT(target), prop->name); ++ } + } +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index 25743a29a0..09aa04ca1e 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -230,8 +230,8 @@ void qdev_property_add_static(DeviceState *dev, Property *prop); + * @target: Device which has properties to be aliased + * @source: Object to add alias properties to + * +- * Add alias properties to the @source object for all qdev properties on +- * the @target DeviceState. ++ * Add alias properties to the @source object for all properties on the @target ++ * DeviceState. + * + * This is useful when @target is an internal implementation object + * owned by @source, and you want to expose all the properties of that +-- +2.39.3 + diff --git a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch b/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch deleted file mode 100644 index 6830692..0000000 --- a/SOURCES/kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 50c833fc3c7d8d3a5124cfdb2f2dc06b910c2252 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Thu, 3 Aug 2023 14:21:25 -0400 -Subject: [PATCH 11/14] qemu-options.hx: Update the reduced-phys-bits - documentation - -RH-Author: Bandan Das -RH-MergeRequest: 196: Updates to SEV reduced-phys-bits parameter -RH-Bugzilla: 2214839 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/4] b0c4a19e9f4185c97ddf71857bc9367cea01ffa8 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2214839 - -commit 326e3015c4c6f3197157ea0bb00826ae740e2fad -Author: Tom Lendacky -Date: Fri Sep 30 10:14:28 2022 -0500 - - qemu-options.hx: Update the reduced-phys-bits documentation - - A guest only ever experiences, at most, 1 bit of reduced physical - addressing. Update the documentation to reflect this as well as change - the example value on the reduced-phys-bits option. 
- - Fixes: a9b4942f48 ("target/i386: add Secure Encrypted Virtualization (SEV) object") - Signed-off-by: Tom Lendacky - Reviewed-by: Dr. David Alan Gilbert - Message-Id: <13a62ced1808546c1d398e2025cf85f4c94ae123.1664550870.git.thomas.lendacky@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - qemu-options.hx | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/qemu-options.hx b/qemu-options.hx -index b18f933703..edf10a5aac 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -5417,7 +5417,7 @@ SRST - physical address space. The ``reduced-phys-bits`` is used to - provide the number of bits we loose in physical address space. - Similar to C-bit, the value is Host family dependent. On EPYC, -- the value should be 5. -+ a guest will lose a maximum of 1 bit, so the value should be 1. - - The ``sev-device`` provides the device file to use for - communicating with the SEV firmware running inside AMD Secure -@@ -5452,7 +5452,7 @@ SRST - - # |qemu_system_x86| \\ - ...... \\ -- -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=5 \\ -+ -object sev-guest,id=sev0,cbitpos=47,reduced-phys-bits=1 \\ - -machine ...,memory-encryption=sev0 \\ - ..... 
- --- -2.39.3 - diff --git a/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch b/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch new file mode 100644 index 0000000..7dc550c --- /dev/null +++ b/SOURCES/kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch @@ -0,0 +1,135 @@ +From f2fe6c7a2def488633cbb67e28ac00279d6e8de4 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Tue, 27 Feb 2024 11:17:39 +0100 +Subject: [PATCH 1/2] qemu_init: increase NOFILE soft limit on POSIX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cornelia Huck +RH-MergeRequest: 226: qemu_init: increase NOFILE soft limit on POSIX +RH-Jira: RHEL-26049 +RH-Acked-by: Gavin Shan +RH-Acked-by: Ani Sinha +RH-Acked-by: Shaoqin Huang +RH-Commit: [1/1] cee5404aef3f6437d45a1c43bdee73a57a528bee (cohuck/qemu-kvm-c9s) + +Jira: https://issues.redhat.com/browse/RHEL-26049 + +In many configurations, e.g. multiple vNICs with multiple queues or +with many Ceph OSDs, the default soft limit of 1024 is not enough. +QEMU is supposed to work fine with file descriptors >= 1024 and does +not use select() on POSIX. Bump the soft limit to the allowed hard +limit to avoid issues with the aforementioned configurations. + +Of course the limit could be raised from the outside, but the man page +of systemd.exec states about 'LimitNOFILE=': + +> Don't use. +> [...] +> Typically applications should increase their soft limit to the hard +> limit on their own, if they are OK with working with file +> descriptors above 1023, + +If the soft limit is already the same as the hard limit, avoid the +superfluous setrlimit call. This can avoid a warning with a strict +seccomp filter blocking setrlimit if NOFILE was already raised before +executing QEMU. + +Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507 +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Fiona Ebner +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 03e471c41d8b1b6eb16c9714f387449f52fe5c1d) +Signed-off-by: Cornelia Huck +--- + include/sysemu/os-posix.h | 1 + + include/sysemu/os-win32.h | 5 +++++ + os-posix.c | 22 ++++++++++++++++++++++ + system/vl.c | 2 ++ + 4 files changed, 30 insertions(+) + +diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h +index dff32ae185..b881ac6c6f 100644 +--- a/include/sysemu/os-posix.h ++++ b/include/sysemu/os-posix.h +@@ -51,6 +51,7 @@ bool is_daemonized(void); + void os_daemonize(void); + bool os_set_runas(const char *user_id); + void os_set_chroot(const char *path); ++void os_setup_limits(void); + void os_setup_post(void); + int os_mlock(void); + +diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h +index 1047d260cb..b82a5d3ad9 100644 +--- a/include/sysemu/os-win32.h ++++ b/include/sysemu/os-win32.h +@@ -128,6 +128,11 @@ static inline int os_mlock(void) + return -ENOSYS; + } + ++static inline void os_setup_limits(void) ++{ ++ return; ++} ++ + #define fsync _commit + + #if !defined(lseek) +diff --git a/os-posix.c b/os-posix.c +index 52ef6990ff..a4284e2c07 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -24,6 +24,7 @@ + */ + + #include "qemu/osdep.h" ++#include + #include + #include + #include +@@ -256,6 +257,27 @@ void os_daemonize(void) + } + } + ++void os_setup_limits(void) ++{ ++ struct rlimit nofile; ++ ++ if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) { ++ warn_report("unable to query NOFILE limit: %s", strerror(errno)); ++ return; ++ } ++ ++ if (nofile.rlim_cur == nofile.rlim_max) { ++ return; ++ } ++ ++ nofile.rlim_cur = nofile.rlim_max; ++ ++ if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) { ++ warn_report("unable to set NOFILE limit: %s", strerror(errno)); ++ return; ++ } ++} ++ + void os_setup_post(void) + { + int fd = 0; +diff --git a/system/vl.c b/system/vl.c +index 93635ffc5b..6443b6e469 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -2783,6 +2783,8 @@ void qemu_init(int argc, char **argv) + 
error_init(argv[0]); + qemu_init_exec_dir(argv[0]); + ++ os_setup_limits(); ++ + qemu_init_arch_modules(); + + qemu_init_subsystems(); +-- +2.39.3 + diff --git a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch b/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch deleted file mode 100644 index 4a4a2cc..0000000 --- a/SOURCES/kvm-raven-disable-reentrancy-detection-for-iomem.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 936e21428a04524ccffeb36110d1aa61de9f44e5 Mon Sep 17 00:00:00 2001 -From: Jon Maloy -Date: Tue, 9 May 2023 10:29:03 -0400 -Subject: [PATCH 11/21] raven: disable reentrancy detection for iomem - -RH-Author: Jon Maloy -RH-MergeRequest: 165: memory: prevent dma-reentracy issues -RH-Jira: RHEL-516 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/13] 48278583aa1ab08b912f49cd8b3a79d1bb3abf5f (jmaloy/jmaloy-qemu-kvm-2) - -Jira: https://issues.redhat.com/browse/RHEL-516 -Upstream: Merged -CVE: CVE-2023-2680 - -commit 6dad5a6810d9c60ca320d01276f6133bbcfa1fc7 -Author: Alexander Bulekov -Date: Thu Apr 27 17:10:12 2023 -0400 - - raven: disable reentrancy detection for iomem - - As the code is designed for re-entrant calls from raven_io_ops to - pci-conf, mark raven_io_ops as reentrancy-safe. - - Signed-off-by: Alexander Bulekov - Message-Id: <20230427211013.2994127-8-alxndr@bu.edu> - Signed-off-by: Thomas Huth - -Signed-off-by: Jon Maloy ---- - hw/pci-host/raven.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c -index 072ffe3c5e..9a11ac4b2b 100644 ---- a/hw/pci-host/raven.c -+++ b/hw/pci-host/raven.c -@@ -294,6 +294,13 @@ static void raven_pcihost_initfn(Object *obj) - memory_region_init(&s->pci_memory, obj, "pci-memory", 0x3f000000); - address_space_init(&s->pci_io_as, &s->pci_io, "raven-io"); - -+ /* -+ * Raven's raven_io_ops use the address-space API to access pci-conf-idx -+ * (which is also owned by the raven device). As such, mark the -+ * pci_io_non_contiguous as re-entrancy safe. 
-+ */ -+ s->pci_io_non_contiguous.disable_reentrancy_guard = true; -+ - /* CPU address space */ - memory_region_add_subregion(address_space_mem, PCI_IO_BASE_ADDR, - &s->pci_io); --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch b/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch deleted file mode 100644 index d0eb303..0000000 --- a/SOURCES/kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch +++ /dev/null @@ -1,220 +0,0 @@ -From 41987ce0dd79d8734088002cbd34f20704dd017a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Mon, 17 Jul 2023 17:36:07 +0200 -Subject: [PATCH 04/12] s390x/ap: Wire up the device request notifier interface -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 185: Backport s390x fixes from QEMU 8.1 -RH-Jira: RHEL-794 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Thomas Huth -RH-Commit: [1/1] ab6c912a1b8cdb584adacac16af79352fdfe7355 (clegoate/qemu-kvm-c9s) - -Jira: https://issues.redhat.com/browse/RHEL-794 - -commit 1360b2ad1f673d32a09de5826cd71ecd0510164a -Author: Tony Krowiak -Date: Fri Jun 2 10:11:25 2023 -0400 - - s390x/ap: Wire up the device request notifier interface - - Let's wire up the device request notifier interface to handle device unplug - requests for AP. 
- - Signed-off-by: Tony Krowiak - Link: https://lore.kernel.org/qemu-devel/20230530225544.280031-1-akrowiak@linux.ibm.com/ - Signed-off-by: Cédric Le Goater - -Backport note: - - - linux-headers/linux/vfio.h - updated to v6.5-rc1 level for VFIO_AP_REQ_IRQ_INDEX definition - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/ap.c | 113 +++++++++++++++++++++++++++++++++++++ - linux-headers/linux/vfio.h | 9 +++ - 2 files changed, 122 insertions(+) - -diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c -index e0dd561e85..6e21d1da5a 100644 ---- a/hw/vfio/ap.c -+++ b/hw/vfio/ap.c -@@ -18,6 +18,8 @@ - #include "hw/vfio/vfio-common.h" - #include "hw/s390x/ap-device.h" - #include "qemu/error-report.h" -+#include "qemu/event_notifier.h" -+#include "qemu/main-loop.h" - #include "qemu/module.h" - #include "qemu/option.h" - #include "qemu/config-file.h" -@@ -33,6 +35,7 @@ - struct VFIOAPDevice { - APDevice apdev; - VFIODevice vdev; -+ EventNotifier req_notifier; - }; - - OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) -@@ -84,10 +87,110 @@ static VFIOGroup *vfio_ap_get_group(VFIOAPDevice *vapdev, Error **errp) - return vfio_get_group(groupid, &address_space_memory, errp); - } - -+static void vfio_ap_req_notifier_handler(void *opaque) -+{ -+ VFIOAPDevice *vapdev = opaque; -+ Error *err = NULL; -+ -+ if (!event_notifier_test_and_clear(&vapdev->req_notifier)) { -+ return; -+ } -+ -+ qdev_unplug(DEVICE(vapdev), &err); -+ -+ if (err) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+} -+ -+static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq, Error **errp) -+{ -+ int fd; -+ size_t argsz; -+ IOHandler *fd_read; -+ EventNotifier *notifier; -+ struct vfio_irq_info *irq_info; -+ VFIODevice *vdev = &vapdev->vdev; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ notifier = &vapdev->req_notifier; -+ fd_read = vfio_ap_req_notifier_handler; -+ break; -+ default: -+ error_setg(errp, "vfio: Unsupported device irq(%d)", irq); -+ return; -+ } 
-+ -+ if (vdev->num_irqs < irq + 1) { -+ error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)", -+ irq, vdev->num_irqs); -+ return; -+ } -+ -+ argsz = sizeof(*irq_info); -+ irq_info = g_malloc0(argsz); -+ irq_info->index = irq; -+ irq_info->argsz = argsz; -+ -+ if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, -+ irq_info) < 0 || irq_info->count < 1) { -+ error_setg_errno(errp, errno, "vfio: Error getting irq info"); -+ goto out_free_info; -+ } -+ -+ if (event_notifier_init(notifier, 0)) { -+ error_setg_errno(errp, errno, -+ "vfio: Unable to init event notifier for irq (%d)", -+ irq); -+ goto out_free_info; -+ } -+ -+ fd = event_notifier_get_fd(notifier); -+ qemu_set_fd_handler(fd, fd_read, NULL, vapdev); -+ -+ if (vfio_set_irq_signaling(vdev, irq, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, -+ errp)) { -+ qemu_set_fd_handler(fd, NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+ } -+ -+out_free_info: -+ g_free(irq_info); -+ -+} -+ -+static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, -+ unsigned int irq) -+{ -+ Error *err = NULL; -+ EventNotifier *notifier; -+ -+ switch (irq) { -+ case VFIO_AP_REQ_IRQ_INDEX: -+ notifier = &vapdev->req_notifier; -+ break; -+ default: -+ error_report("vfio: Unsupported device irq(%d)", irq); -+ return; -+ } -+ -+ if (vfio_set_irq_signaling(&vapdev->vdev, irq, 0, -+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, &err)) { -+ warn_reportf_err(err, VFIO_MSG_PREFIX, vapdev->vdev.name); -+ } -+ -+ qemu_set_fd_handler(event_notifier_get_fd(notifier), -+ NULL, NULL, vapdev); -+ event_notifier_cleanup(notifier); -+} -+ - static void vfio_ap_realize(DeviceState *dev, Error **errp) - { - int ret; - char *mdevid; -+ Error *err = NULL; - VFIOGroup *vfio_group; - APDevice *apdev = AP_DEVICE(dev); - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); -@@ -116,6 +219,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) - goto out_get_dev_err; - } - -+ vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, &err); -+ if (err) { 
-+ /* -+ * Report this error, but do not make it a failing condition. -+ * Lack of this IRQ in the host does not prevent normal operation. -+ */ -+ error_report_err(err); -+ } -+ - return; - - out_get_dev_err: -@@ -129,6 +241,7 @@ static void vfio_ap_unrealize(DeviceState *dev) - VFIOAPDevice *vapdev = VFIO_AP_DEVICE(apdev); - VFIOGroup *group = vapdev->vdev.group; - -+ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); - vfio_ap_put_device(vapdev); - vfio_put_group(group); - } -diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h -index c59692ce0b..ce464957c8 100644 ---- a/linux-headers/linux/vfio.h -+++ b/linux-headers/linux/vfio.h -@@ -642,6 +642,15 @@ enum { - VFIO_CCW_NUM_IRQS - }; - -+/* -+ * The vfio-ap bus driver makes use of the following IRQ index mapping. -+ * Unimplemented IRQ types return a count of zero. -+ */ -+enum { -+ VFIO_AP_REQ_IRQ_INDEX, -+ VFIO_AP_NUM_IRQS -+}; -+ - /** - * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 12, - * struct vfio_pci_hot_reset_info) --- -2.39.3 - diff --git a/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch new file mode 100644 index 0000000..1b4a4ab --- /dev/null +++ b/SOURCES/kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch @@ -0,0 +1,106 @@ +From 64b0180f5a52668f8ac4c444ba369231dbc4d5b9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 096/101] s390x/pci: avoid double enable/disable of aif +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [1/3] ebdf8a474ea21486f5ec051683f17bae6d20f675 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 
07b2c8e034d80ff92e202405c494d2ff80fcf848 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:49 2024 -0500 + + s390x/pci: avoid double enable/disable of aif + + Use a flag to keep track of whether AIF is currently enabled. This can be + used to avoid enabling/disabling AIF multiple times as well as to determine + whether or not it should be disabled during reset processing. + + Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices") + Reported-by: Cédric Le Goater + Reviewed-by: Eric Farman + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com> + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++-- + include/hw/s390x/s390-pci-bus.h | 1 + + 2 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index ff41e4106d..1ee510436c 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void) + + int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) + { ++ int rc; + struct kvm_s390_zpci_op args = { + .fh = pbdev->fh, + .op = KVM_S390_ZPCIOP_REG_AEN, +@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) + .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST + }; + +- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (pbdev->aif) { ++ return -EINVAL; ++ } ++ ++ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (rc == 0) { ++ pbdev->aif = true; ++ } ++ ++ return rc; + } + + int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) + { ++ int rc; ++ + struct kvm_s390_zpci_op args = { + .fh = pbdev->fh, + .op = KVM_S390_ZPCIOP_DEREG_AEN + }; + +- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (!pbdev->aif) { ++ return -EINVAL; ++ } ++ ++ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++ if (rc == 0) { ++ pbdev->aif = false; ++ } ++ ++ return rc; + } +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index b1bdbeaeb5..435e788867 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -361,6 +361,7 @@ struct S390PCIBusDevice { + bool unplug_requested; + bool interp; + bool forwarding_assist; ++ bool aif; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch new file mode 100644 index 0000000..f3a4129 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch @@ -0,0 +1,137 @@ +From c885b17e09ab19a3e8d3b2e1765963811af6f764 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 098/101] s390x/pci: drive ISM reset from subsystem reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [3/3] 426cf156a2c67e6dcd7483a769fa3741e2700504 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 
68c691ca99a2538d6a53a70ce8a9ce06ee307ff1 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:51 2024 -0500 + + s390x/pci: drive ISM reset from subsystem reset + + ISM devices are sensitive to manipulation of the IOMMU, so the ISM device + needs to be reset before the vfio-pci device is reset (triggering a full + UNMAP). In order to ensure this occurs, trigger ISM device resets from + subsystem_reset before triggering the PCI bus reset (which will also + trigger vfio-pci reset). This only needs to be done for ISM devices + which were enabled for use by the guest. + Further, ensure that AIF is disabled as part of the reset event. + + Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot") + Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") + Reported-by: Cédric Le Goater + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com> + Reviewed-by: Eric Farman + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++--------- + hw/s390x/s390-virtio-ccw.c | 8 ++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + 3 files changed, 26 insertions(+), 9 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 347580ebac..3e57d5faca 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -151,20 +151,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) + pci_device_reset(pbdev->pdev); + } + +-static void s390_pci_reset_cb(void *opaque) +-{ +- S390PCIBusDevice *pbdev = opaque; +- +- pci_device_reset(pbdev->pdev); +-} +- + static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) + { + HotplugHandler *hotplug_ctrl; + + if (pbdev->pft == ZPCI_PFT_ISM) { + notifier_remove(&pbdev->shutdown_notifier); +- qemu_unregister_reset(s390_pci_reset_cb, pbdev); + } + + /* Unplug the PCI device */ +@@ -1132,7 +1124,6 @@ static void 
s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (pbdev->pft == ZPCI_PFT_ISM) { + pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; + qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); +- qemu_register_reset(s390_pci_reset_cb, pbdev); + } + } else { + pbdev->fh |= FH_SHM_EMUL; +@@ -1279,6 +1270,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1); + } + ++void s390_pci_ism_reset(void) ++{ ++ S390pciState *s = s390_get_phb(); ++ ++ S390PCIBusDevice *pbdev, *next; ++ ++ /* Trigger reset event for each passthrough ISM device currently in-use */ ++ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { ++ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM && ++ pbdev->fh & FH_MASK_ENABLE) { ++ s390_pci_kvm_aif_disable(pbdev); ++ ++ pci_device_reset(pbdev->pdev); ++ } ++ } ++} ++ + static void s390_pcihost_reset(DeviceState *dev) + { + S390pciState *s = S390_PCI_HOST_BRIDGE(dev); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index e26ce26f5a..24f4773179 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -118,6 +118,14 @@ static void subsystem_reset(void) + DeviceState *dev; + int i; + ++ /* ++ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can ++ * occur during reset of the vfio-pci device (unmap of entire aperture). ++ * Ensure any passthrough ISM devices are reset now, while CPUs are paused ++ * but before vfio-pci cleanup occurs. 
++ */ ++ s390_pci_ism_reset(); ++ + for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) { + dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL)); + if (dev) { +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 435e788867..2c43ea123f 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s, + const char *target); + S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s, + S390PCIBusDevice *pbdev); ++void s390_pci_ism_reset(void); + + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch new file mode 100644 index 0000000..845a467 --- /dev/null +++ b/SOURCES/kvm-s390x-pci-refresh-fh-before-disabling-aif.patch @@ -0,0 +1,71 @@ +From 49078bdfd4c116da3e920632ec6f7041f1b38015 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 22 Jan 2024 09:25:53 +0100 +Subject: [PATCH 097/101] s390x/pci: refresh fh before disabling aif +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices +RH-Jira: RHEL-21169 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Commit: [2/3] 3523067909c41818dfc769abdb93930833416c11 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21169 + +commit 30e35258e25c75c9d799c34fd89afcafffb37084 +Author: Matthew Rosato +Date: Thu Jan 18 13:51:50 2024 -0500 + + s390x/pci: refresh fh before disabling aif + + Typically we refresh the host fh during CLP enable, however it's possible + that the device goes through multiple reset events before the guest + performs another CLP enable. Let's handle this for now by refreshing the + host handle from vfio before disabling aif. 
+ + Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset") + Reported-by: Cédric Le Goater + Reviewed-by: Eric Farman + Signed-off-by: Matthew Rosato + Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com> + Reviewed-by: Cédric Le Goater + Signed-off-by: Thomas Huth + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index 1ee510436c..9eef4fc3ec 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -18,6 +18,7 @@ + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-kvm.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "cpu_models.h" + + bool s390_pci_kvm_interp_allowed(void) +@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) + return -EINVAL; + } + ++ /* ++ * The device may have already been reset but we still want to relinquish ++ * the guest ISC, so always be sure to use an up-to-date host fh. 
++ */ ++ if (!s390_pci_get_host_fh(pbdev, &args.fh)) { ++ return -EPERM; ++ } ++ + rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); + if (rc == 0) { + pbdev->aif = false; +-- +2.39.3 + diff --git a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch b/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch deleted file mode 100644 index ecf1353..0000000 --- a/SOURCES/kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch +++ /dev/null @@ -1,129 +0,0 @@ -From 3cab2a638a10ece2b76d9f33a3c5dc6f64f1bbaa Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Wed, 10 May 2023 12:55:31 +0200 -Subject: [PATCH 21/21] s390x/pv: Fix spurious warning with asynchronous - teardown -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [2/2] cb690d3155ea22c6df00a4d75b72f501515e5556 (thuth/qemu-kvm-cs9) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown -for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail -if the VM is not larger than 2GiB. QEMU would attempt it and fail, -print an error message, and then proceed with a normal teardown. - -Avoid attempting to use asynchronous teardown altogether when the VM is -not larger than 2 GiB. This will avoid triggering the error message and -also avoid pointless overhead; normal teardown is fast enough for small -VMs. 
- -Reported-by: Marc Hartmayer -Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot") -Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/ -Signed-off-by: Claudio Imbrenda -Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com> -Reviewed-by: Thomas Huth -[thuth: Fix inline function parameter in pv.h] -Signed-off-by: Thomas Huth -(cherry picked from commit 88693ab2a53f2f3d25cb39a7b5034ab391bc5a81) ---- - hw/s390x/pv.c | 10 ++++++++-- - hw/s390x/s390-virtio-ccw.c | 2 +- - include/hw/s390x/pv.h | 6 +++--- - 3 files changed, 12 insertions(+), 6 deletions(-) - -diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c -index 49ea38236c..b63f3784c6 100644 ---- a/hw/s390x/pv.c -+++ b/hw/s390x/pv.c -@@ -13,6 +13,7 @@ - - #include - -+#include "qemu/units.h" - #include "qapi/error.h" - #include "qemu/error-report.h" - #include "sysemu/kvm.h" -@@ -115,7 +116,7 @@ static void *s390_pv_do_unprot_async_fn(void *p) - return NULL; - } - --bool s390_pv_vm_try_disable_async(void) -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) - { - /* - * t is only needed to create the thread; once qemu_thread_create -@@ -123,7 +124,12 @@ bool s390_pv_vm_try_disable_async(void) - */ - QemuThread t; - -- if (!kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { -+ /* -+ * If the feature is not present or if the VM is not larger than 2 GiB, -+ * KVM_PV_ASYNC_CLEANUP_PREPARE fill fail; no point in attempting it. 
-+ */ -+ if ((MACHINE(ms)->maxram_size <= 2 * GiB) || -+ !kvm_check_extension(kvm_state, KVM_CAP_S390_PROTECTED_ASYNC_DISABLE)) { - return false; - } - if (s390_pv_cmd(KVM_PV_ASYNC_CLEANUP_PREPARE, NULL) != 0) { -diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c -index 6a0b93c63d..d95c595f88 100644 ---- a/hw/s390x/s390-virtio-ccw.c -+++ b/hw/s390x/s390-virtio-ccw.c -@@ -330,7 +330,7 @@ static inline void s390_do_cpu_ipl(CPUState *cs, run_on_cpu_data arg) - - static void s390_machine_unprotect(S390CcwMachineState *ms) - { -- if (!s390_pv_vm_try_disable_async()) { -+ if (!s390_pv_vm_try_disable_async(ms)) { - s390_pv_vm_disable(); - } - ms->pv = false; -diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h -index 966306a9db..7b935e2246 100644 ---- a/include/hw/s390x/pv.h -+++ b/include/hw/s390x/pv.h -@@ -14,10 +14,10 @@ - - #include "qapi/error.h" - #include "sysemu/kvm.h" -+#include "hw/s390x/s390-virtio-ccw.h" - - #ifdef CONFIG_KVM - #include "cpu.h" --#include "hw/s390x/s390-virtio-ccw.h" - - static inline bool s390_is_pv(void) - { -@@ -41,7 +41,7 @@ static inline bool s390_is_pv(void) - int s390_pv_query_info(void); - int s390_pv_vm_enable(void); - void s390_pv_vm_disable(void); --bool s390_pv_vm_try_disable_async(void); -+bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); - int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); - int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); - void s390_pv_prep_reset(void); -@@ -61,7 +61,7 @@ static inline bool s390_is_pv(void) { return false; } - static inline int s390_pv_query_info(void) { return 0; } - static inline int s390_pv_vm_enable(void) { return 0; } - static inline void s390_pv_vm_disable(void) {} --static inline bool s390_pv_vm_try_disable_async(void) { return false; } -+static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } - static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } - static 
inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } - static inline void s390_pv_prep_reset(void) {} --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-Await-request-purging.patch b/SOURCES/kvm-scsi-Await-request-purging.patch new file mode 100644 index 0000000..9bd4399 --- /dev/null +++ b/SOURCES/kvm-scsi-Await-request-purging.patch @@ -0,0 +1,124 @@ +From 94d6458a58239b52394d58b6880509041186d5a8 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 15:47:55 +0100 +Subject: [PATCH 04/22] scsi: Await request purging + +RH-Author: Hanna Czenczek +RH-MergeRequest: 222: Allow concurrent BlockBackend context changes +RH-Jira: RHEL-24593 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Commit: [2/2] 35a89273cab0af8f999881e67d359fe1328363a0 (hreitz/qemu-kvm-c-9-s) + +scsi_device_for_each_req_async() currently does not provide any way to +be awaited. One of its callers is scsi_device_purge_requests(), which +therefore currently does not guarantee that all requests are fully +settled when it returns. + +We want all requests to be settled, because scsi_device_purge_requests() +is called through the unrealize path, including the one invoked by +virtio_scsi_hotunplug() through qdev_simple_device_unplug_cb(), which +most likely assumes that all SCSI requests are done then. + +In fact, scsi_device_purge_requests() already contains a blk_drain(), +but this will not fully await scsi_device_for_each_req_async(), only the +I/O requests it potentially cancels (not the non-I/O requests). +However, we can have scsi_device_for_each_req_async() increment the BB +in-flight counter, and have scsi_device_for_each_req_async_bh() +decrement it when it is done. This way, the blk_drain() will fully +await all SCSI requests to be purged. 
+ +This also removes the need for scsi_device_for_each_req_async_bh() to +double-check the current context and potentially re-schedule itself, +should it now differ from the BB's context: Changing a BB's AioContext +with a root node is done through bdrv_try_change_aio_context(), which +creates a drained section. With this patch, we keep the BB in-flight +counter elevated throughout, so we know the BB's context cannot change. + +Signed-off-by: Hanna Czenczek +Message-ID: <20240202144755.671354-3-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 1604c0493193273e4eac547f86fbd2845e7f9af4) +--- + hw/scsi/scsi-bus.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index 5b08cbf60a..b1bf8e6433 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -120,17 +120,13 @@ static void scsi_device_for_each_req_async_bh(void *opaque) + SCSIRequest *next; + + /* +- * If the AioContext changed before this BH was called then reschedule into +- * the new AioContext before accessing ->requests. This can happen when +- * scsi_device_for_each_req_async() is called and then the AioContext is +- * changed before BHs are run. ++ * The BB cannot have changed contexts between this BH being scheduled and ++ * now: BBs' AioContexts, when they have a node attached, can only be ++ * changed via bdrv_try_change_aio_context(), in a drained section. While ++ * we have the in-flight counter incremented, that drain must block. 
+ */ + ctx = blk_get_aio_context(s->conf.blk); +- if (ctx != qemu_get_current_aio_context()) { +- aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, +- g_steal_pointer(&data)); +- return; +- } ++ assert(ctx == qemu_get_current_aio_context()); + + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + data->fn(req, data->fn_opaque); +@@ -138,11 +134,16 @@ static void scsi_device_for_each_req_async_bh(void *opaque) + + /* Drop the reference taken by scsi_device_for_each_req_async() */ + object_unref(OBJECT(s)); ++ ++ /* Paired with blk_inc_in_flight() in scsi_device_for_each_req_async() */ ++ blk_dec_in_flight(s->conf.blk); + } + + /* + * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() + * runs in the AioContext that is executing the request. ++ * Keeps the BlockBackend's in-flight counter incremented until everything is ++ * done, so draining it will settle all scheduled @fn() calls. + */ + static void scsi_device_for_each_req_async(SCSIDevice *s, + void (*fn)(SCSIRequest *, void *), +@@ -163,6 +164,8 @@ static void scsi_device_for_each_req_async(SCSIDevice *s, + */ + object_ref(OBJECT(s)); + ++ /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */ ++ blk_inc_in_flight(s->conf.blk); + aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), + scsi_device_for_each_req_async_bh, + data); +@@ -1728,11 +1731,20 @@ static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) + scsi_req_cancel_async(req, NULL); + } + ++/** ++ * Cancel all requests, and block until they are deleted. ++ */ + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { + scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + ++ /* ++ * Await all the scsi_device_purge_one_req() calls scheduled by ++ * scsi_device_for_each_req_async(), and all I/O requests that were ++ * cancelled this way, but may still take a bit of time to settle. 
++ */ + blk_drain(sdev->conf.blk); ++ + scsi_device_set_ua(sdev, sense); + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch new file mode 100644 index 0000000..6d43810 --- /dev/null +++ b/SOURCES/kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch @@ -0,0 +1,88 @@ +From cd08d22a0da022d99fe6cfddb7de680abf66c8be Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:19:59 -0500 +Subject: [PATCH 082/101] scsi: assert that callbacks run in the correct + AioContext + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [13/26] d2fd5065c3b72d9d2f4e37efee39fe12eba0f0a9 (kmwolf/centos-qemu-kvm) + +Since the removal of AioContext locking, the correctness of the code +relies on running requests from a single AioContext at any given time. + +Add assertions that verify that callbacks are invoked in the correct +AioContext. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231205182011.1976568-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 14 ++++++++++++++ + system/dma-helpers.c | 3 +++ + 2 files changed, 17 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 2c1bbb3530..a5048e0aaf 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret) + + static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert(r->req.aiocb == NULL); + if (scsi_disk_req_check_error(r, ret, false)) { + goto done; +@@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req) + + static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + uint32_t n; + ++ /* The request must only run in the BlockBackend's AioContext */ ++ assert(blk_get_aio_context(s->qdev.conf.blk) == ++ qemu_get_current_aio_context()); ++ + assert (r->req.aiocb == NULL); + if (scsi_disk_req_check_error(r, ret, false)) { + goto done; +diff --git a/system/dma-helpers.c b/system/dma-helpers.c +index 528117f256..9b221cf94e 100644 +--- a/system/dma-helpers.c ++++ b/system/dma-helpers.c +@@ -119,6 +119,9 @@ static void dma_blk_cb(void *opaque, int ret) + + 
trace_dma_blk_cb(dbs, ret); + ++ /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */ ++ assert(ctx == qemu_get_current_aio_context()); ++ + dbs->acb = NULL; + dbs->offset += dbs->iov.size; + +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch b/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch deleted file mode 100644 index 11dda3a..0000000 --- a/SOURCES/kvm-scsi-cleanup-scsi_clear_unit_attention.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 5dd7d26c034c26b2d4d9b91b8d1a7b605e19730f Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:51 +0200 -Subject: [PATCH 02/12] scsi: cleanup scsi_clear_unit_attention() - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [2/3] b3a06a91644e44fae3d76d0fbe72448652db517a (sgarzarella/qemu-kvm-c-9-s) - -The previous commit moved the unit attention clearing when we create -the request. So now we can clean scsi_clear_unit_attention() to handle -only the case of the REPORT LUNS command: this is the only case in -which a UNIT ATTENTION is cleared without having been reported. 
- -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-3-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit ba947dab98e7cd4337c70975bd255701a2a6aad8) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 28 ++++++---------------------- - 1 file changed, 6 insertions(+), 22 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 5d22313b9d..cecd26479e 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -828,26 +828,12 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- if (req->dev->unit_attention.key != UNIT_ATTENTION && -- req->bus->unit_attention.key != UNIT_ATTENTION) { -- return; -- } -- -- /* -- * If an INQUIRY command enters the enabled command state, -- * the device server shall [not] clear any unit attention condition; -- * See also MMC-6, paragraphs 6.5 and 6.6.2. -- */ -- if (req->cmd.buf[0] == INQUIRY || -- req->cmd.buf[0] == GET_CONFIGURATION || -- req->cmd.buf[0] == GET_EVENT_STATUS_NOTIFICATION) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; -- } else { -+ } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { - ua = &req->bus->unit_attention; -+ } else { -+ return; - } - - /* -@@ -856,12 +842,10 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. 
- */ - if (req->cmd.buf[0] == REPORT_LUNS && -- !(ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -- ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq)) { -- return; -+ ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { -+ *ua = SENSE_CODE(NO_SENSE); - } -- -- *ua = SENSE_CODE(NO_SENSE); - } - - int scsi_req_get_sense(SCSIRequest *req, uint8_t *buf, int len) --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch b/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch deleted file mode 100644 index cb3b24e..0000000 --- a/SOURCES/kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 0a784c45a7b7ee32c36bf86eebb24c8431a89f49 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:52 +0200 -Subject: [PATCH 03/12] scsi: clear unit attention only for REPORT LUNS - commands - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [3/3] 01d5e112ef9ae204d96ceb01b4a453fdb4e8b669 (sgarzarella/qemu-kvm-c-9-s) - -scsi_clear_unit_attention() now only handles REPORTED LUNS DATA HAS -CHANGED. - -This only happens when we handle REPORT LUNS commands, so let's rename -the function in scsi_clear_reported_luns_changed() and call it only in -scsi_target_emulate_report_luns(). 
- -Suggested-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-4-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 2eb5599e8a73e70a9e86a97120818ff95a43a23a) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 34 +++++++++++----------------------- - 1 file changed, 11 insertions(+), 23 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index cecd26479e..9542410800 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -22,6 +22,7 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev); - static void scsi_req_dequeue(SCSIRequest *req); - static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len); - static void scsi_target_free_buf(SCSIRequest *req); -+static void scsi_clear_reported_luns_changed(SCSIRequest *req); - - static int next_scsi_bus; - -@@ -518,6 +519,14 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq *r) - - /* store the LUN list length */ - stl_be_p(&r->buf[0], len - 8); -+ -+ /* -+ * If a REPORT LUNS command enters the enabled command state, [...] -+ * the device server shall clear any pending unit attention condition -+ * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -+ */ -+ scsi_clear_reported_luns_changed(&r->req); -+ - return true; - } - -@@ -816,18 +825,10 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - return req->ops->get_buf(req); - } - --static void scsi_clear_unit_attention(SCSIRequest *req) -+static void scsi_clear_reported_luns_changed(SCSIRequest *req) - { - SCSISense *ua; - -- /* -- * scsi_fetch_unit_attention_sense() already cleaned the unit attention -- * in this case. 
-- */ -- if (req->ops == &reqops_unit_attention) { -- return; -- } -- - if (req->dev->unit_attention.key == UNIT_ATTENTION) { - ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -@@ -836,13 +837,7 @@ static void scsi_clear_unit_attention(SCSIRequest *req) - return; - } - -- /* -- * If a REPORT LUNS command enters the enabled command state, [...] -- * the device server shall clear any pending unit attention condition -- * with an additional sense code of REPORTED LUNS DATA HAS CHANGED. -- */ -- if (req->cmd.buf[0] == REPORT_LUNS && -- ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && -+ if (ua->asc == SENSE_CODE(REPORTED_LUNS_CHANGED).asc && - ua->ascq == SENSE_CODE(REPORTED_LUNS_CHANGED).ascq) { - *ua = SENSE_CODE(NO_SENSE); - } -@@ -1528,13 +1523,6 @@ void scsi_req_complete(SCSIRequest *req, int status) - req->dev->sense_is_ua = false; - } - -- /* -- * Unit attention state is now stored in the device's sense buffer -- * if the HBA didn't do autosense. Clear the pending unit attention -- * flags. 
-- */ -- scsi_clear_unit_attention(req); -- - scsi_req_ref(req); - scsi_req_dequeue(req); - req->bus->info->complete(req, req->residual); --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch new file mode 100644 index 0000000..65b08ce --- /dev/null +++ b/SOURCES/kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch @@ -0,0 +1,245 @@ +From d1d384bd24a7aeb527f4abd8a0958146544ef9bb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:58 -0500 +Subject: [PATCH 079/101] scsi: don't lock AioContext in I/O code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [10/26] b5814cec94af5c254e300646d8783672b085bac3 (kmwolf/centos-qemu-kvm) + +blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's +internal state also does not anymore. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Acked-by: Kevin Wolf +Message-ID: <20231204164259.1515217-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 23 ----------------------- + hw/scsi/scsi-generic.c | 20 +++----------------- + 2 files changed, 3 insertions(+), 40 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 6691f5edb8..2c1bbb3530 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret) + scsi_req_complete(&r->req, GOOD); + + done: +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + scsi_req_unref(&r->req); + } + +@@ 
-394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret) + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } + scsi_read_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + /* Actually issue a read to the block device. */ +@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_do_read(opaque, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + /* Read more data from scsi device into buffer. 
*/ +@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret) + SCSIDiskReq *r = (SCSIDiskReq *)opaque; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_write_data(SCSIRequest *req) +@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + scsi_unmap_complete_noio(data, ret); + } +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf) +@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret) + SCSIDiskReq *r = data->r; + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + +- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + +@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret) + data->sector << BDRV_SECTOR_BITS, + &data->qiov, 0, + scsi_write_same_complete, data); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + return; + } + +@@ -1857,7 +1839,6 @@ done: + scsi_req_unref(&r->req); + qemu_vfree(data->iov.iov_base); + g_free(data); +- aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); + } + + static void 
scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf) +@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + { + SCSIBlockReq *req = (SCSIBlockReq *)opaque; + SCSIDiskReq *r = &req->req; +- SCSIDevice *s = r->req.dev; + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { +@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } + + if (ret > 0) { +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + if (scsi_handle_rw_error(r, ret, true)) { +- aio_context_release(blk_get_aio_context(s->conf.blk)); + scsi_req_unref(&r->req); + return; + } +- aio_context_release(blk_get_aio_context(s->conf.blk)); + + /* Ignore error. */ + ret = 0; +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 2417f0ad84..b7b04e1d63 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -109,15 +109,11 @@ done: + static void scsi_command_complete(void *opaque, int ret) + { + SCSIGenericReq *r = (SCSIGenericReq *)opaque; +- SCSIDevice *s = r->req.dev; +- +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); + + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + scsi_command_complete_noio(r, ret); +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + static int execute_command(BlockBackend *blk, +@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret) + SCSIDevice *s = r->req.dev; + int len; + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); +- goto done; ++ return; + } + + len = r->io_header.dxfer_len - r->io_header.resid; +@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret) + r->io_header.status != GOOD || + len == 0) { + scsi_command_complete_noio(r, 0); +- goto done; ++ return; + } + + /* Snoop READ CAPACITY output to set the blocksize. 
*/ +@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret) + req_complete: + scsi_req_data(&r->req, len); + scsi_req_unref(&r->req); +- +-done: +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + /* Read more data from scsi device into buffer. */ +@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret) + + trace_scsi_generic_write_complete(ret); + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + + if (ret || r->req.io_canceled) { + scsi_command_complete_noio(r, ret); +- goto done; ++ return; + } + + if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 && +@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret) + } + + scsi_command_complete_noio(r, ret); +- +-done: +- aio_context_release(blk_get_aio_context(s->conf.blk)); + } + + /* Write data to a scsi device. Returns nonzero on failure. +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch b/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch deleted file mode 100644 index a41ae82..0000000 --- a/SOURCES/kvm-scsi-fetch-unit-attention-when-creating-the-request.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 562ea3a2d602cf41c548f3ddf52c43c04fded347 Mon Sep 17 00:00:00 2001 -From: Stefano Garzarella -Date: Wed, 12 Jul 2023 15:43:50 +0200 -Subject: [PATCH 01/12] scsi: fetch unit attention when creating the request - -RH-Author: Stefano Garzarella -RH-MergeRequest: 184: scsi: fix issue with Linux guest and unit attention -RH-Bugzilla: 2176702 -RH-Acked-by: Thomas Huth -RH-Acked-by: Paolo Bonzini -RH-Acked-by: Stefan Hajnoczi -RH-Commit: [1/3] 04563caac45d0110ea65eda8e55472556cd317c0 (sgarzarella/qemu-kvm-c-9-s) - -Commit 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") split -calls to scsi_req_new() and scsi_req_enqueue() in the virtio-scsi device. 
-No ill effects were observed until commit 8cc5583abe ("virtio-scsi: Send -"REPORTED LUNS CHANGED" sense data upon disk hotplug events") added a -unit attention that was easy to trigger with device hotplug and -hot-unplug. - -Because the two calls were separated, all requests in the batch were -prepared calling scsi_req_new() to report a sense. The first one -submitted would report the right sense and reset it to NO_SENSE, while -the others reported CHECK_CONDITION with no sense data. This caused -SCSI errors in Linux. - -To solve this issue, let's fetch the unit attention as early as possible -when we prepare the request, so that only the first request in the batch -will use the unit attention SCSIReqOps and the others will not report -CHECK CONDITION. - -Fixes: 1880ad4f4e ("virtio-scsi: Batched prepare for cmd reqs") -Fixes: 8cc5583abe ("virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events") -Reported-by: Thomas Huth -Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2176702 -Co-developed-by: Paolo Bonzini -Signed-off-by: Stefano Garzarella -Message-ID: <20230712134352.118655-2-sgarzare@redhat.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 9472083e642bfb9bc836b38662baddd9bc964ebc) -Signed-off-by: Stefano Garzarella ---- - hw/scsi/scsi-bus.c | 36 +++++++++++++++++++++++++++++++++--- - include/hw/scsi/scsi.h | 1 + - 2 files changed, 34 insertions(+), 3 deletions(-) - -diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c -index 3c20b47ad0..5d22313b9d 100644 ---- a/hw/scsi/scsi-bus.c -+++ b/hw/scsi/scsi-bus.c -@@ -413,19 +413,35 @@ static const struct SCSIReqOps reqops_invalid_opcode = { - - /* SCSIReqOps implementation for unit attention conditions. 
*/ - --static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+static void scsi_fetch_unit_attention_sense(SCSIRequest *req) - { -+ SCSISense *ua = NULL; -+ - if (req->dev->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->dev->unit_attention); -+ ua = &req->dev->unit_attention; - } else if (req->bus->unit_attention.key == UNIT_ATTENTION) { -- scsi_req_build_sense(req, req->bus->unit_attention); -+ ua = &req->bus->unit_attention; - } -+ -+ /* -+ * Fetch the unit attention sense immediately so that another -+ * scsi_req_new does not use reqops_unit_attention. -+ */ -+ if (ua) { -+ scsi_req_build_sense(req, *ua); -+ *ua = SENSE_CODE(NO_SENSE); -+ } -+} -+ -+static int32_t scsi_unit_attention(SCSIRequest *req, uint8_t *buf) -+{ - scsi_req_complete(req, CHECK_CONDITION); - return 0; - } - - static const struct SCSIReqOps reqops_unit_attention = { - .size = sizeof(SCSIRequest), -+ .init_req = scsi_fetch_unit_attention_sense, - .send_command = scsi_unit_attention - }; - -@@ -699,6 +715,11 @@ SCSIRequest *scsi_req_alloc(const SCSIReqOps *reqops, SCSIDevice *d, - object_ref(OBJECT(d)); - object_ref(OBJECT(qbus->parent)); - notifier_list_init(&req->cancel_notifiers); -+ -+ if (reqops->init_req) { -+ reqops->init_req(req); -+ } -+ - trace_scsi_req_alloc(req->dev->id, req->lun, req->tag); - return req; - } -@@ -798,6 +819,15 @@ uint8_t *scsi_req_get_buf(SCSIRequest *req) - static void scsi_clear_unit_attention(SCSIRequest *req) - { - SCSISense *ua; -+ -+ /* -+ * scsi_fetch_unit_attention_sense() already cleaned the unit attention -+ * in this case. 
-+ */ -+ if (req->ops == &reqops_unit_attention) { -+ return; -+ } -+ - if (req->dev->unit_attention.key != UNIT_ATTENTION && - req->bus->unit_attention.key != UNIT_ATTENTION) { - return; -diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h -index 6f23a7a73e..1787ddd01e 100644 ---- a/include/hw/scsi/scsi.h -+++ b/include/hw/scsi/scsi.h -@@ -108,6 +108,7 @@ int cdrom_read_toc_raw(int nb_sectors, uint8_t *buf, int msf, int session_num); - /* scsi-bus.c */ - struct SCSIReqOps { - size_t size; -+ void (*init_req)(SCSIRequest *req); - void (*free_req)(SCSIRequest *req); - int32_t (*send_command)(SCSIRequest *req, uint8_t *buf); - void (*read_data)(SCSIRequest *req); --- -2.39.3 - diff --git a/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch new file mode 100644 index 0000000..30f1c00 --- /dev/null +++ b/SOURCES/kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch @@ -0,0 +1,307 @@ +From 42dd1357310bd1a68d6cacaa53cd5b1d1b02880d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:56 -0500 +Subject: [PATCH 077/101] scsi: only access SCSIDevice->requests from one + thread + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [8/26] 9df662e82a63e93d184b5763bebbe7e43bc9dabe (kmwolf/centos-qemu-kvm) + +Stop depending on the AioContext lock and instead access +SCSIDevice->requests from only one thread at a time: +- When the VM is running only the BlockBackend's AioContext may access + the requests list. +- When the VM is stopped only the main loop may access the requests + list. + +These constraints protect the requests list without the need for locking +in the I/O code path. + +Note that multiple IOThreads are not supported yet because the code +assumes all SCSIRequests are executed from a single AioContext. 
Leave +that as future work. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231204164259.1515217-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++------------- + include/hw/scsi/scsi.h | 7 +- + 2 files changed, 131 insertions(+), 57 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index fc4b77fdb0..b649cdf555 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun) + return d; + } + ++/* ++ * Invoke @fn() for each enqueued request in device @s. Must be called from the ++ * main loop thread while the guest is stopped. This is only suitable for ++ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases. ++ */ ++static void scsi_device_for_each_req_sync(SCSIDevice *s, ++ void (*fn)(SCSIRequest *, void *), ++ void *opaque) ++{ ++ SCSIRequest *req; ++ SCSIRequest *next_req; ++ ++ assert(!runstate_is_running()); ++ assert(qemu_in_main_thread()); ++ ++ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) { ++ fn(req, opaque); ++ } ++} ++ ++typedef struct { ++ SCSIDevice *s; ++ void (*fn)(SCSIRequest *, void *); ++ void *fn_opaque; ++} SCSIDeviceForEachReqAsyncData; ++ ++static void scsi_device_for_each_req_async_bh(void *opaque) ++{ ++ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque; ++ SCSIDevice *s = data->s; ++ AioContext *ctx; ++ SCSIRequest *req; ++ SCSIRequest *next; ++ ++ /* ++ * If the AioContext changed before this BH was called then reschedule into ++ * the new AioContext before accessing ->requests. This can happen when ++ * scsi_device_for_each_req_async() is called and then the AioContext is ++ * changed before BHs are run. 
++ */ ++ ctx = blk_get_aio_context(s->conf.blk); ++ if (ctx != qemu_get_current_aio_context()) { ++ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, ++ g_steal_pointer(&data)); ++ return; ++ } ++ ++ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { ++ data->fn(req, data->fn_opaque); ++ } ++ ++ /* Drop the reference taken by scsi_device_for_each_req_async() */ ++ object_unref(OBJECT(s)); ++} ++ ++/* ++ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn() ++ * runs in the AioContext that is executing the request. ++ */ ++static void scsi_device_for_each_req_async(SCSIDevice *s, ++ void (*fn)(SCSIRequest *, void *), ++ void *opaque) ++{ ++ assert(qemu_in_main_thread()); ++ ++ SCSIDeviceForEachReqAsyncData *data = ++ g_new(SCSIDeviceForEachReqAsyncData, 1); ++ ++ data->s = s; ++ data->fn = fn; ++ data->fn_opaque = opaque; ++ ++ /* ++ * Hold a reference to the SCSIDevice until ++ * scsi_device_for_each_req_async_bh() finishes. ++ */ ++ object_ref(OBJECT(s)); ++ ++ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk), ++ scsi_device_for_each_req_async_bh, ++ data); ++} ++ + static void scsi_device_realize(SCSIDevice *s, Error **errp) + { + SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s); +@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus)); + } + +-static void scsi_dma_restart_bh(void *opaque) ++void scsi_req_retry(SCSIRequest *req) + { +- SCSIDevice *s = opaque; +- SCSIRequest *req, *next; +- +- qemu_bh_delete(s->bh); +- s->bh = NULL; ++ req->retry = true; ++} + +- aio_context_acquire(blk_get_aio_context(s->conf.blk)); +- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { +- scsi_req_ref(req); +- if (req->retry) { +- req->retry = false; +- switch (req->cmd.mode) { ++/* Called in the AioContext that is executing the request */ ++static void scsi_dma_restart_req(SCSIRequest *req, void *opaque) ++{ ++ scsi_req_ref(req); ++ if (req->retry) { 
++ req->retry = false; ++ switch (req->cmd.mode) { + case SCSI_XFER_FROM_DEV: + case SCSI_XFER_TO_DEV: + scsi_req_continue(req); +@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque) + scsi_req_dequeue(req); + scsi_req_enqueue(req); + break; +- } + } +- scsi_req_unref(req); + } +- aio_context_release(blk_get_aio_context(s->conf.blk)); +- /* Drop the reference that was acquired in scsi_dma_restart_cb */ +- object_unref(OBJECT(s)); +-} +- +-void scsi_req_retry(SCSIRequest *req) +-{ +- /* No need to save a reference, because scsi_dma_restart_bh just +- * looks at the request list. */ +- req->retry = true; ++ scsi_req_unref(req); + } + + static void scsi_dma_restart_cb(void *opaque, bool running, RunState state) + { + SCSIDevice *s = opaque; + ++ assert(qemu_in_main_thread()); ++ + if (!running) { + return; + } +- if (!s->bh) { +- AioContext *ctx = blk_get_aio_context(s->conf.blk); +- /* The reference is dropped in scsi_dma_restart_bh.*/ +- object_ref(OBJECT(s)); +- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s, +- &DEVICE(s)->mem_reentrancy_guard); +- qemu_bh_schedule(s->bh); +- } ++ ++ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL); + } + + static bool scsi_bus_is_address_free(SCSIBus *bus, +@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense) + } + } + ++static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque) ++{ ++ scsi_req_cancel_async(req, NULL); ++} ++ + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { +- SCSIRequest *req; ++ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + + aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); +- while (!QTAILQ_EMPTY(&sdev->requests)) { +- req = QTAILQ_FIRST(&sdev->requests); +- scsi_req_cancel_async(req, NULL); +- } + blk_drain(sdev->conf.blk); + aio_context_release(blk_get_aio_context(sdev->conf.blk)); + scsi_device_set_ua(sdev, sense); +@@ -1737,31 +1804,33 @@ static char 
*scsibus_get_fw_dev_path(DeviceState *dev) + + /* SCSI request list. For simplicity, pv points to the whole device */ + ++static void put_scsi_req(SCSIRequest *req, void *opaque) ++{ ++ QEMUFile *f = opaque; ++ ++ assert(!req->io_canceled); ++ assert(req->status == -1 && req->host_status == -1); ++ assert(req->enqueued); ++ ++ qemu_put_sbyte(f, req->retry ? 1 : 2); ++ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); ++ qemu_put_be32s(f, &req->tag); ++ qemu_put_be32s(f, &req->lun); ++ if (req->bus->info->save_request) { ++ req->bus->info->save_request(f, req); ++ } ++ if (req->ops->save_request) { ++ req->ops->save_request(f, req); ++ } ++} ++ + static int put_scsi_requests(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) + { + SCSIDevice *s = pv; +- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus); +- SCSIRequest *req; + +- QTAILQ_FOREACH(req, &s->requests, next) { +- assert(!req->io_canceled); +- assert(req->status == -1 && req->host_status == -1); +- assert(req->enqueued); +- +- qemu_put_sbyte(f, req->retry ? 
1 : 2); +- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf)); +- qemu_put_be32s(f, &req->tag); +- qemu_put_be32s(f, &req->lun); +- if (bus->info->save_request) { +- bus->info->save_request(f, req); +- } +- if (req->ops->save_request) { +- req->ops->save_request(f, req); +- } +- } ++ scsi_device_for_each_req_sync(s, put_scsi_req, f); + qemu_put_sbyte(f, 0); +- + return 0; + } + +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 3692ca82f3..10c4e8288d 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -69,14 +69,19 @@ struct SCSIDevice + { + DeviceState qdev; + VMChangeStateEntry *vmsentry; +- QEMUBH *bh; + uint32_t id; + BlockConf conf; + SCSISense unit_attention; + bool sense_is_ua; + uint8_t sense[SCSI_SENSE_BUF_SIZE]; + uint32_t sense_len; ++ ++ /* ++ * The requests list is only accessed from the AioContext that executes ++ * requests or from the main loop when IOThread processing is stopped. ++ */ + QTAILQ_HEAD(, SCSIRequest) requests; ++ + uint32_t channel; + uint32_t lun; + int blocksize; +-- +2.39.3 + diff --git a/SOURCES/kvm-scsi-remove-AioContext-locking.patch b/SOURCES/kvm-scsi-remove-AioContext-locking.patch new file mode 100644 index 0000000..34a5e46 --- /dev/null +++ b/SOURCES/kvm-scsi-remove-AioContext-locking.patch @@ -0,0 +1,280 @@ +From 61d605433a5edfcc7fe836fd399106ed1e1907bb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:05 -0500 +Subject: [PATCH 088/101] scsi: remove AioContext locking + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [19/26] 12a8e26670074a17dd2b0cfac06e0aea03b3068f (kmwolf/centos-qemu-kvm) + +The AioContext lock no longer has any effect. Remove it. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-9-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-bus.c | 2 -- + hw/scsi/scsi-disk.c | 31 +++++-------------------------- + hw/scsi/virtio-scsi.c | 18 ------------------ + include/hw/virtio/virtio-scsi.h | 14 -------------- + 4 files changed, 5 insertions(+), 60 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index b649cdf555..5b08cbf60a 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense) + { + scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL); + +- aio_context_acquire(blk_get_aio_context(sdev->conf.blk)); + blk_drain(sdev->conf.blk); +- aio_context_release(blk_get_aio_context(sdev->conf.blk)); + scsi_device_set_ua(sdev, sense); + } + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index a5048e0aaf..61be3d395a 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev); + uint64_t nb_sectors; +- AioContext *ctx; + + scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET)); + +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); + blk_get_geometry(s->qdev.conf.blk, &nb_sectors); +- aio_context_release(ctx); + + nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE; + if (nb_sectors) { +@@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev) + static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx = NULL; ++ + /* can happen for devices without drive. 
The error message for missing + * backend will be issued in scsi_realize + */ + if (s->qdev.conf.blk) { +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { +- goto out; ++ return; + } + } + s->qdev.blocksize = s->qdev.conf.logical_block_size; +@@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU HARDDISK"); + } + scsi_realize(&s->qdev, errp); +-out: +- if (ctx) { +- aio_context_release(ctx); +- } + } + + static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx; + int ret; + uint32_t blocksize = 2048; + +@@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + blocksize = dev->conf.physical_block_size; + } + +- ctx = blk_get_aio_context(dev->conf.blk); +- aio_context_acquire(ctx); + s->qdev.blocksize = blocksize; + s->qdev.type = TYPE_ROM; + s->features |= 1 << SCSI_DISK_F_REMOVABLE; +@@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp) + s->product = g_strdup("QEMU CD-ROM"); + } + scsi_realize(&s->qdev, errp); +- aio_context_release(ctx); + } + + +@@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s) + static void scsi_block_realize(SCSIDevice *dev, Error **errp) + { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev); +- AioContext *ctx; + int sg_version; + int rc; + +@@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + "be removed in a future version"); + } + +- ctx = blk_get_aio_context(s->qdev.conf.blk); +- aio_context_acquire(ctx); +- + /* check we are using a driver managing SG_IO (version 3 and after) */ + rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version); + if (rc < 0) { +@@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + if (rc != -EPERM) { + error_append_hint(errp, "Is this a SCSI 
device?\n"); + } +- goto out; ++ return; + } + if (sg_version < 30000) { + error_setg(errp, "scsi generic interface too old"); +- goto out; ++ return; + } + + /* get device type from INQUIRY data */ + rc = get_device_type(s); + if (rc < 0) { + error_setg(errp, "INQUIRY failed"); +- goto out; ++ return; + } + + /* Make a guess for the block size, we'll fix it when the guest sends. +@@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp) + + scsi_realize(&s->qdev, errp); + scsi_generic_read_device_inquiry(&s->qdev); +- +-out: +- aio_context_release(ctx); + } + + typedef struct SCSIBlockReq { +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 4f8d35facc..ca365a70e9 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req) +@@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_get_config(VirtIODevice *vdev, +@@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + return; + } + +- virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); +- virtio_scsi_release(s); + } + + static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) +@@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense) + }, + }; + +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); +- virtio_scsi_release(s); + } + } + +@@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev); + VirtIOSCSI *s = 
VIRTIO_SCSI(vdev); + SCSIDevice *sd = SCSI_DEVICE(dev); +- AioContext *old_context; + int ret; + + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; + } +- old_context = blk_get_aio_context(sd->conf.blk); +- aio_context_acquire(old_context); + ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp); +- aio_context_release(old_context); + if (ret < 0) { + return; + } +@@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + }, + }; + +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); + scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); +- virtio_scsi_release(s); + } + } + +@@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev, + qdev_simple_device_unplug_cb(hotplug_dev, dev, errp); + + if (s->ctx) { +- virtio_scsi_acquire(s); + /* If other users keep the BlockBackend in the iothread, that's ok */ + blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL); +- virtio_scsi_release(s); + } + + if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) { +- virtio_scsi_acquire(s); + virtio_scsi_push_event(s, &info); + scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED)); +- virtio_scsi_release(s); + } + } + +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index da8cb928d9..7f0573b1bf 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -101,20 +101,6 @@ struct VirtIOSCSI { + uint32_t host_features; + }; + +-static inline void virtio_scsi_acquire(VirtIOSCSI *s) +-{ +- if (s->ctx) { +- aio_context_acquire(s->ctx); +- } +-} +- +-static inline void virtio_scsi_release(VirtIOSCSI *s) +-{ +- if (s->ctx) { +- aio_context_release(s->ctx); +- } +-} +- + void virtio_scsi_common_realize(DeviceState *dev, + VirtIOHandleOutput ctrl, + VirtIOHandleOutput evt, +-- +2.39.3 + diff --git 
a/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch new file mode 100644 index 0000000..c9baf60 --- /dev/null +++ b/SOURCES/kvm-scsi-remove-outdated-AioContext-lock-comment.patch @@ -0,0 +1,41 @@ +From 9f5c6dbe907fe6227006ab51179eaa50a63559cb Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:09 -0500 +Subject: [PATCH 092/101] scsi: remove outdated AioContext lock comment + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [23/26] 96e2e7d2e6a160ce4d695060f902d21030b3b1d8 (kmwolf/centos-qemu-kvm) + +The SCSI subsystem no longer uses the AioContext lock. Request +processing runs exclusively in the BlockBackend's AioContext since +"scsi: only access SCSIDevice->requests from one thread" and hence the +lock is unnecessary. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20231205182011.1976568-13-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 61be3d395a..2e7e1e9a1c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -355,7 +355,6 @@ done: + scsi_req_unref(&r->req); + } + +-/* Called with AioContext lock held */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch b/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch new file mode 100644 index 0000000..a9d24a0 --- /dev/null +++ b/SOURCES/kvm-smbios-add-smbios_add_usr_blob_size-helper.patch @@ -0,0 +1,62 @@ +From 93cf5b82771f1d1e8182be168dae7a45d42069e9 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 4 Mar 2024 15:39:57 +0100 +Subject: [PATCH 11/20] smbios: add 
smbios_add_usr_blob_size() helper + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [9/18] 8c698fb9e186d2b1d2b7f75a74305f356450ad68 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +it will be used by follow up patch when legacy handling +is moved out into a separate file. + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +--- + hw/smbios/smbios.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 441517cf24..c48a290478 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1426,6 +1426,14 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, + return true; + } + ++static void smbios_add_usr_blob_size(size_t size) ++{ ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); ++ } ++ g_array_append_val(usr_blobs_sizes, size); ++} ++ + void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; +@@ -1473,10 +1481,12 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + smbios_type4_count++; + } + +- if (!usr_blobs_sizes) { +- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); +- } +- g_array_append_val(usr_blobs_sizes, size); ++ /* ++ * preserve blob size for legacy mode so it could build its ++ * blobs flavor from 'usr_blobs' ++ */ ++ smbios_add_usr_blob_size(size); ++ + usr_blobs_len += size; + if (size > usr_table_max) { + usr_table_max = size; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch b/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch new file mode 100644 index 0000000..b8e4d92 --- /dev/null +++ b/SOURCES/kvm-smbios-avoid-mangling-user-provided-tables.patch @@ -0,0 +1,309 @@ +From 15d293b706ca6c9e6ad569becda8da5f70461c30 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: 
Tue, 13 Feb 2024 09:25:11 +0100 +Subject: [PATCH 09/20] smbios: avoid mangling user provided tables + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [7/18] fa2c7372f55f29e5834eee94ba98f19ea02e7a82 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + currently smbios_entry_add() preserves internally '-smbios type=' + options but tables provided with '-smbios file=' are stored directly + into blob that eventually will be exposed to VM. And then later + QEMU adds default/'-smbios type' entries on top into the same blob. + + It makes impossible to generate tables more than once, hence + 'immutable' guard was used. + Make it possible to regenerate final blob by storing user provided + blobs into a dedicated area (usr_blobs) and then copy it when + composing final blob. Which also makes handling of -smbios + options consistent. + + As side effect of this and previous commits there is no need to + generate legacy smbios_entries at the time options are parsed. + Instead compose smbios_entries on demand from usr_blobs like + it is done for non-legacy SMBIOS tables. 
+ + Signed-off-by: Igor Mammedov + Tested-by: Fiona Ebner + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + caused by downstream smbios_type2_required + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 181 +++++++++++++++++++++++---------------------- + 1 file changed, 93 insertions(+), 88 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 0c8c439859..d8d68716d4 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -60,6 +60,14 @@ static bool smbios_uuid_encoded = true; + /* Set to true for modern Windows 10 HardwareID-6 compat */ + static bool smbios_type2_required; + ++/* ++ * SMBIOS tables provided by user with '-smbios file=' option ++ */ ++uint8_t *usr_blobs; ++size_t usr_blobs_len; ++static GArray *usr_blobs_sizes; ++static unsigned usr_table_max; ++static unsigned usr_table_cnt; + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -70,7 +78,6 @@ static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + static SmbiosEntryPoint ep; + + static int smbios_type4_count = 0; +-static bool smbios_immutable; + static bool smbios_have_defaults; + static uint32_t smbios_cpuid_version, smbios_cpuid_features; + +@@ -617,9 +624,8 @@ static void smbios_build_type_1_fields(void) + + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { +- /* drop unwanted version of command-line file blob(s) */ +- g_free(smbios_tables); +- smbios_tables = NULL; ++ int i; ++ size_t usr_offset; + + /* also complain if fields were given for types > 1 */ + if (find_next_bit(have_fields_bitmap, +@@ -629,12 +635,33 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + exit(1); + } + +- if (!smbios_immutable) { +- smbios_build_type_0_fields(); +- smbios_build_type_1_fields(); +- smbios_validate_table(expected_t4_count); +- smbios_immutable = true; ++ g_free(smbios_entries); ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); ++ 
++ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; ++ i++) ++ { ++ struct smbios_table *table; ++ struct smbios_structure_header *header; ++ size_t size = g_array_index(usr_blobs_sizes, size_t, i); ++ ++ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ size + sizeof(*table)); ++ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); ++ table->header.type = SMBIOS_TABLE_ENTRY; ++ table->header.length = cpu_to_le16(sizeof(*table) + size); ++ memcpy(table->data, header, size); ++ smbios_entries_len += sizeof(*table) + size; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++ usr_offset += size; + } ++ ++ smbios_build_type_0_fields(); ++ smbios_build_type_1_fields(); ++ smbios_validate_table(expected_t4_count); + *length = smbios_entries_len; + return smbios_entries; + } +@@ -1232,68 +1259,68 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + +- /* drop unwanted (legacy) version of command-line file blob(s) */ +- g_free(smbios_entries); +- smbios_entries = NULL; ++ g_free(smbios_tables); ++ smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); ++ smbios_tables_len = usr_blobs_len; ++ smbios_table_max = usr_table_max; ++ smbios_table_cnt = usr_table_cnt; + +- if (!smbios_immutable) { +- smbios_build_type_0_table(); +- smbios_build_type_1_table(); +- smbios_build_type_2_table(); +- smbios_build_type_3_table(); ++ smbios_build_type_0_table(); ++ smbios_build_type_1_table(); ++ smbios_build_type_2_table(); ++ smbios_build_type_3_table(); + +- assert(ms->smp.sockets >= 1); ++ assert(ms->smp.sockets >= 1); + +- for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i); +- } ++ for (i = 0; i < ms->smp.sockets; i++) { ++ smbios_build_type_4_table(ms, i); ++ } + +- smbios_build_type_8_table(); +- smbios_build_type_9_table(errp); +- 
smbios_build_type_11_table(); ++ smbios_build_type_8_table(); ++ smbios_build_type_9_table(errp); ++ smbios_build_type_11_table(); + + #define MAX_DIMM_SZ (16 * GiB) + #define GET_DIMM_SZ ((i < dimm_cnt - 1) ? MAX_DIMM_SZ \ + : ((current_machine->ram_size - 1) % MAX_DIMM_SZ) + 1) + +- dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / MAX_DIMM_SZ; ++ dimm_cnt = QEMU_ALIGN_UP(current_machine->ram_size, MAX_DIMM_SZ) / ++ MAX_DIMM_SZ; + +- /* +- * The offset determines if we need to keep additional space between +- * table 17 and table 19 header handle numbers so that they do +- * not overlap. For example, for a VM with larger than 8 TB guest +- * memory and DIMM like chunks of 16 GiB, the default space between +- * the two tables (T19_BASE - T17_BASE = 512) is not enough. +- */ +- offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ +- dimm_cnt - (T19_BASE - T17_BASE) : 0; ++ /* ++ * The offset determines if we need to keep additional space between ++ * table 17 and table 19 header handle numbers so that they do ++ * not overlap. For example, for a VM with larger than 8 TB guest ++ * memory and DIMM like chunks of 16 GiB, the default space between ++ * the two tables (T19_BASE - T17_BASE = 512) is not enough. ++ */ ++ offset = (dimm_cnt > (T19_BASE - T17_BASE)) ? \ ++ dimm_cnt - (T19_BASE - T17_BASE) : 0; + +- smbios_build_type_16_table(dimm_cnt); ++ smbios_build_type_16_table(dimm_cnt); + +- for (i = 0; i < dimm_cnt; i++) { +- smbios_build_type_17_table(i, GET_DIMM_SZ); +- } ++ for (i = 0; i < dimm_cnt; i++) { ++ smbios_build_type_17_table(i, GET_DIMM_SZ); ++ } + +- for (i = 0; i < mem_array_size; i++) { +- smbios_build_type_19_table(i, offset, mem_array[i].address, +- mem_array[i].length); +- } ++ for (i = 0; i < mem_array_size; i++) { ++ smbios_build_type_19_table(i, offset, mem_array[i].address, ++ mem_array[i].length); ++ } + +- /* +- * make sure 16 bit handle numbers in the headers of tables 19 +- * and 32 do not overlap. 
+- */ +- assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); ++ /* ++ * make sure 16 bit handle numbers in the headers of tables 19 ++ * and 32 do not overlap. ++ */ ++ assert((mem_array_size + offset) < (T32_BASE - T19_BASE)); + +- smbios_build_type_32_table(); +- smbios_build_type_38_table(); +- smbios_build_type_41_table(errp); +- smbios_build_type_127_table(); ++ smbios_build_type_32_table(); ++ smbios_build_type_38_table(); ++ smbios_build_type_41_table(errp); ++ smbios_build_type_127_table(); + +- smbios_validate_table(ms->smp.sockets); +- smbios_entry_point_setup(); +- smbios_immutable = true; +- } ++ smbios_validate_table(ms->smp.sockets); ++ smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; +@@ -1393,13 +1420,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; + +- assert(!smbios_immutable); +- + val = qemu_opt_get(opts, "file"); + if (val) { + struct smbios_structure_header *header; +- int size; +- struct smbios_table *table; /* legacy mode only */ ++ size_t size; + + if (!qemu_opts_validate(opts, qemu_smbios_file_opts, errp)) { + return; +@@ -1416,9 +1440,9 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + * (except in legacy mode, where the second '\0' is implicit and + * will be inserted by the BIOS). 
+ */ +- smbios_tables = g_realloc(smbios_tables, smbios_tables_len + size); +- header = (struct smbios_structure_header *)(smbios_tables + +- smbios_tables_len); ++ usr_blobs = g_realloc(usr_blobs, usr_blobs_len + size); ++ header = (struct smbios_structure_header *)(usr_blobs + ++ usr_blobs_len); + + if (load_image_size(val, (uint8_t *)header, size) != size) { + error_setg(errp, "Failed to load SMBIOS file %s", val); +@@ -1439,34 +1463,15 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + smbios_type4_count++; + } + +- smbios_tables_len += size; +- if (size > smbios_table_max) { +- smbios_table_max = size; ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); + } +- smbios_table_cnt++; +- +- /* add a copy of the newly loaded blob to legacy smbios_entries */ +- /* NOTE: This code runs before smbios_set_defaults(), so we don't +- * yet know which mode (legacy vs. aggregate-table) will be +- * required. We therefore add the binary blob to both legacy +- * (smbios_entries) and aggregate (smbios_tables) tables, and +- * delete the one we don't need from smbios_set_defaults(), +- * once we know which machine version has been requested. 
+- */ +- if (!smbios_entries) { +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); ++ g_array_append_val(usr_blobs_sizes, size); ++ usr_blobs_len += size; ++ if (size > usr_table_max) { ++ usr_table_max = size; + } +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- size + sizeof(*table)); +- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); +- table->header.type = SMBIOS_TABLE_ENTRY; +- table->header.length = cpu_to_le16(sizeof(*table) + size); +- memcpy(table->data, header, size); +- smbios_entries_len += sizeof(*table) + size; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +- /* end: add a copy of the newly loaded blob to legacy smbios_entries */ ++ usr_table_cnt++; + + return; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch b/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch new file mode 100644 index 0000000..1dc4d22 --- /dev/null +++ b/SOURCES/kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch @@ -0,0 +1,517 @@ +From 7ebb314a4f81d6d1a7dd4980b757fb5e556f5837 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 13 Feb 2024 16:45:18 +0100 +Subject: [PATCH 13/20] smbios: build legacy mode code only for 'pc' machine + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [11/18] 06e639be03e0d151fb9bcf5f728388edcb84219a + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + basically moving code around without functional change. + And exposing some symbols so that they could be shared + between smbios.c and new smbios_legacy.c + + plus some meson magic to build smbios_legacy.c only + for 'pc' machine and otherwise replace it with stub + if not selected.
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + context change due to downstream smbios_type2_required + +Signed-off-by: Igor Mammedov +--- + hw/i386/Kconfig | 1 + + hw/smbios/Kconfig | 2 + + hw/smbios/meson.build | 5 + + hw/smbios/smbios.c | 163 +----------------------------- + hw/smbios/smbios_legacy.c | 179 +++++++++++++++++++++++++++++++++ + hw/smbios/smbios_legacy_stub.c | 15 +++ + include/hw/firmware/smbios.h | 5 + + 7 files changed, 208 insertions(+), 162 deletions(-) + create mode 100644 hw/smbios/smbios_legacy.c + create mode 100644 hw/smbios/smbios_legacy_stub.c + +diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig +index a1846be6f7..a6ee052f9a 100644 +--- a/hw/i386/Kconfig ++++ b/hw/i386/Kconfig +@@ -76,6 +76,7 @@ config I440FX + select PIIX + select DIMM + select SMBIOS ++ select SMBIOS_LEGACY + select FW_CFG_DMA + + config ISAPC +diff --git a/hw/smbios/Kconfig b/hw/smbios/Kconfig +index 553adf4bfc..8d989a2f1b 100644 +--- a/hw/smbios/Kconfig ++++ b/hw/smbios/Kconfig +@@ -1,2 +1,4 @@ + config SMBIOS + bool ++config SMBIOS_LEGACY ++ bool +diff --git a/hw/smbios/meson.build b/hw/smbios/meson.build +index 6eeae4b35c..fcac1d7490 100644 +--- a/hw/smbios/meson.build ++++ b/hw/smbios/meson.build +@@ -4,10 +4,15 @@ smbios_ss.add(when: 'CONFIG_IPMI', + if_true: files('smbios_type_38.c'), + if_false: files('smbios_type_38-stub.c')) + ++smbios_ss.add(when: 'CONFIG_SMBIOS_LEGACY', ++ if_true: files('smbios_legacy.c'), ++ if_false: files('smbios_legacy_stub.c')) ++ + system_ss.add_all(when: 'CONFIG_SMBIOS', if_true: smbios_ss) + system_ss.add(when: 'CONFIG_SMBIOS', if_false: files('smbios-stub.c')) + + system_ss.add(when: 'CONFIG_ALL', if_true: files( + 'smbios-stub.c', + 'smbios_type_38-stub.c', ++ 'smbios_legacy_stub.c', + )) +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index eb9927335d..e40204550e 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -31,31 +31,7 @@ + #include "hw/pci/pci_device.h" + 
#include "smbios_build.h" + +-/* legacy structures and constants for <= 2.0 machines */ +-struct smbios_header { +- uint16_t length; +- uint8_t type; +-} QEMU_PACKED; +- +-struct smbios_field { +- struct smbios_header header; +- uint8_t type; +- uint16_t offset; +- uint8_t data[]; +-} QEMU_PACKED; +- +-struct smbios_table { +- struct smbios_header header; +- uint8_t data[]; +-} QEMU_PACKED; +- +-#define SMBIOS_FIELD_ENTRY 0 +-#define SMBIOS_TABLE_ENTRY 1 +- +-static uint8_t *smbios_entries; +-static size_t smbios_entries_len; + static bool smbios_uuid_encoded = true; +-/* end: legacy structures & constants for <= 2.0 machines */ + + /* Set to true for modern Windows 10 HardwareID-6 compat */ + static bool smbios_type2_required; +@@ -65,7 +41,6 @@ static bool smbios_type2_required; + */ + uint8_t *usr_blobs; + size_t usr_blobs_len; +-static GArray *usr_blobs_sizes; + static unsigned usr_table_max; + static unsigned usr_table_cnt; + +@@ -531,7 +506,7 @@ static void smbios_check_type4_count(uint32_t expected_t4_count) + } + } + +-static void smbios_validate_table(void) ++void smbios_validate_table(void) + { + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { +@@ -541,134 +516,6 @@ static void smbios_validate_table(void) + } + } + +- +-/* legacy setup functions for <= 2.0 machines */ +-static void smbios_add_field(int type, int offset, const void *data, size_t len) +-{ +- struct smbios_field *field; +- +- if (!smbios_entries) { +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); +- } +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- sizeof(*field) + len); +- field = (struct smbios_field *)(smbios_entries + smbios_entries_len); +- field->header.type = SMBIOS_FIELD_ENTRY; +- field->header.length = cpu_to_le16(sizeof(*field) + len); +- +- field->type = type; +- field->offset = cpu_to_le16(offset); +- memcpy(field->data, data, len); +- +- smbios_entries_len += 
sizeof(*field) + len; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +-} +- +-static void smbios_maybe_add_str(int type, int offset, const char *data) +-{ +- if (data) { +- smbios_add_field(type, offset, data, strlen(data) + 1); +- } +-} +- +-static void smbios_build_type_0_fields(void) +-{ +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), +- smbios_type0.vendor); +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), +- smbios_type0.version); +- smbios_maybe_add_str(0, offsetof(struct smbios_type_0, +- bios_release_date_str), +- smbios_type0.date); +- if (smbios_type0.have_major_minor) { +- smbios_add_field(0, offsetof(struct smbios_type_0, +- system_bios_major_release), +- &smbios_type0.major, 1); +- smbios_add_field(0, offsetof(struct smbios_type_0, +- system_bios_minor_release), +- &smbios_type0.minor, 1); +- } +-} +- +-static void smbios_build_type_1_fields(void) +-{ +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), +- smbios_type1.manufacturer); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), +- smbios_type1.product); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), +- smbios_type1.version); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), +- smbios_type1.serial); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), +- smbios_type1.sku); +- smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), +- smbios_type1.family); +- if (qemu_uuid_set) { +- /* We don't encode the UUID in the "wire format" here because this +- * function is for legacy mode and needs to keep the guest ABI, and +- * because we don't know what's the SMBIOS version advertised by the +- * BIOS. 
+- */ +- smbios_add_field(1, offsetof(struct smbios_type_1, uuid), +- &qemu_uuid, 16); +- } +-} +- +-uint8_t *smbios_get_table_legacy(size_t *length) +-{ +- int i; +- size_t usr_offset; +- +- /* also complain if fields were given for types > 1 */ +- if (find_next_bit(smbios_have_fields_bitmap, +- SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { +- error_report("can't process fields for smbios " +- "types > 1 on machine versions < 2.1!"); +- exit(1); +- } +- +- if (test_bit(4, smbios_have_binfile_bitmap)) { +- error_report("can't process table for smbios " +- "type 4 on machine versions < 2.1!"); +- exit(1); +- } +- +- g_free(smbios_entries); +- smbios_entries_len = sizeof(uint16_t); +- smbios_entries = g_malloc0(smbios_entries_len); +- +- for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; +- i++) +- { +- struct smbios_table *table; +- struct smbios_structure_header *header; +- size_t size = g_array_index(usr_blobs_sizes, size_t, i); +- +- header = (struct smbios_structure_header *)(usr_blobs + usr_offset); +- smbios_entries = g_realloc(smbios_entries, smbios_entries_len + +- size + sizeof(*table)); +- table = (struct smbios_table *)(smbios_entries + smbios_entries_len); +- table->header.type = SMBIOS_TABLE_ENTRY; +- table->header.length = cpu_to_le16(sizeof(*table) + size); +- memcpy(table->data, header, size); +- smbios_entries_len += sizeof(*table) + size; +- (*(uint16_t *)smbios_entries) = +- cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); +- usr_offset += size; +- } +- +- smbios_build_type_0_fields(); +- smbios_build_type_1_fields(); +- smbios_validate_table(); +- *length = smbios_entries_len; +- return smbios_entries; +-} +-/* end: legacy setup functions for <= 2.0 machines */ +- +- + bool smbios_skip_table(uint8_t type, bool required_table) + { + if (test_bit(type, smbios_have_binfile_bitmap)) { +@@ -1418,14 +1265,6 @@ static bool save_opt_list(size_t *ndest, char ***dest, QemuOpts *opts, + return true; + } + +-static void 
smbios_add_usr_blob_size(size_t size) +-{ +- if (!usr_blobs_sizes) { +- usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); +- } +- g_array_append_val(usr_blobs_sizes, size); +-} +- + void smbios_entry_add(QemuOpts *opts, Error **errp) + { + const char *val; +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +new file mode 100644 +index 0000000000..21f143e738 +--- /dev/null ++++ b/hw/smbios/smbios_legacy.c +@@ -0,0 +1,179 @@ ++/* ++ * SMBIOS legacy support ++ * ++ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. ++ * Copyright (C) 2013 Red Hat, Inc. ++ * ++ * Authors: ++ * Alex Williamson ++ * Markus Armbruster ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ * ++ * Contributions after 2012-01-13 are licensed under the terms of the ++ * GNU GPL, version 2 or (at your option) any later version. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/bswap.h" ++#include "hw/firmware/smbios.h" ++#include "sysemu/sysemu.h" ++#include "qemu/error-report.h" ++ ++struct smbios_header { ++ uint16_t length; ++ uint8_t type; ++} QEMU_PACKED; ++ ++struct smbios_field { ++ struct smbios_header header; ++ uint8_t type; ++ uint16_t offset; ++ uint8_t data[]; ++} QEMU_PACKED; ++ ++struct smbios_table { ++ struct smbios_header header; ++ uint8_t data[]; ++} QEMU_PACKED; ++ ++#define SMBIOS_FIELD_ENTRY 0 ++#define SMBIOS_TABLE_ENTRY 1 ++ ++static uint8_t *smbios_entries; ++static size_t smbios_entries_len; ++GArray *usr_blobs_sizes; ++ ++void smbios_add_usr_blob_size(size_t size) ++{ ++ if (!usr_blobs_sizes) { ++ usr_blobs_sizes = g_array_new(false, false, sizeof(size_t)); ++ } ++ g_array_append_val(usr_blobs_sizes, size); ++} ++ ++static void smbios_add_field(int type, int offset, const void *data, size_t len) ++{ ++ struct smbios_field *field; ++ ++ if (!smbios_entries) { ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); 
++ } ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ sizeof(*field) + len); ++ field = (struct smbios_field *)(smbios_entries + smbios_entries_len); ++ field->header.type = SMBIOS_FIELD_ENTRY; ++ field->header.length = cpu_to_le16(sizeof(*field) + len); ++ ++ field->type = type; ++ field->offset = cpu_to_le16(offset); ++ memcpy(field->data, data, len); ++ ++ smbios_entries_len += sizeof(*field) + len; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++} ++ ++static void smbios_maybe_add_str(int type, int offset, const char *data) ++{ ++ if (data) { ++ smbios_add_field(type, offset, data, strlen(data) + 1); ++ } ++} ++ ++static void smbios_build_type_0_fields(void) ++{ ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), ++ smbios_type0.vendor); ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), ++ smbios_type0.version); ++ smbios_maybe_add_str(0, offsetof(struct smbios_type_0, ++ bios_release_date_str), ++ smbios_type0.date); ++ if (smbios_type0.have_major_minor) { ++ smbios_add_field(0, offsetof(struct smbios_type_0, ++ system_bios_major_release), ++ &smbios_type0.major, 1); ++ smbios_add_field(0, offsetof(struct smbios_type_0, ++ system_bios_minor_release), ++ &smbios_type0.minor, 1); ++ } ++} ++ ++static void smbios_build_type_1_fields(void) ++{ ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), ++ smbios_type1.manufacturer); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), ++ smbios_type1.product); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), ++ smbios_type1.version); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), ++ smbios_type1.serial); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), ++ smbios_type1.sku); ++ smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), ++ smbios_type1.family); ++ if 
(qemu_uuid_set) { ++ /* ++ * We don't encode the UUID in the "wire format" here because this ++ * function is for legacy mode and needs to keep the guest ABI, and ++ * because we don't know what's the SMBIOS version advertised by the ++ * BIOS. ++ */ ++ smbios_add_field(1, offsetof(struct smbios_type_1, uuid), ++ &qemu_uuid, 16); ++ } ++} ++ ++uint8_t *smbios_get_table_legacy(size_t *length) ++{ ++ int i; ++ size_t usr_offset; ++ ++ /* complain if fields were given for types > 1 */ ++ if (find_next_bit(smbios_have_fields_bitmap, ++ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { ++ error_report("can't process fields for smbios " ++ "types > 1 on machine versions < 2.1!"); ++ exit(1); ++ } ++ ++ if (test_bit(4, smbios_have_binfile_bitmap)) { ++ error_report("can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ exit(1); ++ } ++ ++ g_free(smbios_entries); ++ smbios_entries_len = sizeof(uint16_t); ++ smbios_entries = g_malloc0(smbios_entries_len); ++ ++ for (i = 0, usr_offset = 0; usr_blobs_sizes && i < usr_blobs_sizes->len; ++ i++) ++ { ++ struct smbios_table *table; ++ struct smbios_structure_header *header; ++ size_t size = g_array_index(usr_blobs_sizes, size_t, i); ++ ++ header = (struct smbios_structure_header *)(usr_blobs + usr_offset); ++ smbios_entries = g_realloc(smbios_entries, smbios_entries_len + ++ size + sizeof(*table)); ++ table = (struct smbios_table *)(smbios_entries + smbios_entries_len); ++ table->header.type = SMBIOS_TABLE_ENTRY; ++ table->header.length = cpu_to_le16(sizeof(*table) + size); ++ memcpy(table->data, header, size); ++ smbios_entries_len += sizeof(*table) + size; ++ (*(uint16_t *)smbios_entries) = ++ cpu_to_le16(le16_to_cpu(*(uint16_t *)smbios_entries) + 1); ++ usr_offset += size; ++ } ++ ++ smbios_build_type_0_fields(); ++ smbios_build_type_1_fields(); ++ smbios_validate_table(); ++ *length = smbios_entries_len; ++ return smbios_entries; ++} +diff --git a/hw/smbios/smbios_legacy_stub.c 
b/hw/smbios/smbios_legacy_stub.c +new file mode 100644 +index 0000000000..f29b15316c +--- /dev/null ++++ b/hw/smbios/smbios_legacy_stub.c +@@ -0,0 +1,15 @@ ++/* ++ * IPMI SMBIOS firmware handling ++ * ++ * Copyright (c) 2024 Igor Mammedov, Red Hat, Inc. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/firmware/smbios.h" ++ ++void smbios_add_usr_blob_size(size_t size) ++{ ++} +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 333de0d5fc..92e9aba415 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -17,6 +17,9 @@ + * + */ + ++extern uint8_t *usr_blobs; ++extern GArray *usr_blobs_sizes; ++ + typedef struct { + const char *vendor, *version, *date; + bool have_major_minor, uefi; +@@ -323,6 +326,8 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + ++void smbios_validate_table(void); ++void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch b/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch new file mode 100644 index 0000000..48a9e16 --- /dev/null +++ b/SOURCES/kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch @@ -0,0 +1,65 @@ +From 07f6ef2d032cda3e746ac2477c0a9bc1ac636f45 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 14:08:13 +0100 +Subject: [PATCH 06/20] smbios: cleanup smbios_get_tables() from legacy + handling + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha 
+RH-Commit: [4/18] a5e09ce1df72293fecad863edd146a8c4b1a734f + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +smbios_get_tables() bails out right away if legacy mode is enabled +and won't generate any SMBIOS tables. At the same time x86 specific +fw_cfg_build_smbios() will generate legacy tables and then proceed +to preparing temporary mem_array for useless call to +smbios_get_tables() and then discard it. + +Drop legacy related check in smbios_get_tables() and return from +fw_cfg_build_smbios() early if legacy tables were built without +proceeding to non legacy part of the function. + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/i386/fw_cfg.c | 1 + + hw/smbios/smbios.c | 6 ------ + 2 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 6a5466faf0..ed72b1442d 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -76,6 +76,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); ++ return; + } + + /* build the array of physical mem area from e820 table */ +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 074705fa4c..b13e40bae2 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1244,12 +1244,6 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + +- if (smbios_legacy) { +- *tables = *anchor = NULL; +- *tables_len = *anchor_len = 0; +- return; +- } +- + if (!smbios_immutable) { + smbios_build_type_0_table(); + smbios_build_type_1_table(); +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch b/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch new file mode 100644 index 0000000..38a7f95 --- /dev/null +++ b/SOURCES/kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch @@ -0,0 +1,38 @@ +From
c0282a842a912aacac28a6ae229de5854c3fb5df Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 13:20:22 +0100 +Subject: [PATCH 16/20] smbios: clear smbios_type4_count before building tables + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [14/18] f809595d2934ae975c0b7d17a4a79645e062ba42 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +it will help to keep type 4 tables accounting correct in case +SMBIOS tables are built multiple times. + +Signed-off-by: Igor Mammedov +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7e32430b85..4521ea386c 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1111,6 +1111,7 @@ void smbios_get_tables(MachineState *ms, + ep_type == SMBIOS_ENTRY_POINT_TYPE_64); + + g_free(smbios_tables); ++ smbios_type4_count = 0; + smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); + smbios_tables_len = usr_blobs_len; + smbios_table_max = usr_table_max; +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch b/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch new file mode 100644 index 0000000..a2c9532 --- /dev/null +++ b/SOURCES/kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch @@ -0,0 +1,133 @@ +From 2b76d95ec07aba6d96070ee90c5015c1676be091 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 13 Feb 2024 16:25:54 +0100 +Subject: [PATCH 10/20] smbios: don't check type4 structures in legacy mode + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [8/18] c1f8409ea0d916f333c9373535bf21b521c62855 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + 
legacy mode doesn't support structures of type 2 and more, + and CLI has a check for '-smbios type' option, however it's + still possible to sneak in type4 as a blob with '-smbios file' + option. However doing the latter makes SMBIOS tables broken + since SeaBIOS doesn't expect that. + + Rather than trying to add support for type4 to legacy code + (both QEMU and SeaBIOS), simplify smbios_get_table_legacy() + by dropping not relevant check in legacy code and error out + on type4 blob. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: include/hw/firmware/smbios.h +Signed-off-by: Igor Mammedov + + Please enter the commit message for your changes. Lines starting +--- + hw/i386/fw_cfg.c | 3 +-- + hw/smbios/smbios.c | 18 ++++++++++++++---- + include/hw/firmware/smbios.h | 2 +- + 3 files changed, 16 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index bb7149c4c3..a25793a68f 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -73,8 +73,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + + if (pcmc->smbios_legacy_mode) { +- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, +- &smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(&smbios_tables_len); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index d8d68716d4..441517cf24 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -530,14 +530,17 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_validate_table(uint32_t expected_t4_count) ++static void smbios_check_type4_count(uint32_t expected_t4_count) + { + if (smbios_type4_count && smbios_type4_count != expected_t4_count) { + error_report("Expected %d SMBIOS Type 4 tables, got %d instead", + expected_t4_count,
smbios_type4_count); + exit(1); + } ++} + ++static void smbios_validate_table(void) ++{ + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { + error_report("SMBIOS 2.1 table length %zu exceeds %d", +@@ -622,7 +625,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) ++uint8_t *smbios_get_table_legacy(size_t *length) + { + int i; + size_t usr_offset; +@@ -635,6 +638,12 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + exit(1); + } + ++ if (test_bit(4, have_binfile_bitmap)) { ++ error_report("can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ exit(1); ++ } ++ + g_free(smbios_entries); + smbios_entries_len = sizeof(uint16_t); + smbios_entries = g_malloc0(smbios_entries_len); +@@ -661,7 +670,7 @@ uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(expected_t4_count); ++ smbios_validate_table(); + *length = smbios_entries_len; + return smbios_entries; + } +@@ -1319,7 +1328,8 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_validate_table(ms->smp.sockets); ++ smbios_check_type4_count(ms->smp.sockets); ++ smbios_validate_table(); + smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index b9fc9a0f42..d55018e5e3 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); +-uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); ++uint8_t 
*smbios_get_table_legacy(size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch b/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch new file mode 100644 index 0000000..2afbe66 --- /dev/null +++ b/SOURCES/kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch @@ -0,0 +1,72 @@ +From bbb2d260e6f33380b9df28c74421055bd8dccda5 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 12:59:33 +0100 +Subject: [PATCH 19/20] smbios: error out when building type 4 table is not + possible + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [17/18] 86b1c67bfbe9c0c14a190cd1204b6ccd1de1630f + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +If SMBIOS v2 version is requested but number of cores/threads +are more than it's possible to describe with v2, error out +instead of silently ignoring the fact and filling core/thread +count with bogus values. 
+ +This will help caller to decide if it should fallback to +SMBIOSv3 when smbios-entry-point-type='auto' + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 3d9dcb0d31..637aa952f5 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -655,7 +655,8 @@ static void smbios_build_type_3_table(void) + } + + static void smbios_build_type_4_table(MachineState *ms, unsigned instance, +- SmbiosEntryPointType ep_type) ++ SmbiosEntryPointType ep_type, ++ Error **errp) + { + char sock_str[128]; + size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; +@@ -709,6 +710,12 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance, + if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { + t->core_count2 = t->core_enabled2 = cpu_to_le16(cores_per_socket); + t->thread_count2 = cpu_to_le16(threads_per_socket); ++ } else if (t->core_count == 0xFF || t->thread_count == 0xFF) { ++ error_setg(errp, "SMBIOS 2.0 doesn't support number of processor " ++ "cores/threads more than 255, use " ++ "-machine smbios-entry-point-type=64 option to enable " ++ "SMBIOS 3.0 support"); ++ return; + } + + SMBIOS_BUILD_TABLE_POST; +@@ -1126,7 +1133,10 @@ static bool smbios_get_tables_ep(MachineState *ms, + assert(ms->smp.sockets >= 1); + + for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i, ep_type); ++ smbios_build_type_4_table(ms, i, ep_type, errp); ++ if (*errp) { ++ goto err_exit; ++ } + } + + smbios_build_type_8_table(); +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch b/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch new file mode 100644 index 0000000..db012bd --- /dev/null +++ b/SOURCES/kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch @@ -0,0 +1,48 @@ +From c083959c963fde33f4769fd4c6e122dd16ce6d3c Mon Sep 17 
00:00:00 2001 +From: Igor Mammedov +Date: Wed, 21 Feb 2024 16:04:36 +0100 +Subject: [PATCH 17/20] smbios: extend smbios-entry-point-type with 'auto' + value + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [15/18] 202563dcf77e062a238aa2a10ec14c25d3f5a7d0 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +later patches will use it to pick SMBIOS version at runtime +depending on configuration. + +Signed-off-by: Igor Mammedov +Acked-by: Markus Armbruster +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + qapi/machine.json | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/qapi/machine.json b/qapi/machine.json +index b6d634b30d..99f6368fa6 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1788,10 +1788,13 @@ + # + # @64: SMBIOS version 3.0 (64-bit) Entry Point + # ++# @auto: Either 2.x or 3.x SMBIOS version, 2.x if configuration can be ++# described by it and 3.x otherwise (since: 9.0) ++# + # Since: 7.0 + ## + { 'enum': 'SmbiosEntryPointType', +- 'data': [ '32', '64' ] } ++ 'data': [ '32', '64', 'auto' ] } + + ## + # @MemorySizeConfiguration: +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch b/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch new file mode 100644 index 0000000..1896f2b --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-global-smbios_ep_type.patch @@ -0,0 +1,281 @@ +From be0abbf3f7845847b46486704c46c5de5a2b2323 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 26 Feb 2024 13:49:14 +0100 +Subject: [PATCH 15/20] smbios: get rid of global smbios_ep_type + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [13/18] 2e838ed0d03989e2e4ee08041b5ba64d5d7f5820 + +JIRA: 
https://issues.redhat.com/browse/RHEL-21705 + +Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/riscv/virt.c, hw/smbios/smbios.c, + include/hw/firmware/smbios.h + due to downstream specific smbios_set_defaults() + +Signed-off-by: Igor Mammedov +Acked-by: Daniel Henrique Barboza +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +Signed-off-by: Igor Mammedov +--- + hw/arm/virt.c | 4 ++-- + hw/i386/fw_cfg.c | 6 +++--- + hw/i386/fw_cfg.h | 3 ++- + hw/i386/pc.c | 2 +- + hw/loongarch/virt.c | 7 ++++--- + hw/smbios/smbios.c | 26 ++++++++++++++------------ + hw/smbios/smbios_legacy.c | 2 +- + include/hw/firmware/smbios.h | 4 ++-- + 8 files changed, 29 insertions(+), 25 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e5cfc19c08..e4a66affcb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1695,14 +1695,14 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? "1.0" : mc->name, +- true, SMBIOS_ENTRY_POINT_TYPE_64, ++ true, + NULL, NULL); + + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; + mem_array.length = ms->ram_size; + +- smbios_get_tables(ms, &mem_array, 1, ++ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, &mem_array, 1, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index bdc3cc4556..58429bb78d 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -48,7 +48,8 @@ const char *fw_cfg_arch_key_name(uint16_t key) + return NULL; + } + +-void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, ++ SmbiosEntryPointType ep_type) + { + #ifdef CONFIG_SMBIOS + uint8_t *smbios_tables, *smbios_anchor; +@@ -64,7 +65,6 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + /* These values are guest ABI, do not change */ + 
smbios_set_defaults("QEMU", mc->desc, mc->name, + pcmc->smbios_uuid_encoded, +- pcms->smbios_entry_point_type, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version); + } +@@ -91,7 +91,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + array_count++; + } + } +- smbios_get_tables(ms, mem_array, array_count, ++ smbios_get_tables(ms, ep_type, mem_array, array_count, + &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, + &error_fatal); +diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h +index 1e1de6b4a3..92e310f5fd 100644 +--- a/hw/i386/fw_cfg.h ++++ b/hw/i386/fw_cfg.h +@@ -23,7 +23,8 @@ + FWCfgState *fw_cfg_arch_create(MachineState *ms, + uint16_t boot_cpus, + uint16_t apic_id_limit); +-void fw_cfg_build_smbios(PCMachineState *ms, FWCfgState *fw_cfg); ++void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, ++ SmbiosEntryPointType ep_type); + void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 16de2a59e8..ae6777fc1a 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -847,7 +847,7 @@ void pc_machine_done(Notifier *notifier, void *data) + + acpi_setup(); + if (x86ms->fw_cfg) { +- fw_cfg_build_smbios(pcms, x86ms->fw_cfg); ++ fw_cfg_build_smbios(pcms, x86ms->fw_cfg, pcms->smbios_entry_point_type); + fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); + /* update FW_CFG_NB_CPUS to account for -device added CPUs */ + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 7358a023d3..77956b5ada 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -320,10 +320,11 @@ static void virt_build_smbios(LoongArchMachineState *lams) + return; + } + +- smbios_set_defaults("QEMU", product, mc->name, +- true, SMBIOS_ENTRY_POINT_TYPE_64); ++ smbios_set_defaults("QEMU", product, mc->name, true); + +- 
smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, ++ smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, ++ NULL, 0, ++ &smbios_tables, &smbios_tables_len, + &smbios_anchor, &smbios_anchor_len, &error_fatal); + + if (smbios_anchor) { +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index b5745c6c2d..7e32430b85 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -47,7 +47,6 @@ uint8_t *smbios_tables; + size_t smbios_tables_len; + unsigned smbios_table_max; + unsigned smbios_table_cnt; +-static SmbiosEntryPointType smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; + + static SmbiosEntryPoint ep; + +@@ -506,9 +505,9 @@ static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) + return true; + } + +-bool smbios_validate_table(Error **errp) ++bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp) + { +- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && ++ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { + error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", + smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); +@@ -655,14 +654,15 @@ static void smbios_build_type_3_table(void) + SMBIOS_BUILD_TABLE_POST; + } + +-static void smbios_build_type_4_table(MachineState *ms, unsigned instance) ++static void smbios_build_type_4_table(MachineState *ms, unsigned instance, ++ SmbiosEntryPointType ep_type) + { + char sock_str[128]; + size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; + unsigned threads_per_socket; + unsigned cores_per_socket; + +- if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { ++ if (ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { + tbl_len = SMBIOS_TYPE_4_LEN_V30; + } + +@@ -991,13 +991,11 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, + bool uuid_encoded, +- SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version) + { + 
smbios_have_defaults = true; + smbios_uuid_encoded = uuid_encoded; +- smbios_ep_type = ep_type; + + /* + * If @stream_product & @stream_version are non-NULL, then +@@ -1048,9 +1046,9 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SMBIOS_SET_DEFAULT(type17.manufacturer, manufacturer); + } + +-static void smbios_entry_point_setup(void) ++static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) + { +- switch (smbios_ep_type) { ++ switch (ep_type) { + case SMBIOS_ENTRY_POINT_TYPE_32: + memcpy(ep.ep21.anchor_string, "_SM_", 4); + memcpy(ep.ep21.intermediate_anchor_string, "_DMI_", 5); +@@ -1100,6 +1098,7 @@ static void smbios_entry_point_setup(void) + } + + void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, + uint8_t **tables, size_t *tables_len, +@@ -1108,6 +1107,9 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + ++ assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || ++ ep_type == SMBIOS_ENTRY_POINT_TYPE_64); ++ + g_free(smbios_tables); + smbios_tables = g_memdup2(usr_blobs, usr_blobs_len); + smbios_tables_len = usr_blobs_len; +@@ -1122,7 +1124,7 @@ void smbios_get_tables(MachineState *ms, + assert(ms->smp.sockets >= 1); + + for (i = 0; i < ms->smp.sockets; i++) { +- smbios_build_type_4_table(ms, i); ++ smbios_build_type_4_table(ms, i, ep_type); + } + + smbios_build_type_8_table(); +@@ -1171,10 +1173,10 @@ void smbios_get_tables(MachineState *ms, + if (!smbios_check_type4_count(ms->smp.sockets, errp)) { + goto err_exit; + } +- if (!smbios_validate_table(errp)) { ++ if (!smbios_validate_table(ep_type, errp)) { + goto err_exit; + } +- smbios_entry_point_setup(); ++ smbios_entry_point_setup(ep_type); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +index a6544bf55a..06907cd16c 
100644 +--- a/hw/smbios/smbios_legacy.c ++++ b/hw/smbios/smbios_legacy.c +@@ -173,7 +173,7 @@ uint8_t *smbios_get_table_legacy(size_t *length, Error **errp) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- if (!smbios_validate_table(errp)) { ++ if (!smbios_validate_table(SMBIOS_ENTRY_POINT_TYPE_32, errp)) { + goto err_exit; + } + +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 44af3a0d82..781298f594 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -326,18 +326,18 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + +-bool smbios_validate_table(Error **errp); ++bool smbios_validate_table(SmbiosEntryPointType ep_type, Error **errp); + void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, + bool uuid_encoded, +- SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); + uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); + void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, + uint8_t **tables, size_t *tables_len, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch new file mode 100644 index 0000000..ecb730e --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-smbios_legacy-global.patch @@ -0,0 +1,198 @@ +From 0802fa7199c8085d018fc38dd4beaa5062d383d1 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 15:37:29 +0100 +Subject: [PATCH 08/20] smbios: get rid of smbios_legacy global + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with 
SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [6/18] 684dd1dca8d611c6de97b26ef8c1cda6ca509d54 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + clean up smbios_set_defaults() which is reused by legacy + and non legacy machines from being aware of 'legacy' notion + and need to turn it off. And push legacy handling up to + PC machine code where it's relevant. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Acked-by: Daniel Henrique Barboza + Tested-by: Fiona Ebner + +Conflicts: hw/arm/virt.c, hw/i386/fw_cfg.c, hw/loongarch/virt.c, + hw/smbios/smbios.c, include/hw/firmware/smbios.h + due to downstream specific signature of smbios_set_defaults() +PS: + while fixing conflicts move RHEL specific + smbios_stream_product/smbios_stream_version + at the end of arguments list + +Signed-off-by: Igor Mammedov +--- + hw/arm/virt.c | 5 +++-- + hw/i386/fw_cfg.c | 11 +++++----- + hw/loongarch/virt.c | 2 +- + hw/smbios/smbios.c | 39 ++++++++++++++++-------------------- + include/hw/firmware/smbios.h | 6 +++--- + 5 files changed, 30 insertions(+), 33 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 943c563391..e5cfc19c08 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1694,8 +1694,9 @@ static void virt_build_smbios(VirtMachineState *vms) + } + + smbios_set_defaults("QEMU", product, +- vmc->smbios_old_sys_ver ? "1.0" : mc->name, false, +- true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); ++ vmc->smbios_old_sys_ver ?
"1.0" : mc->name, ++ true, SMBIOS_ENTRY_POINT_TYPE_64, ++ NULL, NULL); + + /* build the array of physical mem area from base_memmap */ + mem_array.address = vms->memmap[VIRT_MEM].base; +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 79ff7f7225..bb7149c4c3 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,17 +63,18 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ + smbios_set_defaults("QEMU", mc->desc, mc->name, +- pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, ++ pcmc->smbios_uuid_encoded, ++ pcms->smbios_entry_point_type, + pcmc->smbios_stream_product, +- pcmc->smbios_stream_version, +- pcms->smbios_entry_point_type); ++ pcmc->smbios_stream_version); + } + + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +- smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); +- if (smbios_tables) { ++ if (pcmc->smbios_legacy_mode) { ++ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, ++ &smbios_tables_len); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 4b7dc67a2d..7358a023d3 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -320,7 +320,7 @@ static void virt_build_smbios(LoongArchMachineState *lams) + return; + } + +- smbios_set_defaults("QEMU", product, mc->name, false, ++ smbios_set_defaults("QEMU", product, mc->name, + true, SMBIOS_ENTRY_POINT_TYPE_64); + + smbios_get_tables(ms, NULL, 0, &smbios_tables, &smbios_tables_len, +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 8129d396d1..0c8c439859 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -54,7 +54,6 @@ struct smbios_table { + + static uint8_t *smbios_entries; + static size_t smbios_entries_len; +-static bool smbios_legacy = true; + static 
bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + +@@ -618,9 +617,16 @@ static void smbios_build_type_1_fields(void) + + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { +- if (!smbios_legacy) { +- *length = 0; +- return NULL; ++ /* drop unwanted version of command-line file blob(s) */ ++ g_free(smbios_tables); ++ smbios_tables = NULL; ++ ++ /* also complain if fields were given for types > 1 */ ++ if (find_next_bit(have_fields_bitmap, ++ SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { ++ error_report("can't process fields for smbios " ++ "types > 1 on machine versions < 2.1!"); ++ exit(1); + } + + if (!smbios_immutable) { +@@ -1107,31 +1113,16 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + } + + void smbios_set_defaults(const char *manufacturer, const char *product, +- const char *version, bool legacy_mode, ++ const char *version, + bool uuid_encoded, ++ SmbiosEntryPointType ep_type, + const char *stream_product, +- const char *stream_version, +- SmbiosEntryPointType ep_type) ++ const char *stream_version) + { + smbios_have_defaults = true; +- smbios_legacy = legacy_mode; + smbios_uuid_encoded = uuid_encoded; + smbios_ep_type = ep_type; + +- /* drop unwanted version of command-line file blob(s) */ +- if (smbios_legacy) { +- g_free(smbios_tables); +- /* in legacy mode, also complain if fields were given for types > 1 */ +- if (find_next_bit(have_fields_bitmap, +- SMBIOS_MAX_TYPE+1, 2) < SMBIOS_MAX_TYPE+1) { +- error_report("can't process fields for smbios " +- "types > 1 on machine versions < 2.1!"); +- exit(1); +- } +- } else { +- g_free(smbios_entries); +- } +- + /* + * If @stream_product & @stream_version are non-NULL, then + * we're following rules for new Windows driver support. 
+@@ -1241,6 +1232,10 @@ void smbios_get_tables(MachineState *ms, + { + unsigned i, dimm_cnt, offset; + ++ /* drop unwanted (legacy) version of command-line file blob(s) */ ++ g_free(smbios_entries); ++ smbios_entries = NULL; ++ + if (!smbios_immutable) { + smbios_build_type_0_table(); + smbios_build_type_1_table(); +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 95ec64ce2c..b9fc9a0f42 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -310,11 +310,11 @@ struct smbios_type_127 { + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, +- const char *version, bool legacy_mode, ++ const char *version, + bool uuid_encoded, ++ SmbiosEntryPointType ep_type, + const char *stream_product, +- const char *stream_version, +- SmbiosEntryPointType ep_type); ++ const char *stream_version); + uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch b/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch new file mode 100644 index 0000000..fde17ac --- /dev/null +++ b/SOURCES/kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch @@ -0,0 +1,134 @@ +From 1536f1ec00ddc1729854b381cc0d54814bb6c19f Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 29 Dec 2023 15:04:55 +0100 +Subject: [PATCH 07/20] smbios: get rid of smbios_smp_sockets global + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [5/18] f59bfeb547b7febcf1de2b6179af006e7fa0bccd + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + it makes smbios_validate_table() 
independent from + smbios_smp_sockets global, which in turn lets + smbios_get_tables() avoid using not related legacy code. + + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + Tested-by: Fiona Ebner + +Conflicts: + include/hw/firmware/smbios.h due to down-stream + (d9ff466c980d Machine type related general changes) + adding custom stream_product/stream_version + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 2 +- + hw/smbios/smbios.c | 22 +++++++++------------- + include/hw/firmware/smbios.h | 2 +- + 3 files changed, 11 insertions(+), 15 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index ed72b1442d..79ff7f7225 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -72,7 +72,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + /* tell smbios about cpuid version and features */ + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + +- smbios_tables = smbios_get_table_legacy(ms, &smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(ms->smp.cpus, &smbios_tables_len); + if (smbios_tables) { + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index b13e40bae2..8129d396d1 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -73,7 +73,7 @@ static SmbiosEntryPoint ep; + static int smbios_type4_count = 0; + static bool smbios_immutable; + static bool smbios_have_defaults; +-static uint32_t smbios_cpuid_version, smbios_cpuid_features, smbios_smp_sockets; ++static uint32_t smbios_cpuid_version, smbios_cpuid_features; + + static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); + static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); +@@ -524,14 +524,11 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_validate_table(MachineState *ms) ++static void smbios_validate_table(uint32_t expected_t4_count) + { +- uint32_t 
expect_t4_count = smbios_legacy ? +- ms->smp.cpus : smbios_smp_sockets; +- +- if (smbios_type4_count && smbios_type4_count != expect_t4_count) { ++ if (smbios_type4_count && smbios_type4_count != expected_t4_count) { + error_report("Expected %d SMBIOS Type 4 tables, got %d instead", +- expect_t4_count, smbios_type4_count); ++ expected_t4_count, smbios_type4_count); + exit(1); + } + +@@ -619,7 +616,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) ++uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length) + { + if (!smbios_legacy) { + *length = 0; +@@ -629,7 +626,7 @@ uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length) + if (!smbios_immutable) { + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(ms); ++ smbios_validate_table(expected_t4_count); + smbios_immutable = true; + } + *length = smbios_entries_len; +@@ -1250,10 +1247,9 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_2_table(); + smbios_build_type_3_table(); + +- smbios_smp_sockets = ms->smp.sockets; +- assert(smbios_smp_sockets >= 1); ++ assert(ms->smp.sockets >= 1); + +- for (i = 0; i < smbios_smp_sockets; i++) { ++ for (i = 0; i < ms->smp.sockets; i++) { + smbios_build_type_4_table(ms, i); + } + +@@ -1299,7 +1295,7 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_validate_table(ms); ++ smbios_validate_table(ms->smp.sockets); + smbios_entry_point_setup(); + smbios_immutable = true; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index f8dd07fe4c..95ec64ce2c 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -315,7 +315,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + const char *stream_product, + const char *stream_version, + SmbiosEntryPointType ep_type); +-uint8_t 
*smbios_get_table_legacy(MachineState *ms, size_t *length); ++uint8_t *smbios_get_table_legacy(uint32_t expected_t4_count, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-handle-errors-consistently.patch b/SOURCES/kvm-smbios-handle-errors-consistently.patch new file mode 100644 index 0000000..ac75c76 --- /dev/null +++ b/SOURCES/kvm-smbios-handle-errors-consistently.patch @@ -0,0 +1,217 @@ +From 7271c4424c6d90f0bb34f8090eb4e192eb2b2537 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 20 Feb 2024 10:30:28 +0100 +Subject: [PATCH 14/20] smbios: handle errors consistently + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [12/18] d8a5a70602ae0665ce35e5bf87b2d8420f9189bc + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + Current code uses mix of error_report()+exit(1) + and error_setg() to handle errors. + Use newer error_setg() everywhere, beside consistency + it will allow to detect error condition without killing + QEMU and attempt switch-over to SMBIOS3.x tables/entrypoint + in follow up patch. + + while at it, clear smbios_tables pointer after freeing. + that will avoid double free if smbios_get_tables() is called + multiple times. 
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: include/hw/firmware/smbios.h + due to downstream specific smbios_set_defaults() + +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 3 ++- + hw/smbios/smbios.c | 34 ++++++++++++++++++++++------------ + hw/smbios/smbios_legacy.c | 22 ++++++++++++++-------- + include/hw/firmware/smbios.h | 4 ++-- + 4 files changed, 40 insertions(+), 23 deletions(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index a25793a68f..bdc3cc4556 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -73,7 +73,8 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg) + smbios_set_cpuid(cpu->env.cpuid_version, cpu->env.features[FEAT_1_EDX]); + + if (pcmc->smbios_legacy_mode) { +- smbios_tables = smbios_get_table_legacy(&smbios_tables_len); ++ smbios_tables = smbios_get_table_legacy(&smbios_tables_len, ++ &error_fatal); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SMBIOS_ENTRIES, + smbios_tables, smbios_tables_len); + return; +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index e40204550e..b5745c6c2d 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -19,7 +19,6 @@ + #include "qemu/units.h" + #include "qapi/error.h" + #include "qemu/config-file.h" +-#include "qemu/error-report.h" + #include "qemu/module.h" + #include "qemu/option.h" + #include "sysemu/sysemu.h" +@@ -497,23 +496,25 @@ opts_init(smbios_register_config); + */ + #define SMBIOS_21_MAX_TABLES_LEN 0xffff + +-static void smbios_check_type4_count(uint32_t expected_t4_count) ++static bool smbios_check_type4_count(uint32_t expected_t4_count, Error **errp) + { + if (smbios_type4_count && smbios_type4_count != expected_t4_count) { +- error_report("Expected %d SMBIOS Type 4 tables, got %d instead", +- expected_t4_count, smbios_type4_count); +- exit(1); ++ error_setg(errp, "Expected %d SMBIOS Type 4 tables, got %d instead", ++ expected_t4_count, smbios_type4_count); ++ return false; + } ++ return true; + } + +-void 
smbios_validate_table(void) ++bool smbios_validate_table(Error **errp) + { + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_32 && + smbios_tables_len > SMBIOS_21_MAX_TABLES_LEN) { +- error_report("SMBIOS 2.1 table length %zu exceeds %d", +- smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); +- exit(1); ++ error_setg(errp, "SMBIOS 2.1 table length %zu exceeds %d", ++ smbios_tables_len, SMBIOS_21_MAX_TABLES_LEN); ++ return false; + } ++ return true; + } + + bool smbios_skip_table(uint8_t type, bool required_table) +@@ -1167,15 +1168,18 @@ void smbios_get_tables(MachineState *ms, + smbios_build_type_41_table(errp); + smbios_build_type_127_table(); + +- smbios_check_type4_count(ms->smp.sockets); +- smbios_validate_table(); ++ if (!smbios_check_type4_count(ms->smp.sockets, errp)) { ++ goto err_exit; ++ } ++ if (!smbios_validate_table(errp)) { ++ goto err_exit; ++ } + smbios_entry_point_setup(); + + /* return tables blob and entry point (anchor), and their sizes */ + *tables = smbios_tables; + *tables_len = smbios_tables_len; + *anchor = (uint8_t *)&ep; +- + /* calculate length based on anchor string */ + if (!strncmp((char *)&ep, "_SM_", 4)) { + *anchor_len = sizeof(struct smbios_21_entry_point); +@@ -1184,6 +1188,12 @@ void smbios_get_tables(MachineState *ms, + } else { + abort(); + } ++ ++ return; ++err_exit: ++ g_free(smbios_tables); ++ smbios_tables = NULL; ++ return; + } + + static void save_opt(const char **dest, QemuOpts *opts, const char *name) +diff --git a/hw/smbios/smbios_legacy.c b/hw/smbios/smbios_legacy.c +index 21f143e738..a6544bf55a 100644 +--- a/hw/smbios/smbios_legacy.c ++++ b/hw/smbios/smbios_legacy.c +@@ -19,7 +19,7 @@ + #include "qemu/bswap.h" + #include "hw/firmware/smbios.h" + #include "sysemu/sysemu.h" +-#include "qemu/error-report.h" ++#include "qapi/error.h" + + struct smbios_header { + uint16_t length; +@@ -128,7 +128,7 @@ static void smbios_build_type_1_fields(void) + } + } + +-uint8_t *smbios_get_table_legacy(size_t *length) ++uint8_t 
*smbios_get_table_legacy(size_t *length, Error **errp) + { + int i; + size_t usr_offset; +@@ -136,15 +136,15 @@ uint8_t *smbios_get_table_legacy(size_t *length) + /* complain if fields were given for types > 1 */ + if (find_next_bit(smbios_have_fields_bitmap, + SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { +- error_report("can't process fields for smbios " ++ error_setg(errp, "can't process fields for smbios " + "types > 1 on machine versions < 2.1!"); +- exit(1); ++ goto err_exit; + } + + if (test_bit(4, smbios_have_binfile_bitmap)) { +- error_report("can't process table for smbios " +- "type 4 on machine versions < 2.1!"); +- exit(1); ++ error_setg(errp, "can't process table for smbios " ++ "type 4 on machine versions < 2.1!"); ++ goto err_exit; + } + + g_free(smbios_entries); +@@ -173,7 +173,13 @@ uint8_t *smbios_get_table_legacy(size_t *length) + + smbios_build_type_0_fields(); + smbios_build_type_1_fields(); +- smbios_validate_table(); ++ if (!smbios_validate_table(errp)) { ++ goto err_exit; ++ } ++ + *length = smbios_entries_len; + return smbios_entries; ++err_exit: ++ g_free(smbios_entries); ++ return NULL; + } +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 92e9aba415..44af3a0d82 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -326,7 +326,7 @@ struct smbios_type_127 { + struct smbios_structure_header header; + } QEMU_PACKED; + +-void smbios_validate_table(void); ++bool smbios_validate_table(Error **errp); + void smbios_add_usr_blob_size(size_t size); + void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); +@@ -336,7 +336,7 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + SmbiosEntryPointType ep_type, + const char *stream_product, + const char *stream_version); +-uint8_t *smbios_get_table_legacy(size_t *length); ++uint8_t *smbios_get_table_legacy(size_t *length, Error **errp); + void 
smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch b/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch new file mode 100644 index 0000000..fd1bc19 --- /dev/null +++ b/SOURCES/kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch @@ -0,0 +1,131 @@ +From 38220bc61bdb1614f34a53481f7604720c9e9e5a Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 20 Feb 2024 10:41:06 +0100 +Subject: [PATCH 18/20] smbios: in case of entry point is 'auto' try to build + v2 tables 1st + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [16/18] becbcb3d8dad4842e5939bb75e21f4e737a4a325 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +QEMU for some time now uses SMBIOS 3.0 for PC/Q35 machines by +default, however Windows has a bug in locating SMBIOS 3.0 +entrypoint and fails to find tables when booted on SeaBIOS +(on UEFI SMBIOS 3.0 tables work fine since firmware hands +over tables in another way) + +Missing SMBIOS tables may lead to some issues for guest +though (worst are: possible reactivation, inability to +get virtio drivers from 'Windows Update') + +It's unclear at this point if MS will fix the issue on their +side. So instead of it (or rather in addition) this patch +will try to workaround the issue. + +aka, use smbios-entry-point-type=auto to make QEMU try +generating conservative SMBIOS 2.0 tables and if that +fails (due to limits/requested configuration) fallback +to SMBIOS 3.0 tables. + +With this in place majority of users will use SMBIOS 2.0 +tables which work fine with (Windows + legacy BIOS).
+The configurations that are not possible to describe +with SMBIOS 2.0 will switch automatically to SMBIOS 3.0 +(which will trigger Windows bug but there is nothing +QEMU can do here, so go and ask Microsoft for a real fix). + +Signed-off-by: Igor Mammedov +Reviewed-by: Ani Sinha +Tested-by: Fiona Ebner +--- + hw/smbios/smbios.c | 52 +++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 49 insertions(+), 3 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 4521ea386c..3d9dcb0d31 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -1097,7 +1097,7 @@ static void smbios_entry_point_setup(SmbiosEntryPointType ep_type) + } + } + +-void smbios_get_tables(MachineState *ms, ++static bool smbios_get_tables_ep(MachineState *ms, + SmbiosEntryPointType ep_type, + const struct smbios_phys_mem_area *mem_array, + const unsigned int mem_array_size, +@@ -1106,6 +1106,7 @@ void smbios_get_tables(MachineState *ms, + Error **errp) + { + unsigned i, dimm_cnt, offset; ++ ERRP_GUARD(); + + assert(ep_type == SMBIOS_ENTRY_POINT_TYPE_32 || + ep_type == SMBIOS_ENTRY_POINT_TYPE_64); +@@ -1192,11 +1193,56 @@ void smbios_get_tables(MachineState *ms, + abort(); + } + +- return; ++ return true; + err_exit: + g_free(smbios_tables); + smbios_tables = NULL; +- return; ++ return false; ++} ++ ++void smbios_get_tables(MachineState *ms, ++ SmbiosEntryPointType ep_type, ++ const struct smbios_phys_mem_area *mem_array, ++ const unsigned int mem_array_size, ++ uint8_t **tables, size_t *tables_len, ++ uint8_t **anchor, size_t *anchor_len, ++ Error **errp) ++{ ++ Error *local_err = NULL; ++ bool is_valid; ++ ERRP_GUARD(); ++ ++ switch (ep_type) { ++ case SMBIOS_ENTRY_POINT_TYPE_AUTO: ++ case SMBIOS_ENTRY_POINT_TYPE_32: ++ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_32, ++ mem_array, mem_array_size, ++ tables, tables_len, ++ anchor, anchor_len, ++ &local_err); ++ if (is_valid || ep_type != SMBIOS_ENTRY_POINT_TYPE_AUTO) { ++ break; ++ } ++ /* ++ *
fall through in case AUTO endpoint is selected and ++ * SMBIOS 2.x tables can't be generated, to try if SMBIOS 3.x ++ * tables would work ++ */ ++ case SMBIOS_ENTRY_POINT_TYPE_64: ++ error_free(local_err); ++ local_err = NULL; ++ is_valid = smbios_get_tables_ep(ms, SMBIOS_ENTRY_POINT_TYPE_64, ++ mem_array, mem_array_size, ++ tables, tables_len, ++ anchor, anchor_len, ++ &local_err); ++ break; ++ default: ++ abort(); ++ } ++ if (!is_valid) { ++ error_propagate(errp, local_err); ++ } + } + + static void save_opt(const char **dest, QemuOpts *opts, const char *name) +-- +2.39.3 + diff --git a/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch b/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch new file mode 100644 index 0000000..cba6134 --- /dev/null +++ b/SOURCES/kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch @@ -0,0 +1,330 @@ +From 36b0256e27f9d5268c5413891b4a7322819ae9db Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 4 Mar 2024 15:56:19 +0100 +Subject: [PATCH 12/20] smbios: rename/expose structures/bitmaps used by both + legacy and modern code + +RH-Author: Igor Mammedov +RH-MergeRequest: 230: Workaround Windows failing to find 64bit SMBIOS entry point with SeaBIOS +RH-Jira: RHEL-21705 +RH-Acked-by: MST +RH-Acked-by: Ani Sinha +RH-Commit: [10/18] 59fe438d5b7f6e584a6bb02597e4d4724fe2cece + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + + As a preparation to move legacy handling into a separate file, + add prefix 'smbios_' to type0/type1/have_binfile_bitmap/have_fields_bitmap + and expose them in smbios.h so that they can be reused in + legacy and modern code. + + Doing it as a separate patch to avoid rename cluttering follow-up + patch which will move legacy code into a separate file. 
+ + Signed-off-by: Igor Mammedov + Reviewed-by: Ani Sinha + +Conflicts: hw/smbios/smbios.c + due to setting downstream type1.family/type1.sku defaults + +Signed-off-by: Igor Mammedov +--- + hw/smbios/smbios.c | 117 ++++++++++++++++------------------- + include/hw/firmware/smbios.h | 16 +++++ + 2 files changed, 71 insertions(+), 62 deletions(-) + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index c48a290478..eb9927335d 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -81,19 +81,11 @@ static int smbios_type4_count = 0; + static bool smbios_have_defaults; + static uint32_t smbios_cpuid_version, smbios_cpuid_features; + +-static DECLARE_BITMAP(have_binfile_bitmap, SMBIOS_MAX_TYPE+1); +-static DECLARE_BITMAP(have_fields_bitmap, SMBIOS_MAX_TYPE+1); ++DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); ++DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); + +-static struct { +- const char *vendor, *version, *date; +- bool have_major_minor, uefi; +- uint8_t major, minor; +-} type0; +- +-static struct { +- const char *manufacturer, *product, *version, *serial, *sku, *family; +- /* uuid is in qemu_uuid */ +-} type1; ++smbios_type0_t smbios_type0; ++smbios_type1_t smbios_type1; + + static struct { + const char *manufacturer, *product, *version, *serial, *asset, *location; +@@ -584,36 +576,36 @@ static void smbios_maybe_add_str(int type, int offset, const char *data) + static void smbios_build_type_0_fields(void) + { + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, vendor_str), +- type0.vendor); ++ smbios_type0.vendor); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, bios_version_str), +- type0.version); ++ smbios_type0.version); + smbios_maybe_add_str(0, offsetof(struct smbios_type_0, + bios_release_date_str), +- type0.date); +- if (type0.have_major_minor) { ++ smbios_type0.date); ++ if (smbios_type0.have_major_minor) { + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_major_release), +- 
&type0.major, 1); ++ &smbios_type0.major, 1); + smbios_add_field(0, offsetof(struct smbios_type_0, + system_bios_minor_release), +- &type0.minor, 1); ++ &smbios_type0.minor, 1); + } + } + + static void smbios_build_type_1_fields(void) + { + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, manufacturer_str), +- type1.manufacturer); ++ smbios_type1.manufacturer); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, product_name_str), +- type1.product); ++ smbios_type1.product); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, version_str), +- type1.version); ++ smbios_type1.version); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, serial_number_str), +- type1.serial); ++ smbios_type1.serial); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, sku_number_str), +- type1.sku); ++ smbios_type1.sku); + smbios_maybe_add_str(1, offsetof(struct smbios_type_1, family_str), +- type1.family); ++ smbios_type1.family); + if (qemu_uuid_set) { + /* We don't encode the UUID in the "wire format" here because this + * function is for legacy mode and needs to keep the guest ABI, and +@@ -631,14 +623,14 @@ uint8_t *smbios_get_table_legacy(size_t *length) + size_t usr_offset; + + /* also complain if fields were given for types > 1 */ +- if (find_next_bit(have_fields_bitmap, ++ if (find_next_bit(smbios_have_fields_bitmap, + SMBIOS_MAX_TYPE + 1, 2) < SMBIOS_MAX_TYPE + 1) { + error_report("can't process fields for smbios " + "types > 1 on machine versions < 2.1!"); + exit(1); + } + +- if (test_bit(4, have_binfile_bitmap)) { ++ if (test_bit(4, smbios_have_binfile_bitmap)) { + error_report("can't process table for smbios " + "type 4 on machine versions < 2.1!"); + exit(1); +@@ -679,10 +671,10 @@ uint8_t *smbios_get_table_legacy(size_t *length) + + bool smbios_skip_table(uint8_t type, bool required_table) + { +- if (test_bit(type, have_binfile_bitmap)) { ++ if (test_bit(type, smbios_have_binfile_bitmap)) { + return true; /* user provided their own binary 
blob(s) */ + } +- if (test_bit(type, have_fields_bitmap)) { ++ if (test_bit(type, smbios_have_fields_bitmap)) { + return false; /* user provided fields via command line */ + } + if (smbios_have_defaults && required_table) { +@@ -710,25 +702,25 @@ static void smbios_build_type_0_table(void) + { + SMBIOS_BUILD_TABLE_PRE(0, T0_BASE, false); /* optional, leave up to BIOS */ + +- SMBIOS_TABLE_SET_STR(0, vendor_str, type0.vendor); +- SMBIOS_TABLE_SET_STR(0, bios_version_str, type0.version); ++ SMBIOS_TABLE_SET_STR(0, vendor_str, smbios_type0.vendor); ++ SMBIOS_TABLE_SET_STR(0, bios_version_str, smbios_type0.version); + + t->bios_starting_address_segment = cpu_to_le16(0xE800); /* from SeaBIOS */ + +- SMBIOS_TABLE_SET_STR(0, bios_release_date_str, type0.date); ++ SMBIOS_TABLE_SET_STR(0, bios_release_date_str, smbios_type0.date); + + t->bios_rom_size = 0; /* hardcoded in SeaBIOS with FIXME comment */ + + t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */ + t->bios_characteristics_extension_bytes[0] = 0; + t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */ +- if (type0.uefi) { ++ if (smbios_type0.uefi) { + t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */ + } + +- if (type0.have_major_minor) { +- t->system_bios_major_release = type0.major; +- t->system_bios_minor_release = type0.minor; ++ if (smbios_type0.have_major_minor) { ++ t->system_bios_major_release = smbios_type0.major; ++ t->system_bios_minor_release = smbios_type0.minor; + } else { + t->system_bios_major_release = 0; + t->system_bios_minor_release = 0; +@@ -758,18 +750,18 @@ static void smbios_build_type_1_table(void) + { + SMBIOS_BUILD_TABLE_PRE(1, T1_BASE, true); /* required */ + +- SMBIOS_TABLE_SET_STR(1, manufacturer_str, type1.manufacturer); +- SMBIOS_TABLE_SET_STR(1, product_name_str, type1.product); +- SMBIOS_TABLE_SET_STR(1, version_str, type1.version); +- SMBIOS_TABLE_SET_STR(1, serial_number_str, type1.serial); ++ SMBIOS_TABLE_SET_STR(1, manufacturer_str, 
smbios_type1.manufacturer); ++ SMBIOS_TABLE_SET_STR(1, product_name_str, smbios_type1.product); ++ SMBIOS_TABLE_SET_STR(1, version_str, smbios_type1.version); ++ SMBIOS_TABLE_SET_STR(1, serial_number_str, smbios_type1.serial); + if (qemu_uuid_set) { + smbios_encode_uuid(&t->uuid, &qemu_uuid); + } else { + memset(&t->uuid, 0, 16); + } + t->wake_up_type = 0x06; /* power switch */ +- SMBIOS_TABLE_SET_STR(1, sku_number_str, type1.sku); +- SMBIOS_TABLE_SET_STR(1, family_str, type1.family); ++ SMBIOS_TABLE_SET_STR(1, sku_number_str, smbios_type1.sku); ++ SMBIOS_TABLE_SET_STR(1, family_str, smbios_type1.family); + + SMBIOS_BUILD_TABLE_POST; + } +@@ -1184,12 +1176,12 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + * + * We get 'System Manufacturer' and 'Baseboard Manufacturer' + */ +- SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type1.product, product); +- SMBIOS_SET_DEFAULT(type1.version, version); +- SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ SMBIOS_SET_DEFAULT(smbios_type1.manufacturer, manufacturer); ++ SMBIOS_SET_DEFAULT(smbios_type1.product, product); ++ SMBIOS_SET_DEFAULT(smbios_type1.version, version); ++ SMBIOS_SET_DEFAULT(smbios_type1.family, "Red Hat Enterprise Linux"); + if (stream_version != NULL) { +- SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ SMBIOS_SET_DEFAULT(smbios_type1.sku, stream_version); + } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); + if (stream_product != NULL) { +@@ -1468,13 +1460,13 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + } + + if (header->type <= SMBIOS_MAX_TYPE) { +- if (test_bit(header->type, have_fields_bitmap)) { ++ if (test_bit(header->type, smbios_have_fields_bitmap)) { + error_setg(errp, + "can't load type %d struct, fields already specified!", + header->type); + return; + } +- set_bit(header->type, have_binfile_bitmap); ++ set_bit(header->type, smbios_have_binfile_bitmap); + } + + if (header->type == 4) { +@@ -1505,41 
+1497,42 @@ void smbios_entry_add(QemuOpts *opts, Error **errp) + return; + } + +- if (test_bit(type, have_binfile_bitmap)) { ++ if (test_bit(type, smbios_have_binfile_bitmap)) { + error_setg(errp, "can't add fields, binary file already loaded!"); + return; + } +- set_bit(type, have_fields_bitmap); ++ set_bit(type, smbios_have_fields_bitmap); + + switch (type) { + case 0: + if (!qemu_opts_validate(opts, qemu_smbios_type0_opts, errp)) { + return; + } +- save_opt(&type0.vendor, opts, "vendor"); +- save_opt(&type0.version, opts, "version"); +- save_opt(&type0.date, opts, "date"); +- type0.uefi = qemu_opt_get_bool(opts, "uefi", false); ++ save_opt(&smbios_type0.vendor, opts, "vendor"); ++ save_opt(&smbios_type0.version, opts, "version"); ++ save_opt(&smbios_type0.date, opts, "date"); ++ smbios_type0.uefi = qemu_opt_get_bool(opts, "uefi", false); + + val = qemu_opt_get(opts, "release"); + if (val) { +- if (sscanf(val, "%hhu.%hhu", &type0.major, &type0.minor) != 2) { ++ if (sscanf(val, "%hhu.%hhu", &smbios_type0.major, ++ &smbios_type0.minor) != 2) { + error_setg(errp, "Invalid release"); + return; + } +- type0.have_major_minor = true; ++ smbios_type0.have_major_minor = true; + } + return; + case 1: + if (!qemu_opts_validate(opts, qemu_smbios_type1_opts, errp)) { + return; + } +- save_opt(&type1.manufacturer, opts, "manufacturer"); +- save_opt(&type1.product, opts, "product"); +- save_opt(&type1.version, opts, "version"); +- save_opt(&type1.serial, opts, "serial"); +- save_opt(&type1.sku, opts, "sku"); +- save_opt(&type1.family, opts, "family"); ++ save_opt(&smbios_type1.manufacturer, opts, "manufacturer"); ++ save_opt(&smbios_type1.product, opts, "product"); ++ save_opt(&smbios_type1.version, opts, "version"); ++ save_opt(&smbios_type1.serial, opts, "serial"); ++ save_opt(&smbios_type1.sku, opts, "sku"); ++ save_opt(&smbios_type1.family, opts, "family"); + + val = qemu_opt_get(opts, "uuid"); + if (val) { +diff --git a/include/hw/firmware/smbios.h 
b/include/hw/firmware/smbios.h +index d55018e5e3..333de0d5fc 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -2,6 +2,7 @@ + #define QEMU_SMBIOS_H + + #include "qapi/qapi-types-machine.h" ++#include "qemu/bitmap.h" + + /* + * SMBIOS Support +@@ -16,8 +17,23 @@ + * + */ + ++typedef struct { ++ const char *vendor, *version, *date; ++ bool have_major_minor, uefi; ++ uint8_t major, minor; ++} smbios_type0_t; ++extern smbios_type0_t smbios_type0; ++ ++typedef struct { ++ const char *manufacturer, *product, *version, *serial, *sku, *family; ++ /* uuid is in qemu_uuid */ ++} smbios_type1_t; ++extern smbios_type1_t smbios_type1; + + #define SMBIOS_MAX_TYPE 127 ++extern DECLARE_BITMAP(smbios_have_binfile_bitmap, SMBIOS_MAX_TYPE + 1); ++extern DECLARE_BITMAP(smbios_have_fields_bitmap, SMBIOS_MAX_TYPE + 1); ++ + #define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + +-- +2.39.3 + diff --git a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch b/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch deleted file mode 100644 index f1de158..0000000 --- a/SOURCES/kvm-spice-move-client_migrate_info-command-to-ui.patch +++ /dev/null @@ -1,248 +0,0 @@ -From 00f6e941e75f378c84c773a15efde7dd085d9ce3 Mon Sep 17 00:00:00 2001 -From: Juan Quintela -Date: Wed, 1 Mar 2023 19:40:14 +0100 -Subject: [PATCH 21/56] spice: move client_migrate_info command to ui/ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [20/50] a587bb001b51a1f9fdf2fcfb0978bb931ae443b6 (peterx/qemu-kvm) - -It has nothing to do with migration, except for the "migrate" in the -name of the command. Move it with the rest of the ui commands. 
- -Signed-off-by: Juan Quintela -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit f9e1ef7482f1ee289b04f4b45702a1701bc8929d) -Signed-off-by: Peter Xu ---- - migration/migration-hmp-cmds.c | 17 ----------------- - migration/migration.c | 30 ------------------------------ - qapi/migration.json | 28 ---------------------------- - qapi/ui.json | 28 ++++++++++++++++++++++++++++ - ui/ui-hmp-cmds.c | 17 +++++++++++++++++ - ui/ui-qmp-cmds.c | 29 +++++++++++++++++++++++++++++ - 6 files changed, 74 insertions(+), 75 deletions(-) - -diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c -index 71da91967a..4e9f00e7dc 100644 ---- a/migration/migration-hmp-cmds.c -+++ b/migration/migration-hmp-cmds.c -@@ -636,23 +636,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) - hmp_handle_error(mon, err); - } - --void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) --{ -- Error *err = NULL; -- const char *protocol = qdict_get_str(qdict, "protocol"); -- const char *hostname = qdict_get_str(qdict, "hostname"); -- bool has_port = qdict_haskey(qdict, "port"); -- int port = qdict_get_try_int(qdict, "port", -1); -- bool has_tls_port = qdict_haskey(qdict, "tls-port"); -- int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -- const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -- -- qmp_client_migrate_info(protocol, hostname, -- has_port, port, has_tls_port, tls_port, -- cert_subject, &err); -- hmp_handle_error(mon, err); --} -- - void hmp_migrate_start_postcopy(Monitor *mon, const QDict *qdict) - { - Error *err = NULL; -diff --git a/migration/migration.c b/migration/migration.c -index aa96ffdc5b..b745d829a4 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -63,7 +63,6 @@ - #include "sysemu/cpus.h" - #include "yank_functions.h" - #include "sysemu/qtest.h" --#include "ui/qemu-spice.h" - - #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ - -@@ -1018,35 +1017,6 @@ 
MigrationParameters *qmp_query_migrate_parameters(Error **errp) - return params; - } - --void qmp_client_migrate_info(const char *protocol, const char *hostname, -- bool has_port, int64_t port, -- bool has_tls_port, int64_t tls_port, -- const char *cert_subject, -- Error **errp) --{ -- if (strcmp(protocol, "spice") == 0) { -- if (!qemu_using_spice(errp)) { -- return; -- } -- -- if (!has_port && !has_tls_port) { -- error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -- return; -- } -- -- if (qemu_spice.migrate_info(hostname, -- has_port ? port : -1, -- has_tls_port ? tls_port : -1, -- cert_subject)) { -- error_setg(errp, "Could not set up display for migration"); -- return; -- } -- return; -- } -- -- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); --} -- - AnnounceParameters *migrate_announce_params(void) - { - static AnnounceParameters ap; -diff --git a/qapi/migration.json b/qapi/migration.json -index c84fa10e86..2c35b7b9cf 100644 ---- a/qapi/migration.json -+++ b/qapi/migration.json -@@ -1203,34 +1203,6 @@ - { 'command': 'query-migrate-parameters', - 'returns': 'MigrationParameters' } - --## --# @client_migrate_info: --# --# Set migration information for remote display. This makes the server --# ask the client to automatically reconnect using the new parameters --# once migration finished successfully. Only implemented for SPICE. 
--# --# @protocol: must be "spice" --# @hostname: migration target hostname --# @port: spice tcp port for plaintext channels --# @tls-port: spice tcp port for tls-secured channels --# @cert-subject: server certificate subject --# --# Since: 0.14 --# --# Example: --# --# -> { "execute": "client_migrate_info", --# "arguments": { "protocol": "spice", --# "hostname": "virt42.lab.kraxel.org", --# "port": 1234 } } --# <- { "return": {} } --# --## --{ 'command': 'client_migrate_info', -- 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -- '*tls-port': 'int', '*cert-subject': 'str' } } -- - ## - # @migrate-start-postcopy: - # -diff --git a/qapi/ui.json b/qapi/ui.json -index 98322342f7..7ddd27a932 100644 ---- a/qapi/ui.json -+++ b/qapi/ui.json -@@ -1554,3 +1554,31 @@ - { 'command': 'display-update', - 'data': 'DisplayUpdateOptions', - 'boxed' : true } -+ -+## -+# @client_migrate_info: -+# -+# Set migration information for remote display. This makes the server -+# ask the client to automatically reconnect using the new parameters -+# once migration finished successfully. Only implemented for SPICE. 
-+# -+# @protocol: must be "spice" -+# @hostname: migration target hostname -+# @port: spice tcp port for plaintext channels -+# @tls-port: spice tcp port for tls-secured channels -+# @cert-subject: server certificate subject -+# -+# Since: 0.14 -+# -+# Example: -+# -+# -> { "execute": "client_migrate_info", -+# "arguments": { "protocol": "spice", -+# "hostname": "virt42.lab.kraxel.org", -+# "port": 1234 } } -+# <- { "return": {} } -+# -+## -+{ 'command': 'client_migrate_info', -+ 'data': { 'protocol': 'str', 'hostname': 'str', '*port': 'int', -+ '*tls-port': 'int', '*cert-subject': 'str' } } -diff --git a/ui/ui-hmp-cmds.c b/ui/ui-hmp-cmds.c -index 5c456ecc02..c671389473 100644 ---- a/ui/ui-hmp-cmds.c -+++ b/ui/ui-hmp-cmds.c -@@ -458,3 +458,20 @@ hmp_screendump(Monitor *mon, const QDict *qdict) - end: - hmp_handle_error(mon, err); - } -+ -+void hmp_client_migrate_info(Monitor *mon, const QDict *qdict) -+{ -+ Error *err = NULL; -+ const char *protocol = qdict_get_str(qdict, "protocol"); -+ const char *hostname = qdict_get_str(qdict, "hostname"); -+ bool has_port = qdict_haskey(qdict, "port"); -+ int port = qdict_get_try_int(qdict, "port", -1); -+ bool has_tls_port = qdict_haskey(qdict, "tls-port"); -+ int tls_port = qdict_get_try_int(qdict, "tls-port", -1); -+ const char *cert_subject = qdict_get_try_str(qdict, "cert-subject"); -+ -+ qmp_client_migrate_info(protocol, hostname, -+ has_port, port, has_tls_port, tls_port, -+ cert_subject, &err); -+ hmp_handle_error(mon, err); -+} -diff --git a/ui/ui-qmp-cmds.c b/ui/ui-qmp-cmds.c -index dbc4afcd73..a37a7024f3 100644 ---- a/ui/ui-qmp-cmds.c -+++ b/ui/ui-qmp-cmds.c -@@ -175,3 +175,32 @@ void qmp_display_update(DisplayUpdateOptions *arg, Error **errp) - abort(); - } - } -+ -+void qmp_client_migrate_info(const char *protocol, const char *hostname, -+ bool has_port, int64_t port, -+ bool has_tls_port, int64_t tls_port, -+ const char *cert_subject, -+ Error **errp) -+{ -+ if (strcmp(protocol, "spice") == 0) { -+ if 
(!qemu_using_spice(errp)) { -+ return; -+ } -+ -+ if (!has_port && !has_tls_port) { -+ error_setg(errp, QERR_MISSING_PARAMETER, "port/tls-port"); -+ return; -+ } -+ -+ if (qemu_spice.migrate_info(hostname, -+ has_port ? port : -1, -+ has_tls_port ? tls_port : -1, -+ cert_subject)) { -+ error_setg(errp, "Could not set up display for migration"); -+ return; -+ } -+ return; -+ } -+ -+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "protocol", "'spice'"); -+} --- -2.39.1 - diff --git a/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch new file mode 100644 index 0000000..81ae2f1 --- /dev/null +++ b/SOURCES/kvm-string-output-visitor-Fix-pseudo-struct-handling.patch @@ -0,0 +1,190 @@ +From c5f9e92cd49a2171a5b0223cafd7fab3f45edb82 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 9 Jan 2024 19:17:17 +0100 +Subject: [PATCH 06/22] string-output-visitor: Fix (pseudo) struct handling + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [2/17] 84e226f161680dd61b6635e213203d062c1aa556 (stefanha/centos-stream-qemu-kvm) + +Commit ff32bb53 tried to get minimal struct support into the string +output visitor by just making it return "". Unfortunately, it +forgot that the caller will still make more visitor calls for the +content of the struct. + +If the struct is contained in a list, such as IOThreadVirtQueueMapping, +in the better case its fields show up as separate list entries. In the +worse case, it contains another list, and the string output visitor +doesn't support nested lists and asserts that this doesn't happen. So as +soon as the optional "vqs" field in IOThreadVirtQueueMapping is +specified, we get a crash. 
+ +This can be reproduced with the following command line: + + echo "info qtree" | ./qemu-system-x86_64 \ + -object iothread,id=t0 \ + -blockdev null-co,node-name=disk \ + -device '{"driver": "virtio-blk-pci", "drive": "disk", + "iothread-vq-mapping": [{"iothread": "t0", "vqs": [0]}]}' \ + -monitor stdio + +Fix the problem by counting the nesting level of structs and ignoring +any visitor calls for values (apart from start/end_struct) while we're +not on the top level. + +Lists nested directly within lists remain unimplemented, as we don't +currently have a use case for them. + +Fixes: ff32bb53476539d352653f4ed56372dced73a388 +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2069 +Reported-by: Aihua Liang +Signed-off-by: Kevin Wolf +Message-ID: <20240109181717.42493-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 014b99a8e41c8cd1e895137654b44dec5430122c) +Signed-off-by: Stefan Hajnoczi +--- + qapi/string-output-visitor.c | 46 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 46 insertions(+) + +diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c +index f0c1dea89e..5115536b15 100644 +--- a/qapi/string-output-visitor.c ++++ b/qapi/string-output-visitor.c +@@ -65,6 +65,7 @@ struct StringOutputVisitor + } range_start, range_end; + GList *ranges; + void *list; /* Only needed for sanity checking the caller */ ++ unsigned int struct_nesting; + }; + + static StringOutputVisitor *to_sov(Visitor *v) +@@ -144,6 +145,10 @@ static bool print_type_int64(Visitor *v, const char *name, int64_t *obj, + StringOutputVisitor *sov = to_sov(v); + GList *l; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + switch (sov->list_mode) { + case LM_NONE: + string_output_append(sov, *obj); +@@ -231,6 +236,10 @@ static bool print_type_size(Visitor *v, const char *name, uint64_t *obj, + uint64_t val; + char *out, *psize; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (!sov->human) { + out 
= g_strdup_printf("%"PRIu64, *obj); + string_output_set(sov, out); +@@ -250,6 +259,11 @@ static bool print_type_bool(Visitor *v, const char *name, bool *obj, + Error **errp) + { + StringOutputVisitor *sov = to_sov(v); ++ ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + string_output_set(sov, g_strdup(*obj ? "true" : "false")); + return true; + } +@@ -260,6 +274,10 @@ static bool print_type_str(Visitor *v, const char *name, char **obj, + StringOutputVisitor *sov = to_sov(v); + char *out; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (sov->human) { + out = *obj ? g_strdup_printf("\"%s\"", *obj) : g_strdup(""); + } else { +@@ -273,6 +291,11 @@ static bool print_type_number(Visitor *v, const char *name, double *obj, + Error **errp) + { + StringOutputVisitor *sov = to_sov(v); ++ ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + string_output_set(sov, g_strdup_printf("%.17g", *obj)); + return true; + } +@@ -283,6 +306,10 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + StringOutputVisitor *sov = to_sov(v); + char *out; + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + if (sov->human) { + out = g_strdup(""); + } else { +@@ -295,6 +322,9 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + static bool start_struct(Visitor *v, const char *name, void **obj, + size_t size, Error **errp) + { ++ StringOutputVisitor *sov = to_sov(v); ++ ++ sov->struct_nesting++; + return true; + } + +@@ -302,6 +332,10 @@ static void end_struct(Visitor *v, void **obj) + { + StringOutputVisitor *sov = to_sov(v); + ++ if (--sov->struct_nesting) { ++ return; ++ } ++ + /* TODO actually print struct fields */ + string_output_set(sov, g_strdup("")); + } +@@ -312,6 +346,10 @@ start_list(Visitor *v, const char *name, GenericList **list, size_t size, + { + StringOutputVisitor *sov = to_sov(v); + ++ if (sov->struct_nesting) { ++ return true; ++ } ++ + /* we can't traverse a list in a list */ + assert(sov->list_mode == 
LM_NONE); + /* We don't support visits without a list */ +@@ -329,6 +367,10 @@ static GenericList *next_list(Visitor *v, GenericList *tail, size_t size) + StringOutputVisitor *sov = to_sov(v); + GenericList *ret = tail->next; + ++ if (sov->struct_nesting) { ++ return ret; ++ } ++ + if (ret && !ret->next) { + sov->list_mode = LM_END; + } +@@ -339,6 +381,10 @@ static void end_list(Visitor *v, void **obj) + { + StringOutputVisitor *sov = to_sov(v); + ++ if (sov->struct_nesting) { ++ return; ++ } ++ + assert(sov->list == obj); + assert(sov->list_mode == LM_STARTED || + sov->list_mode == LM_END || +-- +2.39.3 + diff --git a/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch new file mode 100644 index 0000000..f83635d --- /dev/null +++ b/SOURCES/kvm-string-output-visitor-show-structs-as-omitted.patch @@ -0,0 +1,90 @@ +From fb2069be402ec1322834c555714f0e993778cc9d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 12 Dec 2023 08:49:34 -0500 +Subject: [PATCH 05/22] string-output-visitor: show structs as "" + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/17] 0c08e8237d28fbdbdbc7576d4c17d2eeeb413c2a (stefanha/centos-stream-qemu-kvm) + +StringOutputVisitor crashes when it visits a struct because +->start_struct() is NULL. + +Show "" instead of crashing. This is necessary because the +virtio-blk-pci iothread-vq-mapping parameter that I'd like to introduce +soon is a list of IOThreadMapping structs. + +This patch is a quick fix to solve the crash, but the long-term solution +is replacing StringOutputVisitor with something that can handle the full +gamut of values in QEMU. 
+ +Cc: Markus Armbruster +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231212134934.500289-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Markus Armbruster +Signed-off-by: Kevin Wolf +(cherry picked from commit ff32bb53476539d352653f4ed56372dced73a388) +Signed-off-by: Stefan Hajnoczi +--- + include/qapi/string-output-visitor.h | 6 +++--- + qapi/string-output-visitor.c | 16 ++++++++++++++++ + 2 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/include/qapi/string-output-visitor.h b/include/qapi/string-output-visitor.h +index 268dfe9986..b1ee473b30 100644 +--- a/include/qapi/string-output-visitor.h ++++ b/include/qapi/string-output-visitor.h +@@ -26,9 +26,9 @@ typedef struct StringOutputVisitor StringOutputVisitor; + * If everything else succeeds, pass @result to visit_complete() to + * collect the result of the visit. + * +- * The string output visitor does not implement support for visiting +- * QAPI structs, alternates, null, or arbitrary QTypes. It also +- * requires a non-null list argument to visit_start_list(). ++ * The string output visitor does not implement support for alternates, null, ++ * or arbitrary QTypes. Struct fields are not shown. It also requires a ++ * non-null list argument to visit_start_list(). 
+ */ + Visitor *string_output_visitor_new(bool human, char **result); + +diff --git a/qapi/string-output-visitor.c b/qapi/string-output-visitor.c +index c0cb72dbe4..f0c1dea89e 100644 +--- a/qapi/string-output-visitor.c ++++ b/qapi/string-output-visitor.c +@@ -292,6 +292,20 @@ static bool print_type_null(Visitor *v, const char *name, QNull **obj, + return true; + } + ++static bool start_struct(Visitor *v, const char *name, void **obj, ++ size_t size, Error **errp) ++{ ++ return true; ++} ++ ++static void end_struct(Visitor *v, void **obj) ++{ ++ StringOutputVisitor *sov = to_sov(v); ++ ++ /* TODO actually print struct fields */ ++ string_output_set(sov, g_strdup("")); ++} ++ + static bool + start_list(Visitor *v, const char *name, GenericList **list, size_t size, + Error **errp) +@@ -379,6 +393,8 @@ Visitor *string_output_visitor_new(bool human, char **result) + v->visitor.type_str = print_type_str; + v->visitor.type_number = print_type_number; + v->visitor.type_null = print_type_null; ++ v->visitor.start_struct = start_struct; ++ v->visitor.end_struct = end_struct; + v->visitor.start_list = start_list; + v->visitor.next_list = next_list; + v->visitor.end_list = end_list; +-- +2.39.3 + diff --git a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch b/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch deleted file mode 100644 index 43c239a..0000000 --- a/SOURCES/kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch +++ /dev/null @@ -1,203 +0,0 @@ -From 03011d00cfb5862edb7394a9b79b269198af5c89 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:48:34 -0400 -Subject: [PATCH 7/7] target/i386: Add EPYC-Genoa model to support Zen 4 - processor series - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [7/7] 158091c691169a5d30c7c8005371ee7a0d9fc4ce (bdas1/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 166b1741884dd4fd7090b753cd7333868457a29b -Author: Babu Moger -Date: Thu May 4 15:53:12 2023 -0500 - - target/i386: Add EPYC-Genoa model to support Zen 4 processor series - - Adds the support for AMD EPYC Genoa generation processors. The model - display for the new processor will be EPYC-Genoa. - - Adds the following new feature bits on top of the feature bits from - the previous generation EPYC models. - - avx512f : AVX-512 Foundation instruction - avx512dq : AVX-512 Doubleword & Quadword Instruction - avx512ifma : AVX-512 Integer Fused Multiply Add instruction - avx512cd : AVX-512 Conflict Detection instruction - avx512bw : AVX-512 Byte and Word Instructions - avx512vl : AVX-512 Vector Length Extension Instructions - avx512vbmi : AVX-512 Vector Byte Manipulation Instruction - avx512_vbmi2 : AVX-512 Additional Vector Byte Manipulation Instruction - gfni : AVX-512 Galois Field New Instructions - avx512_vnni : AVX-512 Vector Neural Network Instructions - avx512_bitalg : AVX-512 Bit Algorithms, add bit algorithms Instructions - avx512_vpopcntdq: AVX-512 AVX-512 Vector Population Count Doubleword and - Quadword Instructions - avx512_bf16 : AVX-512 BFLOAT16 instructions - la57 : 57-bit virtual address support (5-level Page Tables) - vnmi : Virtual NMI (VNMI) allows the hypervisor to inject the NMI - into the guest without using Event Injection mechanism - meaning not required to track the guest NMI and intercepting - the IRET. - auto-ibrs : The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. 
- - Signed-off-by: Babu Moger - Message-Id: <20230504205313.225073-8-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 122 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index f1baefe775..b27db050a2 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1973,6 +1973,56 @@ static const CPUCaches epyc_milan_v2_cache_info = { - }, - }; - -+static const CPUCaches epyc_genoa_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 1 * MiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 2048, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4493,6 +4543,78 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .name = "EPYC-Genoa", -+ .level = 0xd, -+ .vendor = CPUID_VENDOR_AMD, -+ .family = 25, -+ .model = 17, -+ .stepping = 0, -+ .features[FEAT_1_EDX] = -+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | -+ CPUID_PSE36 | 
CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | -+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | -+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | -+ CPUID_VME | CPUID_FP87, -+ .features[FEAT_1_ECX] = -+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | -+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | -+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | -+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | -+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | -+ CPUID_EXT_SSE3, -+ .features[FEAT_8000_0001_EDX] = -+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | -+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | -+ CPUID_EXT2_SYSCALL, -+ .features[FEAT_8000_0001_ECX] = -+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | -+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | -+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | -+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, -+ .features[FEAT_8000_0008_EBX] = -+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | -+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | -+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | -+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | -+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, -+ .features[FEAT_8000_0021_EAX] = -+ CPUID_8000_0021_EAX_No_NESTED_DATA_BP | -+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | -+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | -+ CPUID_8000_0021_EAX_AUTO_IBRS, -+ .features[FEAT_7_0_EBX] = -+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | -+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | -+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | -+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | -+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | -+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | -+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | -+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, -+ 
.features[FEAT_7_0_ECX] = -+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | -+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | -+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | -+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | -+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | -+ CPUID_7_0_ECX_RDPID, -+ .features[FEAT_7_0_EDX] = -+ CPUID_7_0_EDX_FSRM, -+ .features[FEAT_7_1_EAX] = -+ CPUID_7_1_EAX_AVX512_BF16, -+ .features[FEAT_XSAVE] = -+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | -+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, -+ .features[FEAT_6_EAX] = -+ CPUID_6_EAX_ARAT, -+ .features[FEAT_SVM] = -+ CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | -+ CPUID_SVM_SVME_ADDR_CHK, -+ .xlevel = 0x80000022, -+ .model_id = "AMD EPYC-Genoa Processor", -+ .cache_info = &epyc_genoa_cache_info, -+ }, - }; - - /* --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch b/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch deleted file mode 100644 index 5e8f79b..0000000 --- a/SOURCES/kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 95c5cee20741b055dea9ac3ad3176bbaa1eaf705 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:46:25 -0400 -Subject: [PATCH 6/7] target/i386: Add VNMI and automatic IBRS feature bits -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [6/7] 24c0fb08973aa2615817f67576550ce2efadb75c (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 62a798d4bc2c3e767d94670776c77a7df274d7c5 -Author: Babu Moger -Date: Thu May 4 15:53:11 2023 -0500 - - target/i386: Add VNMI and automatic IBRS feature bits - - Add the following featute bits. 
- - vnmi: Virtual NMI (VNMI) allows the hypervisor to inject the NMI into the - guest without using Event Injection mechanism meaning not required to - track the guest NMI and intercepting the IRET. - The presence of this feature is indicated via the CPUID function - 0x8000000A_EDX[25]. - - automatic-ibrs : - The AMD Zen4 core supports a new feature called Automatic IBRS. - It is a "set-and-forget" feature that means that, unlike e.g., - s/w-toggled SPEC_CTRL.IBRS, h/w manages its IBRS mitigation - resources automatically across CPL transitions. - The presence of this feature is indicated via the CPUID function - 0x80000021_EAX[8]. - - The documention for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Santosh Shukla - Signed-off-by: Kim Phillips - Signed-off-by: Babu Moger - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-7-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 3 +++ - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index bbddc682df..f1baefe775 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -806,7 +806,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "pfthreshold", "avic", NULL, "v-vmsave-vmload", - "vgif", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "vnmi", NULL, NULL, - "svme-addr-chk", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, -@@ -925,7 +925,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", 
NULL, - NULL, NULL, "null-sel-clr-base", NULL, -- NULL, NULL, NULL, NULL, -+ "auto-ibrs", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c37abf62ae..f7d225e4f1 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -773,6 +773,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_SVM_AVIC (1U << 13) - #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) - #define CPUID_SVM_VGIF (1U << 16) -+#define CPUID_SVM_VNMI (1U << 25) - #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) - - /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ -@@ -948,6 +949,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) - /* Null Selector Clears Base */ - #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+/* Automatic IBRS */ -+#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch b/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch deleted file mode 100644 index 772bbbd..0000000 --- a/SOURCES/kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 2d7fb99c02a7666f1d8fe70a4749f0b7771a68ed Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:29:55 -0400 -Subject: [PATCH 3/7] target/i386: Add a couple of feature bits in - 8000_0008_EBX - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/7] b11020b249d4ecc2e3e1ddf4fdc4b52c42ec2642 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit bb039a230e6a7920d71d21fa9afee2653a678c48 -Author: Babu Moger -Date: Thu May 4 15:53:08 2023 -0500 - 
- target/i386: Add a couple of feature bits in 8000_0008_EBX - - Add the following feature bits. - - amd-psfd : Predictive Store Forwarding Disable: - PSF is a hardware-based micro-architectural optimization - designed to improve the performance of code execution by - predicting address dependencies between loads and stores. - While SSBD (Speculative Store Bypass Disable) disables both - PSF and speculative store bypass, PSFD only disables PSF. - PSFD may be desirable for the software which is concerned - with the speculative behavior of PSF but desires a smaller - performance impact than setting SSBD. - Depends on the following kernel commit: - b73a54321ad8 ("KVM: x86: Expose Predictive Store Forwarding Disable") - - stibp-always-on : - Single Thread Indirect Branch Prediction mode has enhanced - performance and may be left always on. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Message-Id: <20230504205313.225073-4-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 4 ++-- - target/i386/cpu.h | 4 ++++ - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 8aa7eb611c..c8f88aefc7 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -911,10 +911,10 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - NULL, NULL, NULL, NULL, - NULL, "wbnoinvd", NULL, NULL, - "ibpb", NULL, "ibrs", "amd-stibp", -- NULL, NULL, NULL, NULL, -+ NULL, "stibp-always-on", NULL, NULL, - NULL, NULL, NULL, NULL, - "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, -- NULL, NULL, NULL, NULL, -+ "amd-psfd", NULL, NULL, NULL, - }, - .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, - .tcg_features = 0, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index c28b9df217..81d2200543 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -934,8 +934,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_8000_0008_EBX_IBRS (1U << 14) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_8000_0008_EBX_STIBP (1U << 15) -+/* STIBP mode has enhanced performance and may be left always on */ -+#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) - /* Speculative Store Bypass Disable */ - #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) -+/* Predictive Store Forwarding Disable */ -+#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch b/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch deleted file mode 100644 index c714e49..0000000 --- 
a/SOURCES/kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 2a2f74c53258ef67034307b59afe2f4c679afaa2 Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:32:00 -0400 -Subject: [PATCH 4/7] target/i386: Add feature bits for CPUID_Fn80000021_EAX -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/7] 133044a7245226308406a684a875e1f96a394516 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit b70eec312b185197d639bff689007727e596afd1 -Author: Babu Moger -Date: Thu May 4 15:53:09 2023 -0500 - - target/i386: Add feature bits for CPUID_Fn80000021_EAX - - Add the following feature bits. - no-nested-data-bp : Processor ignores nested data breakpoints. - lfence-always-serializing : LFENCE instruction is always serializing. - null-sel-cls-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base. - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-5-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 24 ++++++++++++++++++++++++ - target/i386/cpu.h | 8 ++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index c8f88aefc7..7ddebbaa3c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -920,6 +920,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - .tcg_features = 0, - .unmigratable_flags = 0, - }, -+ [FEAT_8000_0021_EAX] = { -+ .type = CPUID_FEATURE_WORD, -+ .feat_names = { -+ "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, -+ NULL, NULL, "null-sel-clr-base", NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, NULL, -+ }, -+ .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, -+ .tcg_features = 0, -+ .unmigratable_flags = 0, -+ }, - [FEAT_XSAVE] = { - .type = CPUID_FEATURE_WORD, - .feat_names = { -@@ -6156,6 +6172,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, - *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ - } - break; -+ case 0x80000021: -+ *eax = env->features[FEAT_8000_0021_EAX]; -+ *ebx = *ecx = *edx = 0; -+ break; - default: - /* reserved values: zero */ - *eax = 0; -@@ -6585,6 +6605,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) - x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); - } - -+ if (env->features[FEAT_8000_0021_EAX]) { -+ x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); -+ } -+ - /* SGX requires CPUID[0x12] for EPC enumeration */ - if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { - x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); -diff --git a/target/i386/cpu.h 
b/target/i386/cpu.h -index 81d2200543..c37abf62ae 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -600,6 +600,7 @@ typedef enum FeatureWord { - FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ - FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ - FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ -+ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ - FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ - FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ - FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ -@@ -941,6 +942,13 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - /* Predictive Store Forwarding Disable */ - #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) - -+/* Processor ignores nested data breakpoints */ -+#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) -+/* LFENCE is always serializing */ -+#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) -+/* Null Selector Clears Base */ -+#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) -+ - #define CPUID_XSAVE_XSAVEOPT (1U << 0) - #define CPUID_XSAVE_XSAVEC (1U << 1) - #define CPUID_XSAVE_XGETBV1 (1U << 2) --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch b/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch deleted file mode 100644 index 9bb4bf9..0000000 --- a/SOURCES/kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch +++ /dev/null @@ -1,152 +0,0 @@ -From a8180665019d537ee9775614627bf9eb8bd4770e Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:35:33 -0400 -Subject: [PATCH 5/7] target/i386: Add missing feature bits in EPYC-Milan model -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/7] 8f77315c8d7010564423df3e3c594c90fd5f9c00 (bdas1/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit 27f03be6f59d04bd5673ba1e1628b2b490f9a9ff -Author: Babu Moger -Date: Thu May 4 15:53:10 2023 -0500 - - target/i386: Add missing feature bits in EPYC-Milan model - - Add the following feature bits for EPYC-Milan model and bump the version. - vaes : Vector VAES(ENC|DEC), VAES(ENC|DEC)LAST instruction support - vpclmulqdq : Vector VPCLMULQDQ instruction support - stibp-always-on : Single Thread Indirect Branch Prediction Mode has enhanced - performance and may be left Always on - amd-psfd : Predictive Store Forward Disable - no-nested-data-bp : Processor ignores nested data breakpoints - lfence-always-serializing : LFENCE instruction is always serializing - null-sel-clr-base : Null Selector Clears Base. When this bit is - set, a null segment load clears the segment base - - These new features will be added in EPYC-Milan-v2. The "-cpu help" output - after the change will be. - - x86 EPYC-Milan (alias configured by machine type) - x86 EPYC-Milan-v1 AMD EPYC-Milan Processor - x86 EPYC-Milan-v2 AMD EPYC-Milan Processor - - The documentation for the features are available in the links below. - a. Processor Programming Reference (PPR) for AMD Family 19h Model 01h, - Revision B1 Processors - b. SECURITY ANALYSIS OF AMD PREDICTIVE STORE FORWARDING - c. AMD64 Architecture Programmer’s Manual Volumes 1–5 Publication No. Revision - 40332 4.05 Date October 2022 - - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Link: https://www.amd.com/system/files/TechDocs/55898_B1_pub_0.50.zip - Link: https://www.amd.com/system/files/documents/security-analysis-predictive-store-forwarding.pdf - Link: https://www.amd.com/system/files/TechDocs/40332_4.05.pdf - Message-Id: <20230504205313.225073-6-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 70 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 7ddebbaa3c..bbddc682df 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1923,6 +1923,56 @@ static const CPUCaches epyc_milan_cache_info = { - }, - }; - -+static const CPUCaches epyc_milan_v2_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 32 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 32768, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - /* The following VMX features are not supported by KVM and are left out in the - * CPU definitions: - * -@@ -4422,6 +4472,26 @@ static const X86CPUDefinition builtin_x86_defs[] = { - .xlevel = 0x8000001E, - .model_id = "AMD EPYC-Milan 
Processor", - .cache_info = &epyc_milan_cache_info, -+ .versions = (X86CPUVersionDefinition[]) { -+ { .version = 1 }, -+ { -+ .version = 2, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Milan-v2 Processor" }, -+ { "vaes", "on" }, -+ { "vpclmulqdq", "on" }, -+ { "stibp-always-on", "on" }, -+ { "amd-psfd", "on" }, -+ { "no-nested-data-bp", "on" }, -+ { "lfence-always-serializing", "on" }, -+ { "null-sel-clr-base", "on" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_milan_v2_cache_info -+ }, -+ { /* end of list */ } -+ } - }, - }; - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch b/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch deleted file mode 100644 index 40c289a..0000000 --- a/SOURCES/kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 92f0b5d0c7a841a21cabbc6efc1d7baf0e5a3e0f Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:26:12 -0400 -Subject: [PATCH 2/7] target/i386: Add new EPYC CPU versions with updated - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/7] 71a2fd907636733f86729bc9328600f6f9306eaf (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit d7c72735f618a7ee27ee109d8b1468193734606a -Author: Michael Roth -Date: Thu May 4 15:53:07 2023 -0500 - - target/i386: Add new EPYC CPU versions with updated cache_info - - Introduce new EPYC cpu versions: EPYC-v4 and EPYC-Rome-v3. - The only difference vs. older models is an updated cache_info with - the 'complex_indexing' bit unset, since this bit is not currently - defined for AMD and may cause problems should it be used for - something else in the future. Setting this bit will also cause - CPUID validation failures when running SEV-SNP guests. 
- - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. Tsirkin - Message-Id: <20230504205313.225073-3-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 118 insertions(+) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 3558c92ed0..8aa7eb611c 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1707,6 +1707,56 @@ static const CPUCaches epyc_cache_info = { - }, - }; - -+static CPUCaches epyc_v4_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 64 * KiB, -+ .line_size = 64, -+ .associativity = 4, -+ .partitions = 1, -+ .sets = 256, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 8 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 8192, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_rome_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -1757,6 +1807,56 @@ static const CPUCaches epyc_rome_cache_info = { - }, - }; - -+static const CPUCaches epyc_rome_v3_cache_info = { -+ .l1d_cache = &(CPUCacheInfo) { -+ .type = DATA_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ 
.lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l1i_cache = &(CPUCacheInfo) { -+ .type = INSTRUCTION_CACHE, -+ .level = 1, -+ .size = 32 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 64, -+ .lines_per_tag = 1, -+ .self_init = 1, -+ .no_invd_sharing = true, -+ }, -+ .l2_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 2, -+ .size = 512 * KiB, -+ .line_size = 64, -+ .associativity = 8, -+ .partitions = 1, -+ .sets = 1024, -+ .lines_per_tag = 1, -+ }, -+ .l3_cache = &(CPUCacheInfo) { -+ .type = UNIFIED_CACHE, -+ .level = 3, -+ .size = 16 * MiB, -+ .line_size = 64, -+ .associativity = 16, -+ .partitions = 1, -+ .sets = 16384, -+ .lines_per_tag = 1, -+ .self_init = true, -+ .inclusive = true, -+ .complex_indexing = false, -+ }, -+}; -+ - static const CPUCaches epyc_milan_cache_info = { - .l1d_cache = &(CPUCacheInfo) { - .type = DATA_CACHE, -@@ -4112,6 +4212,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 4, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-v4 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_v4_cache_info -+ }, - { /* end of list */ } - } - }, -@@ -4231,6 +4340,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { - { /* end of list */ } - } - }, -+ { -+ .version = 3, -+ .props = (PropValue[]) { -+ { "model-id", -+ "AMD EPYC-Rome-v3 Processor" }, -+ { /* end of list */ } -+ }, -+ .cache_info = &epyc_rome_v3_cache_info -+ }, - { /* end of list */ } - } - }, --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch deleted file mode 100644 index 2b1cbc9..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FB_CLEAR-feature.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 0d056d6da9e4147d5965bf3507f6d6d6a413924d Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 
06:52:43 -0400 -Subject: [PATCH 2/5] target/i386: add support for FB_CLEAR feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/2] 5f191964ba25754107a06ef907f4ac614280aaa1 (eesposit/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 22e1094ca82d5518c1b69aff3e87c550776ae1eb -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:59 2023 -0500 - - target/i386: add support for FB_CLEAR feature - - As reported by the Intel's doc: - "FB_CLEAR: The processor will overwrite fill buffer values as part of - MD_CLEAR operations with the VERW instruction. - On these processors, L1D_FLUSH does not overwrite fill buffer values." - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. 
- - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-3-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index caf6338cc0..839706b430 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1012,7 +1012,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", - "taa-no", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -- NULL, NULL, NULL, NULL, -+ NULL, "fb-clear", NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index 74fa649b60..c28b9df217 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -989,6 +989,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) - #define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) - #define MSR_ARCH_CAP_TAA_NO (1U << 8) -+#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) - - #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) - --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch b/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch deleted file mode 100644 index 39f2542..0000000 --- a/SOURCES/kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 14eae569030805680570d93412100ad26242c7e6 Mon Sep 17 00:00:00 2001 -From: Emanuele Giuseppe Esposito -Date: Wed, 24 May 2023 06:52:34 -0400 -Subject: [PATCH 1/5] target/i386: add support for FLUSH_L1D feature - -RH-Author: Emanuele Giuseppe Esposito -RH-MergeRequest: 167: target/i386: add support for FB_CLEAR feature -RH-Bugzilla: 2216201 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/2] e296c75c5cd7e1d16d3c70483d52aeba9f9eb2cd (eesposit/qemu-kvm) - -Bugzilla: 
https://bugzilla.redhat.com/show_bug.cgi?id=2216201 - -commit 0e7e3bf1a552c178924867fa7c2f30ccc8a179e0 -Author: Emanuele Giuseppe Esposito -Date: Wed Feb 1 08:57:58 2023 -0500 - - target/i386: add support for FLUSH_L1D feature - - As reported by Intel's doc: - "L1D_FLUSH: Writeback and invalidate the L1 data cache" - - If this cpu feature is present in host, allow QEMU to choose whether to - show it to the guest too. - One disadvantage of not exposing it is that the guest will report - a non existing vulnerability in - /sys/devices/system/cpu/vulnerabilities/mmio_stale_data - because the mitigation is present only when the cpu has - (FLUSH_L1D and MD_CLEAR) or FB_CLEAR - features enabled. - - Signed-off-by: Emanuele Giuseppe Esposito - Message-Id: <20230201135759.555607-2-eesposit@redhat.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Emanuele Giuseppe Esposito ---- - target/i386/cpu.c | 2 +- - target/i386/cpu.h | 2 ++ - 2 files changed, 3 insertions(+), 1 deletion(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 0ef2bf1b93..caf6338cc0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -860,7 +860,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { - "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", - NULL, NULL, "amx-bf16", "avx512-fp16", - "amx-tile", "amx-int8", "spec-ctrl", "stibp", -- NULL, "arch-capabilities", "core-capability", "ssbd", -+ "flush-l1d", "arch-capabilities", "core-capability", "ssbd", - }, - .cpuid = { - .eax = 7, -diff --git a/target/i386/cpu.h b/target/i386/cpu.h -index d243e290d3..74fa649b60 100644 ---- a/target/i386/cpu.h -+++ b/target/i386/cpu.h -@@ -896,6 +896,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, - #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) - /* Single Thread Indirect Branch Predictors */ - #define CPUID_7_0_EDX_STIBP (1U << 27) -+/* Flush L1D cache */ -+#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) - /* Arch Capabilities */ - #define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) 
- /* Core Capability */ --- -2.39.3 - diff --git a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch b/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch deleted file mode 100644 index 2c81c72..0000000 --- a/SOURCES/kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch +++ /dev/null @@ -1,116 +0,0 @@ -From 457e74c076e0fe7b64631dfd4369d167f0762c9a Mon Sep 17 00:00:00 2001 -From: Bandan Das -Date: Wed, 9 Aug 2023 12:22:41 -0400 -Subject: [PATCH 1/7] target/i386: allow versioned CPUs to specify new - cache_info - -RH-Author: Bandan Das -RH-MergeRequest: 198: Add EPYC-Genoa CPU model in qemu -RH-Bugzilla: 2094913 -RH-Acked-by: Wei Huang -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/7] 6070e07a4bb070d1c15a811b2bd3195929c18d61 (bdas1/qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094913 - -commit cca0a000d06f897411a8af4402e5d0522bbe450b -Author: Michael Roth -Date: Thu May 4 15:53:06 2023 -0500 - - target/i386: allow versioned CPUs to specify new cache_info - - New EPYC CPUs versions require small changes to their cache_info's. - Because current QEMU x86 CPU definition does not support versioned - cach_info, we would have to declare a new CPU type for each such case. - To avoid the dup work, add "cache_info" in X86CPUVersionDefinition", - to allow new cache_info pointers to be specified for a new CPU version. - - Co-developed-by: Wei Huang - Signed-off-by: Wei Huang - Signed-off-by: Michael Roth - Signed-off-by: Babu Moger - Acked-by: Michael S. 
Tsirkin - Message-Id: <20230504205313.225073-2-babu.moger@amd.com> - Signed-off-by: Paolo Bonzini - -Signed-off-by: Bandan Das ---- - target/i386/cpu.c | 35 ++++++++++++++++++++++++++++++++--- - 1 file changed, 32 insertions(+), 3 deletions(-) - -diff --git a/target/i386/cpu.c b/target/i386/cpu.c -index 4ac3046313..3558c92ed0 100644 ---- a/target/i386/cpu.c -+++ b/target/i386/cpu.c -@@ -1598,6 +1598,7 @@ typedef struct X86CPUVersionDefinition { - const char *alias; - const char *note; - PropValue *props; -+ const CPUCaches *const cache_info; - } X86CPUVersionDefinition; - - /* Base definition for a CPU model */ -@@ -5213,6 +5214,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) - assert(vdef->version == version); - } - -+static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, -+ X86CPUModel *model) -+{ -+ const X86CPUVersionDefinition *vdef; -+ X86CPUVersion version = x86_cpu_model_resolve_version(model); -+ const CPUCaches *cache_info = model->cpudef->cache_info; -+ -+ if (version == CPU_VERSION_LEGACY) { -+ return cache_info; -+ } -+ -+ for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { -+ if (vdef->cache_info) { -+ cache_info = vdef->cache_info; -+ } -+ -+ if (vdef->version == version) { -+ break; -+ } -+ } -+ -+ assert(vdef->version == version); -+ return cache_info; -+} -+ - /* - * Load data from X86CPUDefinition into a X86CPU object. - * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
-@@ -5245,7 +5271,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) - } - - /* legacy-cache defaults to 'off' if CPU model provides cache info */ -- cpu->legacy_cache = !def->cache_info; -+ cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); - - env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; - -@@ -6724,14 +6750,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) - - /* Cache information initialization */ - if (!cpu->legacy_cache) { -- if (!xcc->model || !xcc->model->cpudef->cache_info) { -+ const CPUCaches *cache_info = -+ x86_cpu_get_versioned_cache_info(cpu, xcc->model); -+ -+ if (!xcc->model || !cache_info) { - g_autofree char *name = x86_cpu_class_get_model_name(xcc); - error_setg(errp, - "CPU model '%s' doesn't support legacy-cache=off", name); - return; - } - env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = -- *xcc->model->cpudef->cache_info; -+ *cache_info; - } else { - /* Build legacy cache information */ - env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; --- -2.39.3 - diff --git a/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch new file mode 100644 index 0000000..a2d712f --- /dev/null +++ b/SOURCES/kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch @@ -0,0 +1,205 @@ +From cc8d794932e26df7c7f3c8cc0c1f42da8d52f12b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 15 Jan 2024 10:26:52 +0100 +Subject: [PATCH 069/101] target/s390x/kvm/pv: Provide some more useful + information if decryption fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 213: s390x: Provide some more useful information if decryption of a PV image fails +RH-Jira: RHEL-18212 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] 
4ffb61869f7df33e23d3e0ebf8c29e386e3f6cbc (thuth/qemu-kvm-cs9) + +JIRA: https://issues.redhat.com/browse/RHEL-18212 + +commit 7af51621b16ae86646cc2dc9dee30de8176ff761 +Author: Thomas Huth +Date: Wed Jan 10 15:29:16 2024 +0100 + + target/s390x/kvm/pv: Provide some more useful information if decryption fails + + It's a common scenario to copy guest images from one host to another + to run the guest on the other machine. This (of course) does not work + with "secure execution" guests since they are encrypted with one certain + host key. However, if you still (accidentally) do it, you only get a + very user-unfriendly error message that looks like this: + + qemu-system-s390x: KVM PV command 2 (KVM_PV_SET_SEC_PARMS) failed: + header rc 108 rrc 5 IOCTL rc: -22 + + Let's provide at least a somewhat nicer hint to the users so that they + are able to figure out what might have gone wrong. + + Buglink: https://issues.redhat.com/browse/RHEL-18212 + Message-ID: <20240110142916.850605-1-thuth@redhat.com> + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Reviewed-by: Claudio Imbrenda + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + hw/s390x/ipl.c | 5 ++--- + hw/s390x/ipl.h | 2 +- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 25 ++++++++++++++++++++----- + target/s390x/kvm/pv.h | 5 +++-- + 5 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 515dcf51b5..b23a6a0ef3 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -703,7 +703,7 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu) + cpu_physical_memory_unmap(addr, len, 1, len); + } + +-int s390_ipl_prepare_pv_header(void) ++int s390_ipl_prepare_pv_header(Error **errp) + { + IplParameterBlock *ipib = s390_ipl_get_iplb_pv(); + IPLBlockPV *ipib_pv = &ipib->pv; +@@ -712,8 +712,7 @@ int s390_ipl_prepare_pv_header(void) + + cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr, + ipib_pv->pv_header_len); +- rc = 
s390_pv_set_sec_parms((uintptr_t)hdr, +- ipib_pv->pv_header_len); ++ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ipib_pv->pv_header_len, errp); + g_free(hdr); + return rc; + } +diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h +index 7fc86e7905..57cd125769 100644 +--- a/hw/s390x/ipl.h ++++ b/hw/s390x/ipl.h +@@ -107,7 +107,7 @@ typedef union IplParameterBlock IplParameterBlock; + + int s390_ipl_set_loadparm(uint8_t *loadparm); + void s390_ipl_update_diag308(IplParameterBlock *iplb); +-int s390_ipl_prepare_pv_header(void); ++int s390_ipl_prepare_pv_header(Error **errp); + int s390_ipl_pv_unpack(void); + void s390_ipl_prepare_cpu(S390CPU *cpu); + IplParameterBlock *s390_ipl_get_iplb(void); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 984891b82a..e26ce26f5a 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -391,7 +391,7 @@ static int s390_machine_protect(S390CcwMachineState *ms) + } + + /* Set SE header and unpack */ +- rc = s390_ipl_prepare_pv_header(); ++ rc = s390_ipl_prepare_pv_header(&local_err); + if (rc) { + goto out_err; + } +@@ -410,6 +410,9 @@ static int s390_machine_protect(S390CcwMachineState *ms) + return rc; + + out_err: ++ if (local_err) { ++ error_report_err(local_err); ++ } + s390_machine_unprotect(ms); + return rc; + } +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 6a69be7e5c..7ca7faec73 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -29,7 +29,8 @@ static bool info_valid; + static struct kvm_s390_pv_info_vm info_vm; + static struct kvm_s390_pv_info_dump info_dump; + +-static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) ++static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data, ++ int *pvrc) + { + struct kvm_pv_cmd pv_cmd = { + .cmd = cmd, +@@ -46,6 +47,9 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + "IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc, + rc); + } ++ if (pvrc) { ++ *pvrc = 
pv_cmd.rc; ++ } + return rc; + } + +@@ -53,12 +57,13 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + * This macro lets us pass the command as a string to the function so + * we can print it on an error. + */ +-#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data) ++#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL) ++#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc) + #define s390_pv_cmd_exit(cmd, data) \ + { \ + int rc; \ + \ +- rc = __s390_pv_cmd(cmd, #cmd, data);\ ++ rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \ + if (rc) { \ + exit(1); \ + } \ +@@ -142,14 +147,24 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) + return true; + } + +-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp) + { ++ int ret, pvrc; + struct kvm_s390_pv_sec_parm args = { + .origin = origin, + .length = length, + }; + +- return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args); ++ ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc); ++ if (ret) { ++ error_setg(errp, "Failed to set secure execution parameters"); ++ if (pvrc == 0x108) { ++ error_append_hint(errp, "Please check whether the image is " ++ "correctly encrypted for this host\n"); ++ } ++ } ++ ++ return ret; + } + + /* +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 7b935e2246..5877d28ff1 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -42,7 +42,7 @@ int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms); +-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); ++int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp); + int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak); + void s390_pv_prep_reset(void); + int s390_pv_verify(void); +@@ -62,7 +62,8 @@ static inline int 
s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; } +-static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } ++static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, ++ Error **errp) { return 0; } + static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; } + static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } +-- +2.39.3 + diff --git a/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch new file mode 100644 index 0000000..9b3eefb --- /dev/null +++ b/SOURCES/kvm-tests-remove-aio_context_acquire-tests.patch @@ -0,0 +1,125 @@ +From 420bf75353286324822c3bbca3b52a7a56ed668c Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:20:00 -0500 +Subject: [PATCH 083/101] tests: remove aio_context_acquire() tests + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [14/26] f6421037c1523bc957f3be0f4ad05571ae012dba (kmwolf/centos-qemu-kvm) + +The aio_context_acquire() API is being removed. Drop the test case that +calls the API. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-4-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + tests/unit/test-aio.c | 67 +------------------------------------------ + 1 file changed, 1 insertion(+), 66 deletions(-) + +diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c +index 337b6e4ea7..e77d86be87 100644 +--- a/tests/unit/test-aio.c ++++ b/tests/unit/test-aio.c +@@ -100,76 +100,12 @@ static void event_ready_cb(EventNotifier *e) + + /* Tests using aio_*. 
*/ + +-typedef struct { +- QemuMutex start_lock; +- EventNotifier notifier; +- bool thread_acquired; +-} AcquireTestData; +- +-static void *test_acquire_thread(void *opaque) +-{ +- AcquireTestData *data = opaque; +- +- /* Wait for other thread to let us start */ +- qemu_mutex_lock(&data->start_lock); +- qemu_mutex_unlock(&data->start_lock); +- +- /* event_notifier_set might be called either before or after +- * the main thread's call to poll(). The test case's outcome +- * should be the same in either case. +- */ +- event_notifier_set(&data->notifier); +- aio_context_acquire(ctx); +- aio_context_release(ctx); +- +- data->thread_acquired = true; /* success, we got here */ +- +- return NULL; +-} +- + static void set_event_notifier(AioContext *nctx, EventNotifier *notifier, + EventNotifierHandler *handler) + { + aio_set_event_notifier(nctx, notifier, handler, NULL, NULL); + } + +-static void dummy_notifier_read(EventNotifier *n) +-{ +- event_notifier_test_and_clear(n); +-} +- +-static void test_acquire(void) +-{ +- QemuThread thread; +- AcquireTestData data; +- +- /* Dummy event notifier ensures aio_poll() will block */ +- event_notifier_init(&data.notifier, false); +- set_event_notifier(ctx, &data.notifier, dummy_notifier_read); +- g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */ +- +- qemu_mutex_init(&data.start_lock); +- qemu_mutex_lock(&data.start_lock); +- data.thread_acquired = false; +- +- qemu_thread_create(&thread, "test_acquire_thread", +- test_acquire_thread, +- &data, QEMU_THREAD_JOINABLE); +- +- /* Block in aio_poll(), let other thread kick us and acquire context */ +- aio_context_acquire(ctx); +- qemu_mutex_unlock(&data.start_lock); /* let the thread run */ +- g_assert(aio_poll(ctx, true)); +- g_assert(!data.thread_acquired); +- aio_context_release(ctx); +- +- qemu_thread_join(&thread); +- set_event_notifier(ctx, &data.notifier, NULL); +- event_notifier_cleanup(&data.notifier); +- +- g_assert(data.thread_acquired); +-} +- + static void 
test_bh_schedule(void) + { + BHTestData data = { .n = 0 }; +@@ -879,7 +815,7 @@ static void test_worker_thread_co_enter(void) + qemu_thread_get_self(&this_thread); + co = qemu_coroutine_create(co_check_current_thread, &this_thread); + +- qemu_thread_create(&worker_thread, "test_acquire_thread", ++ qemu_thread_create(&worker_thread, "test_aio_co_enter", + test_aio_co_enter, + co, QEMU_THREAD_JOINABLE); + +@@ -899,7 +835,6 @@ int main(int argc, char **argv) + while (g_main_context_iteration(NULL, false)); + + g_test_init(&argc, &argv, NULL); +- g_test_add_func("/aio/acquire", test_acquire); + g_test_add_func("/aio/bh/schedule", test_bh_schedule); + g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); + g_test_add_func("/aio/bh/cancel", test_bh_cancel); +-- +2.39.3 + diff --git a/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch new file mode 100644 index 0000000..0afdea2 --- /dev/null +++ b/SOURCES/kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch @@ -0,0 +1,46 @@ +From bbe64d706b3cb8b10ecd22bd71cf76b21eea257f Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 25 Jan 2024 17:58:03 +0100 +Subject: [PATCH 20/22] tests/unit: Bump test-replication timeout to 60 seconds + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [16/17] 200768aedee44d10aa8d199b92a9c17a9002fc3f (stefanha/centos-stream-qemu-kvm) + +We're seeing timeouts for this test on CI runs (specifically for +ubuntu-20.04-s390x-all). It doesn't fail consistently, but even the +successful runs take about 27 or 28 seconds, which is not very far from +the 30 seconds timeout. + +Bump the timeout a bit to make failure less likely even on this CI host. 
+ +Signed-off-by: Kevin Wolf +Message-ID: <20240125165803.48373-1-kwolf@redhat.com> +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 63b18312d14ac984acaf13c7c55d9baa2d61496e) +Signed-off-by: Stefan Hajnoczi +--- + tests/unit/meson.build | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index a05d471090..28db6adea8 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -173,7 +173,8 @@ test_env.set('G_TEST_BUILDDIR', meson.current_build_dir()) + + slow_tests = { + 'test-crypto-tlscredsx509': 45, +- 'test-crypto-tlssession': 45 ++ 'test-crypto-tlssession': 45, ++ 'test-replication': 60, + } + + foreach test_name, extra: tests +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch deleted file mode 100644 index ef99b30..0000000 --- a/SOURCES/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +++ /dev/null @@ -1,88 +0,0 @@ -From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 -From: Peter Maydell -Date: Fri, 12 May 2023 15:43:38 +0100 -Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Marc-André Lureau -RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots -RH-Bugzilla: 2222579 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) - -When we take a PNG screenshot the ordering of the colour channels in -the data is not correct, resulting in the image having weird -colouring compared to the actual display. (Specifically, on a -little-endian host the blue and red channels are swapped; on -big-endian everything is wrong.) 
- -This happens because the pixman idea of the pixel data and the libpng -idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, -with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits -0-7. This means that on little-endian systems the bytes in memory -are - B G R A -and on big-endian systems they are - A R G B - -libpng, on the other hand, thinks of pixels as being a series of -values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA -always wants bytes in the order - R G B A - -This isn't the same as the pixman order for either big or little -endian hosts. - -The alpha channel is also unnecessary bulk in the output PNG file, -because there is no alpha information in a screenshot. - -To handle the endianness issue, we already define in ui/qemu-pixman.h -various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent -byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and -PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of - R G B -and 3 bytes per pixel. - -(PPM format screenshots get this right; they already use the -PIXMAN_BE_r8g8b8 format.) 
- -Cc: qemu-stable@nongnu.org -Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 -Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") -Signed-off-by: Peter Maydell -Reviewed-by: Marc-André Lureau -Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org - -(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) -Signed-off-by: Marc-André Lureau ---- - ui/console.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/ui/console.c b/ui/console.c -index 6e8a3cdc62..e173731e20 100644 ---- a/ui/console.c -+++ b/ui/console.c -@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_struct *png_ptr; - png_info *info_ptr; - g_autoptr(pixman_image_t) linebuf = -- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); -+ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); - uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); - FILE *f = fdopen(fd, "wb"); - int y; -@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) - png_init_io(png_ptr, f); - - png_set_IHDR(png_ptr, info_ptr, width, height, 8, -- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, -+ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, - PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); - - png_write_info(png_ptr, info_ptr); --- -2.39.3 - diff --git a/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch b/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch new file mode 100644 index 0000000..b51961c --- /dev/null +++ b/SOURCES/kvm-ui-clipboard-add-asserts-for-update-and-request.patch @@ -0,0 +1,81 @@ +From 6e4f68e9ba3fe75ca6f200f189f96bb402f0ee8e Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:49 +0100 +Subject: [PATCH 02/20] ui/clipboard: add asserts for update and request +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 228: 
ui/clipboard: mark type as not available when there is no data +RH-Jira: RHEL-19629 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Gerd Hoffmann +RH-Commit: [2/2] 176b4b835fd8aa226f2fa93fd334b9384080cf21 (jmaloy/jmaloy-qemu-kvm-2) + +JIRA: https://issues.redhat.com/browse/RHEL-19629 +CVE: CVE-2023-6683 +Upstream: Merged + +ui/clipboard: add asserts for update and request + +commit 9c416582611b7495bdddb4c5456c7acb64b78938 +Author: Fiona Ebner +Date: Wed Jan 24 11:57:49 2024 +0100 + + ui/clipboard: add asserts for update and request + + Should an issue like CVE-2023-6683 ever appear again in the future, + it will be more obvious which assumption was violated. + + Suggested-by: Marc-André Lureau + Signed-off-by: Fiona Ebner + Reviewed-by: Marc-André Lureau + Message-ID: <20240124105749.204610-2-f.ebner@proxmox.com> + +Signed-off-by: Jon Maloy +--- + ui/clipboard.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index b3f6fa3c9e..4264884a6c 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -65,12 +65,24 @@ bool qemu_clipboard_check_serial(QemuClipboardInfo *info, bool client) + + void qemu_clipboard_update(QemuClipboardInfo *info) + { ++ uint32_t type; + QemuClipboardNotify notify = { + .type = QEMU_CLIPBOARD_UPDATE_INFO, + .info = info, + }; + assert(info->selection < QEMU_CLIPBOARD_SELECTION__COUNT); + ++ for (type = 0; type < QEMU_CLIPBOARD_TYPE__COUNT; type++) { ++ /* ++ * If data is missing, the clipboard owner's 'request' callback needs to ++ * be set. Otherwise, there is no way to get the clipboard data and ++ * qemu_clipboard_request() cannot be called. 
++ */ ++ if (info->types[type].available && !info->types[type].data) { ++ assert(info->owner && info->owner->request); ++ } ++ } ++ + notifier_list_notify(&clipboard_notifiers, &notify); + + if (cbinfo[info->selection] != info) { +@@ -132,6 +144,8 @@ void qemu_clipboard_request(QemuClipboardInfo *info, + !info->owner) + return; + ++ assert(info->owner->request); ++ + info->types[type].requested = true; + info->owner->request(info, type); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch b/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch new file mode 100644 index 0000000..00c9369 --- /dev/null +++ b/SOURCES/kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch @@ -0,0 +1,107 @@ +From 097516bef2993d917e76d92066ca2eb067e45394 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:48 +0100 +Subject: [PATCH 01/20] ui/clipboard: mark type as not available when there is + no data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 228: ui/clipboard: mark type as not available when there is no data +RH-Jira: RHEL-19629 +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/2] 74ded03d6376f9693733d673502219f76eab7099 (jmaloy/jmaloy-qemu-kvm-2) + +JIRA: https://issues.redhat.com/browse/RHEL-19629 +CVE: CVE-2023-6683 +Upstream: Merged + +commit 405484b29f6548c7b86549b0f961b906337aa68a +Author: Fiona Ebner +Date: Wed Jan 24 11:57:48 2024 +0100 + + ui/clipboard: mark type as not available when there is no data + + With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT + message with len=0. In qemu_clipboard_set_data(), the clipboard info + will be updated setting data to NULL (because g_memdup(data, size) + returns NULL when size is 0). 
If the client does not set the + VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then + the 'request' callback for the clipboard peer is not initialized. + Later, because data is NULL, qemu_clipboard_request() can be reached + via vdagent_chr_write() and vdagent_clipboard_recv_request() and + there, the clipboard owner's 'request' callback will be attempted to + be called, but that is a NULL pointer. + + In particular, this can happen when using the KRDC (22.12.3) VNC + client. + + Another scenario leading to the same issue is with two clients (say + noVNC and KRDC): + + The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and + initializes its cbpeer. + + The KRDC client does not, but triggers a vnc_client_cut_text() (note + it's not the _ext variant)). There, a new clipboard info with it as + the 'owner' is created and via qemu_clipboard_set_data() is called, + which in turn calls qemu_clipboard_update() with that info. + + In qemu_clipboard_update(), the notifier for the noVNC client will be + called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the + noVNC client. The 'owner' in that clipboard info is the clipboard peer + for the KRDC client, which did not initialize the 'request' function. + That sounds correct to me, it is the owner of that clipboard info. + + Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set + the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it + passes), that clipboard info is passed to qemu_clipboard_request() and + the original segfault still happens. + + Fix the issue by handling updates with size 0 differently. In + particular, mark in the clipboard info that the type is not available. + + While at it, switch to g_memdup2(), because g_memdup() is deprecated. 
+ + Cc: qemu-stable@nongnu.org + Fixes: CVE-2023-6683 + Reported-by: Markus Frank + Suggested-by: Marc-André Lureau + Signed-off-by: Fiona Ebner + Reviewed-by: Marc-André Lureau + Tested-by: Markus Frank + Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> + +Signed-off-by: Jon Maloy +--- + ui/clipboard.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 3d14bffaf8..b3f6fa3c9e 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, + } + + g_free(info->types[type].data); +- info->types[type].data = g_memdup(data, size); +- info->types[type].size = size; +- info->types[type].available = true; ++ if (size) { ++ info->types[type].data = g_memdup2(data, size); ++ info->types[type].size = size; ++ info->types[type].available = true; ++ } else { ++ info->types[type].data = NULL; ++ info->types[type].size = 0; ++ info->types[type].available = false; ++ } + + if (update) { + qemu_clipboard_update(info); +-- +2.39.3 + diff --git a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch b/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch deleted file mode 100644 index 8c468d8..0000000 --- a/SOURCES/kvm-util-async-teardown-wire-up-query-command-line-optio.patch +++ /dev/null @@ -1,180 +0,0 @@ -From c1502b0cd16378d6d5bd4259b90bf81a5fb5aad3 Mon Sep 17 00:00:00 2001 -From: Claudio Imbrenda -Date: Fri, 5 May 2023 14:00:51 +0200 -Subject: [PATCH 20/21] util/async-teardown: wire up query-command-line-options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Thomas Huth -RH-MergeRequest: 173: Improve memory reclaiming for z15 Secure Execution guests -RH-Bugzilla: 2168500 -RH-Acked-by: Cornelia Huck -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Cédric Le Goater -RH-Commit: [1/2] 76e5f25df2c02721f5a29f552ee3061be589abb2 (thuth/qemu-kvm-cs9) - 
-Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2168500 - -Add new -run-with option with an async-teardown=on|off parameter. It is -visible in the output of query-command-line-options QMP command, so it -can be discovered and used by libvirt. - -The option -async-teardown is now redundant, deprecate it. - -Reported-by: Boris Fiuczynski -Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux") -Signed-off-by: Claudio Imbrenda -Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com> -[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst] -Signed-off-by: Thomas Huth - -(cherry picked from commit 80bd81cadd127c1e2fc784612a52abe392670ba4) -Conflicts: - docs/about/deprecated.rst (missing context from other patches) -Signed-off-by: Thomas Huth ---- - docs/about/deprecated.rst | 5 +++++ - os-posix.c | 14 ++++++++++++++ - qemu-options.hx | 34 +++++++++++++++++++++++----------- - util/async-teardown.c | 21 +++++++++++++++++++++ - 4 files changed, 63 insertions(+), 11 deletions(-) - -diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst -index 1ca9dc33d6..52893fcf38 100644 ---- a/docs/about/deprecated.rst -+++ b/docs/about/deprecated.rst -@@ -111,6 +111,11 @@ Use ``-machine acpi=off`` instead. - The HAXM project has been retired (see https://github.com/intel/haxm#status). - Use "whpx" (on Windows) or "hvf" (on macOS) instead. - -+``-async-teardown`` (since 8.1) -+''''''''''''''''''''''''''''''' -+ -+Use ``-run-with async-teardown=on`` instead. 
-+ - - QEMU Machine Protocol (QMP) commands - ------------------------------------ -diff --git a/os-posix.c b/os-posix.c -index 5adc69f560..90ea71725f 100644 ---- a/os-posix.c -+++ b/os-posix.c -@@ -36,6 +36,8 @@ - #include "qemu/log.h" - #include "sysemu/runstate.h" - #include "qemu/cutils.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" - - #ifdef CONFIG_LINUX - #include -@@ -152,9 +154,21 @@ int os_parse_cmd_args(int index, const char *optarg) - daemonize = 1; - break; - #if defined(CONFIG_LINUX) -+ /* deprecated */ - case QEMU_OPTION_asyncteardown: - init_async_teardown(); - break; -+ case QEMU_OPTION_run_with: { -+ QemuOpts *opts = qemu_opts_parse_noisily(qemu_find_opts("run-with"), -+ optarg, false); -+ if (!opts) { -+ exit(1); -+ } -+ if (qemu_opt_get_bool(opts, "async-teardown", false)) { -+ init_async_teardown(); -+ } -+ break; -+ } - #endif - default: - return -1; -diff --git a/qemu-options.hx b/qemu-options.hx -index 52b49f1f6a..b18f933703 100644 ---- a/qemu-options.hx -+++ b/qemu-options.hx -@@ -4766,20 +4766,32 @@ DEF("qtest-log", HAS_ARG, QEMU_OPTION_qtest_log, "", QEMU_ARCH_ALL) - DEF("async-teardown", 0, QEMU_OPTION_asyncteardown, - "-async-teardown enable asynchronous teardown\n", - QEMU_ARCH_ALL) --#endif - SRST - ``-async-teardown`` -- Enable asynchronous teardown. A new process called "cleanup/" -- will be created at startup sharing the address space with the main qemu -- process, using clone. It will wait for the main qemu process to -- terminate completely, and then exit. -- This allows qemu to terminate very quickly even if the guest was -- huge, leaving the teardown of the address space to the cleanup -- process. Since the cleanup process shares the same cgroups as the -- main qemu process, accounting is performed correctly. This only -- works if the cleanup process is not forcefully killed with SIGKILL -- before the main qemu process has terminated completely. -+ This option is deprecated and should no longer be used. 
The new option -+ ``-run-with async-teardown=on`` is a replacement. - ERST -+DEF("run-with", HAS_ARG, QEMU_OPTION_run_with, -+ "-run-with async-teardown[=on|off]\n" -+ " misc QEMU process lifecycle options\n" -+ " async-teardown=on enables asynchronous teardown\n", -+ QEMU_ARCH_ALL) -+SRST -+``-run-with`` -+ Set QEMU process lifecycle options. -+ -+ ``async-teardown=on`` enables asynchronous teardown. A new process called -+ "cleanup/" will be created at startup sharing the address -+ space with the main QEMU process, using clone. It will wait for the -+ main QEMU process to terminate completely, and then exit. This allows -+ QEMU to terminate very quickly even if the guest was huge, leaving the -+ teardown of the address space to the cleanup process. Since the cleanup -+ process shares the same cgroups as the main QEMU process, accounting is -+ performed correctly. This only works if the cleanup process is not -+ forcefully killed with SIGKILL before the main QEMU process has -+ terminated completely. 
-+ERST -+#endif - - DEF("msg", HAS_ARG, QEMU_OPTION_msg, - "-msg [timestamp[=on|off]][,guest-name=[on|off]]\n" -diff --git a/util/async-teardown.c b/util/async-teardown.c -index 62cdeb0f20..3ab19c8740 100644 ---- a/util/async-teardown.c -+++ b/util/async-teardown.c -@@ -12,6 +12,9 @@ - */ - - #include "qemu/osdep.h" -+#include "qemu/config-file.h" -+#include "qemu/option.h" -+#include "qemu/module.h" - #include - #include - #include -@@ -144,3 +147,21 @@ void init_async_teardown(void) - clone(async_teardown_fn, new_stack_for_clone(), CLONE_VM, NULL); - sigprocmask(SIG_SETMASK, &old_signals, NULL); - } -+ -+static QemuOptsList qemu_run_with_opts = { -+ .name = "run-with", -+ .head = QTAILQ_HEAD_INITIALIZER(qemu_run_with_opts.head), -+ .desc = { -+ { -+ .name = "async-teardown", -+ .type = QEMU_OPT_BOOL, -+ }, -+ { /* end of list */ } -+ }, -+}; -+ -+static void register_teardown(void) -+{ -+ qemu_add_opts(&qemu_run_with_opts); -+} -+opts_init(register_teardown); --- -2.39.3 - diff --git a/SOURCES/kvm-util-char_dev-Add-open_cdev.patch b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch new file mode 100644 index 0000000..1f1e870 --- /dev/null +++ b/SOURCES/kvm-util-char_dev-Add-open_cdev.patch @@ -0,0 +1,175 @@ +From de167878ec4ca159cc6def5134c91c5fe9b5ab96 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Tue, 21 Nov 2023 16:44:01 +0800 +Subject: [PATCH 022/101] util/char_dev: Add open_cdev() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [21/67] 72bf9ec3ccc9959626235bd270ec84caa4cee435 (eauger1/centos-qemu-kvm) + +/dev/vfio/devices/vfioX may not exist. In that case it is still possible +to open /dev/char/$major:$minor instead. Add helper function to abstract +the cdev open. 
+ +Suggested-by: Jason Gunthorpe +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit d6b5c4c1b516a8176b74ec35a0af8cf89b04b6c1) +Signed-off-by: Eric Auger +--- + MAINTAINERS | 2 + + include/qemu/chardev_open.h | 16 ++++++++ + util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++ + util/meson.build | 1 + + 4 files changed, 100 insertions(+) + create mode 100644 include/qemu/chardev_open.h + create mode 100644 util/chardev_open.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index a5a446914a..ca70bb4e64 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan + S: Supported + F: backends/iommufd.c + F: include/sysemu/iommufd.h ++F: include/qemu/chardev_open.h ++F: util/chardev_open.c + + vhost + M: Michael S. Tsirkin +diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h +new file mode 100644 +index 0000000000..64e8fcfdcb +--- /dev/null ++++ b/include/qemu/chardev_open.h +@@ -0,0 +1,16 @@ ++/* ++ * QEMU Chardev Helper ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * Authors: Yi Liu ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. ++ */ ++ ++#ifndef QEMU_CHARDEV_OPEN_H ++#define QEMU_CHARDEV_OPEN_H ++ ++int open_cdev(const char *devpath, dev_t cdev); ++#endif +diff --git a/util/chardev_open.c b/util/chardev_open.c +new file mode 100644 +index 0000000000..f776429788 +--- /dev/null ++++ b/util/chardev_open.c +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2019, Mellanox Technologies. All rights reserved. ++ * Copyright (C) 2023 Intel Corporation. ++ * ++ * This software is available to you under a choice of one of two ++ * licenses. 
You may choose to be licensed under the terms of the GNU ++ * General Public License (GPL) Version 2, available from the file ++ * COPYING in the main directory of this source tree, or the ++ * OpenIB.org BSD license below: ++ * ++ * Redistribution and use in source and binary forms, with or ++ * without modification, are permitted provided that the following ++ * conditions are met: ++ * ++ * - Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * - Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials ++ * provided with the distribution. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS ++ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ++ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++ * SOFTWARE. ++ * ++ * Authors: Yi Liu ++ * ++ * Copied from ++ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/chardev_open.h" ++ ++static int open_cdev_internal(const char *path, dev_t cdev) ++{ ++ struct stat st; ++ int fd; ++ ++ fd = qemu_open_old(path, O_RDWR); ++ if (fd == -1) { ++ return -1; ++ } ++ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) || ++ (cdev != 0 && st.st_rdev != cdev)) { ++ close(fd); ++ return -1; ++ } ++ return fd; ++} ++ ++static int open_cdev_robust(dev_t cdev) ++{ ++ g_autofree char *devpath = NULL; ++ ++ /* ++ * This assumes that udev is being used and is creating the /dev/char/ ++ * symlinks. 
++ */ ++ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev)); ++ return open_cdev_internal(devpath, cdev); ++} ++ ++int open_cdev(const char *devpath, dev_t cdev) ++{ ++ int fd; ++ ++ fd = open_cdev_internal(devpath, cdev); ++ if (fd == -1 && cdev != 0) { ++ return open_cdev_robust(cdev); ++ } ++ return fd; ++} +diff --git a/util/meson.build b/util/meson.build +index c2322ef6e7..174c133368 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -108,6 +108,7 @@ if have_block + util_ss.add(files('filemonitor-stub.c')) + endif + util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c')) ++ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c')) + endif + + if cpu == 'aarch64' +-- +2.39.3 + diff --git a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch b/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch deleted file mode 100644 index fe68d18..0000000 --- a/SOURCES/kvm-util-iov-Make-qiov_slice-public.patch +++ /dev/null @@ -1,97 +0,0 @@ -From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:15 +0200 -Subject: [PATCH 1/9] util/iov: Make qiov_slice() public - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s) - -We want to inline qemu_iovec_init_extended() in block/io.c for padding -requests, and having access to qiov_slice() is useful for this. As a -public function, it is renamed to qemu_iovec_slice(). - -(We will need to count the number of I/O vector elements of a slice -there, and then later process this slice. Without qiov_slice(), we -would need to call qemu_iovec_subvec_niov(), and all further -IOV-processing functions may need to skip prefixing elements to -accomodate for a qiov_offset. 
Because qemu_iovec_subvec_niov() -internally calls qiov_slice(), we can just have the block/io.c code call -qiov_slice() itself, thus get the number of elements, and also create an -iovec array with the superfluous prefixing elements stripped, so the -following processing functions no longer need to skip them.) - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-2-hreitz@redhat.com> -(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 3 +++ - util/iov.c | 14 +++++++------- - 2 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 9330746680..46fadfb27a 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( - void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov); - int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); - void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); - void qemu_iovec_concat(QEMUIOVector *dst, -diff --git a/util/iov.c b/util/iov.c -index b4be580022..65a70449da 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, - } - - /* -- * qiov_slice -+ * qemu_iovec_slice - * - * Find subarray of iovec's, containing requested range. @head would - * be offset in first iov (returned by the function), @tail would be - * count of extra bytes in last iovec (returned iov + @niov - 1). 
- */ --static struct iovec *qiov_slice(QEMUIOVector *qiov, -- size_t offset, size_t len, -- size_t *head, size_t *tail, int *niov) -+struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -+ size_t offset, size_t len, -+ size_t *head, size_t *tail, int *niov) - { - struct iovec *iov, *end_iov; - -@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - size_t head, tail; - int niov; - -- qiov_slice(qiov, offset, len, &head, &tail, &niov); -+ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); - - return niov; - } -@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( - } - - if (mid_len) { -- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -+ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -+ &mid_head, &mid_tail, &mid_niov); - } - - total_niov = !!head_len + mid_niov + !!tail_len; --- -2.39.3 - diff --git a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch deleted file mode 100644 index fd21880..0000000 --- a/SOURCES/kvm-util-iov-Remove-qemu_iovec_init_extended.patch +++ /dev/null @@ -1,156 +0,0 @@ -From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 -From: Hanna Czenczek -Date: Tue, 11 Apr 2023 19:34:17 +0200 -Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() - -RH-Author: Hanna Czenczek -RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX -RH-Bugzilla: 2174676 -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) - -bdrv_pad_request() was the main user of qemu_iovec_init_extended(). -HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() -now. - -The only remaining user is qemu_iovec_init_slice(), which can easily -inline the small part it really needs. - -Note that qemu_iovec_init_extended() offered a memcpy() optimization to -initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used -to replace its functionality, does not, but calls qemu_iovec_add() for -every single element. If we decide this optimization was important, we -will need to re-implement it in qemu_iovec_concat_iov(), which might -also benefit its pre-existing users. - -Reviewed-by: Eric Blake -Reviewed-by: Vladimir Sementsov-Ogievskiy -Signed-off-by: Hanna Czenczek -Message-Id: <20230411173418.19549-4-hreitz@redhat.com> -(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) -Signed-off-by: Hanna Czenczek ---- - include/qemu/iov.h | 5 --- - util/iov.c | 79 +++++++--------------------------------------- - 2 files changed, 11 insertions(+), 73 deletions(-) - -diff --git a/include/qemu/iov.h b/include/qemu/iov.h -index 46fadfb27a..63a1c01965 100644 ---- a/include/qemu/iov.h -+++ b/include/qemu/iov.h -@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) - - void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); - void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len); - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len); - struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, -diff --git a/util/iov.c b/util/iov.c -index 65a70449da..866fb577f3 100644 ---- a/util/iov.c -+++ b/util/iov.c -@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) - return niov; - } - --/* -- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, -- * and @tail_buf buffer into new qiov. 
-- */ --int qemu_iovec_init_extended( -- QEMUIOVector *qiov, -- void *head_buf, size_t head_len, -- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, -- void *tail_buf, size_t tail_len) --{ -- size_t mid_head, mid_tail; -- int total_niov, mid_niov = 0; -- struct iovec *p, *mid_iov = NULL; -- -- assert(mid_qiov->niov <= IOV_MAX); -- -- if (SIZE_MAX - head_len < mid_len || -- SIZE_MAX - head_len - mid_len < tail_len) -- { -- return -EINVAL; -- } -- -- if (mid_len) { -- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, -- &mid_head, &mid_tail, &mid_niov); -- } -- -- total_niov = !!head_len + mid_niov + !!tail_len; -- if (total_niov > IOV_MAX) { -- return -EINVAL; -- } -- -- if (total_niov == 1) { -- qemu_iovec_init_buf(qiov, NULL, 0); -- p = &qiov->local_iov; -- } else { -- qiov->niov = qiov->nalloc = total_niov; -- qiov->size = head_len + mid_len + tail_len; -- p = qiov->iov = g_new(struct iovec, qiov->niov); -- } -- -- if (head_len) { -- p->iov_base = head_buf; -- p->iov_len = head_len; -- p++; -- } -- -- assert(!mid_niov == !mid_len); -- if (mid_niov) { -- memcpy(p, mid_iov, mid_niov * sizeof(*p)); -- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; -- p[0].iov_len -= mid_head; -- p[mid_niov - 1].iov_len -= mid_tail; -- p += mid_niov; -- } -- -- if (tail_len) { -- p->iov_base = tail_buf; -- p->iov_len = tail_len; -- } -- -- return 0; --} -- - /* - * Check if the contents of subrange of qiov data is all zeroes. 
- */ -@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) - void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, - size_t offset, size_t len) - { -- int ret; -+ struct iovec *slice_iov; -+ int slice_niov; -+ size_t slice_head, slice_tail; - - assert(source->size >= len); - assert(source->size - len >= offset); - -- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ -- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); -- assert(ret == 0); -+ slice_iov = qemu_iovec_slice(source, offset, len, -+ &slice_head, &slice_tail, &slice_niov); -+ if (slice_niov == 1) { -+ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); -+ } else { -+ qemu_iovec_init(qiov, slice_niov); -+ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); -+ } - } - - void qemu_iovec_destroy(QEMUIOVector *qiov) --- -2.39.3 - diff --git a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch b/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch deleted file mode 100644 index b0e66f6..0000000 --- a/SOURCES/kvm-util-mmap-alloc-qemu_fd_getfs.patch +++ /dev/null @@ -1,95 +0,0 @@ -From 439a8cdd010dfd253fc2277ae4ec605b5ba621d9 Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:36 -0400 -Subject: [PATCH 02/56] util/mmap-alloc: qemu_fd_getfs() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [1/50] 8970b5ae611a933d693e0c90cbf4eda073635494 (peterx/qemu-kvm) - -This new helper fetches file system type for a fd. Only Linux is -implemented so far. Currently only tmpfs and hugetlbfs are defined, -but it can grow as needed. 
- -Signed-off-by: Peter Xu -Reviewed-by: David Hildenbrand -Reviewed-by: Juan Quintela -Signed-off-by: Juan Quintela -(cherry picked from commit fa45f8dab9613993c042176ea2d25552bfebc955) -Signed-off-by: Peter Xu ---- - include/qemu/mmap-alloc.h | 7 +++++++ - util/mmap-alloc.c | 28 ++++++++++++++++++++++++++++ - 2 files changed, 35 insertions(+) - -diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h -index 2825e231a7..8344daaa03 100644 ---- a/include/qemu/mmap-alloc.h -+++ b/include/qemu/mmap-alloc.h -@@ -1,8 +1,15 @@ - #ifndef QEMU_MMAP_ALLOC_H - #define QEMU_MMAP_ALLOC_H - -+typedef enum { -+ QEMU_FS_TYPE_UNKNOWN = 0, -+ QEMU_FS_TYPE_TMPFS, -+ QEMU_FS_TYPE_HUGETLBFS, -+ QEMU_FS_TYPE_NUM, -+} QemuFsType; - - size_t qemu_fd_getpagesize(int fd); -+QemuFsType qemu_fd_getfs(int fd); - - /** - * qemu_ram_mmap: mmap anonymous memory, the specified file or device. -diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c -index 5ed7d29183..ed14f9c64d 100644 ---- a/util/mmap-alloc.c -+++ b/util/mmap-alloc.c -@@ -27,8 +27,36 @@ - - #ifdef CONFIG_LINUX - #include -+#include - #endif - -+QemuFsType qemu_fd_getfs(int fd) -+{ -+#ifdef CONFIG_LINUX -+ struct statfs fs; -+ int ret; -+ -+ if (fd < 0) { -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+ -+ do { -+ ret = fstatfs(fd, &fs); -+ } while (ret != 0 && errno == EINTR); -+ -+ switch (fs.f_type) { -+ case TMPFS_MAGIC: -+ return QEMU_FS_TYPE_TMPFS; -+ case HUGETLBFS_MAGIC: -+ return QEMU_FS_TYPE_HUGETLBFS; -+ default: -+ return QEMU_FS_TYPE_UNKNOWN; -+ } -+#else -+ return QEMU_FS_TYPE_UNKNOWN; -+#endif -+} -+ - size_t qemu_fd_getpagesize(int fd) - { - #ifdef CONFIG_LINUX --- -2.39.1 - diff --git a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch deleted file mode 100644 index 4e492d9..0000000 --- a/SOURCES/kvm-util-vfio-helpers-Use-g_file_read_link.patch +++ /dev/null @@ -1,82 +0,0 @@ -From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 -From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit dbdea0dbfe2c -Author: Akihiko Odaki -Date: Tue May 23 11:39:12 2023 +0900 - - util/vfio-helpers: Use g_file_read_link() - - When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is - 12.1.0, the compiler complains as follows: - - In file included from /usr/include/features.h:490, - from /usr/include/bits/libc-header-start.h:33, - from /usr/include/stdint.h:26, - from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, - from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, - from ../util/vfio-helpers.c:13: - In function 'readlink', - inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, - inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, - inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: - /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] - 119 | return __glibc_fortify (readlink, __len, sizeof (char), - | ^~~~~~~~~~~~~~~ - - This error implies the allocated buffer can be NULL. Use - g_file_read_link(), which allocates buffer automatically to avoid the - error. 
- - Signed-off-by: Akihiko Odaki - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - util/vfio-helpers.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c -index 2d8af38f88..f8bab46c68 100644 ---- a/util/vfio-helpers.c -+++ b/util/vfio-helpers.c -@@ -106,15 +106,17 @@ struct QEMUVFIOState { - */ - static char *sysfs_find_group_file(const char *device, Error **errp) - { -+ g_autoptr(GError) gerr = NULL; - char *sysfs_link; - char *sysfs_group; - char *p; - char *path = NULL; - - sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); -- sysfs_group = g_malloc0(PATH_MAX); -- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { -- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); -+ sysfs_group = g_file_read_link(sysfs_link, &gerr); -+ if (gerr) { -+ error_setg(errp, "Failed to find iommu group sysfs path: %s", -+ gerr->message); - goto out; - } - p = strrchr(sysfs_group, '/'); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch deleted file mode 100644 index 56b9aed..0000000 --- a/SOURCES/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 2 Jun 2023 16:38:52 +0200 -Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and - x-svq=on -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on -RH-Jira: RHEL-573 -RH-Acked-by: Jason Wang -RH-Acked-by: Laurent Vivier -RH-Commit: [1/1] 
b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm) - -It was a mistake to forbid in all cases, as SVQ is already able to send -all the CVQ messages before start forwarding data vqs. It actually -caused a regression, making impossible to migrate device previously -migratable. - -Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features") -Signed-off-by: Eugenio Pérez -Message-Id: <20230602143854.1879091-2-eperezma@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Tested-by: Lei Yang -(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0) -Signed-off-by: Eugenio Pérez ---- - net/vhost-vdpa.c | 11 +++++++---- - 1 file changed, 7 insertions(+), 4 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 8c8900f0f4..1ae839da34 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops_opaque = s; - - /* -- * TODO: We cannot migrate devices with CVQ as there is no way to set -- * the device state (MAC, MQ, etc) before starting the datapath. -+ * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -+ * there is no way to set the device state (MAC, MQ, etc) before -+ * starting the datapath. - * - * Migration blocker ownership now belongs to s->vhost_vdpa. 
- */ -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -+ if (!svq) { -+ error_setg(&s->vhost_vdpa.migration_blocker, -+ "net vdpa cannot migrate with CVQ feature"); -+ } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch b/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch deleted file mode 100644 index 1ab8f02..0000000 --- a/SOURCES/kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch +++ /dev/null @@ -1,105 +0,0 @@ -From 636eb63cbf23b31fc9880528490ac4bef680305b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 25 Jan 2023 08:47:34 +0100 -Subject: [PATCH 4/7] vdpa: export vhost_vdpa_set_vring_ready -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [4/7] 8d1fecec7a993b8b68e268e8783c200c158f5ee0 (eperezmartin/qemu-kvm) - -The vhost-vdpa net backend needs to enable vrings in a different order -than default, so export it. - -No functional change intended except for tracing, that now includes the -(virtio) index being enabled and the return value of the ioctl. - -Still ignoring return value of this function if called from -vhost_vdpa_dev_start, as reorganize calling code around it is out of -the scope of this series. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - hw/virtio/trace-events | 2 +- - hw/virtio/vhost-vdpa.c | 25 +++++++++++++------------ - include/hw/virtio/vhost-vdpa.h | 1 + - 3 files changed, 15 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 300dec8d3e..85b43cd8fe 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -48,7 +48,7 @@ vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI - vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 - vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" --vhost_vdpa_set_vring_ready(void *dev) "dev: %p" -+vhost_vdpa_set_vring_ready(void *dev, unsigned i, int r) "dev: %p, idx: %u, r: %d" - vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" - vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32 - vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32 -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index c04f14420d..e4d0101327 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -733,18 +733,17 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) - return idx; - } - --static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) - { -- int i; -- trace_vhost_vdpa_set_vring_ready(dev); -- for (i = 0; i < dev->nvqs; ++i) { -- struct vhost_vring_state state = { -- .index = dev->vq_index + i, -- .num = 1, -- }; -- vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -- } -- return 0; -+ struct vhost_dev *dev = v->dev; -+ struct vhost_vring_state state = { -+ .index = idx, -+ .num = 1, -+ }; -+ int r = 
vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); -+ -+ trace_vhost_vdpa_set_vring_ready(dev, idx, r); -+ return r; - } - - static int vhost_vdpa_set_config_call(struct vhost_dev *dev, -@@ -1155,7 +1154,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- vhost_vdpa_set_vring_ready(dev); -+ for (int i = 0; i < dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -+ } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h -index c278a2a8de..540642d304 100644 ---- a/include/hw/virtio/vhost-vdpa.h -+++ b/include/hw/virtio/vhost-vdpa.h -@@ -55,6 +55,7 @@ typedef struct vhost_vdpa { - } VhostVDPA; - - int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); -+int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); - - int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, - hwaddr size, void *vaddr, bool readonly); --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch b/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch deleted file mode 100644 index a37612c..0000000 --- a/SOURCES/kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch +++ /dev/null @@ -1,286 +0,0 @@ -From 1609e47511c9a02b26e0023ff6e1e999d7cdf179 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:43 +0200 -Subject: [PATCH 2/7] vdpa: move CVQ isolation check to net_init_vhost_vdpa -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [2/7] caed8f81c3e30e6147817e7f43225aa3ee90ff37 (eperezmartin/qemu-kvm) - -Evaluating it at start time instead of initialization time may make 
the -guest capable of dynamically adding or removing migration blockers. - -Also, moving to initialization reduces the number of ioctls in the -migration, reducing failure possibilities. - -As a drawback we need to check for CVQ isolation twice: one time with no -MQ negotiated and another one acking it, as long as the device supports -it. This is because Vring ASID / group management is based on vq -indexes, but we don't know the index of CVQ before negotiating MQ. - -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-3-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 155 ++++++++++++++++++++++++++++++++++------------- - 1 file changed, 112 insertions(+), 43 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 801d4e0422..ce17e4416a 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -43,6 +43,10 @@ typedef struct VhostVDPAState { - - /* The device always have SVQ enabled */ - bool always_svq; -+ -+ /* The device can isolate CVQ in its own ASID */ -+ bool cvq_isolated; -+ - bool started; - } VhostVDPAState; - -@@ -369,15 +373,8 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - --/** -- * Get vring virtqueue group -- * -- * @device_fd vdpa device fd -- * @vq_index Virtqueue index -- * -- * Return -errno in case of error, or vq group if success. 
-- */ --static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) -+static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, -+ Error **errp) - { - struct vhost_vring_state state = { - .index = vq_index, -@@ -386,8 +383,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - - if (unlikely(r < 0)) { - r = -errno; -- error_report("Cannot get VQ %u group: %s", vq_index, -- g_strerror(errno)); -+ error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index); - return r; - } - -@@ -487,9 +483,9 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - { - VhostVDPAState *s, *s0; - struct vhost_vdpa *v; -- uint64_t backend_features; - int64_t cvq_group; -- int cvq_index, r; -+ int r; -+ Error *err = NULL; - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -@@ -509,41 +505,22 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - /* - * If we early return in these cases SVQ will not be enabled. The migration - * will be blocked as long as vhost-vdpa backends will not offer _F_LOG. -- * -- * Calling VHOST_GET_BACKEND_FEATURES as they are not available in v->dev -- * yet. - */ -- r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -- if (unlikely(r < 0)) { -- error_report("Cannot get vdpa backend_features: %s(%d)", -- g_strerror(errno), errno); -- return -1; -+ if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ return 0; - } -- if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || -- !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { -+ -+ if (!s->cvq_isolated) { - return 0; - } - -- /* -- * Check if all the virtqueues of the virtio device are in a different vq -- * than the last vq. VQ group of last group passed in cvq_group. 
-- */ -- cvq_index = v->dev->vq_index_end - 1; -- cvq_group = vhost_vdpa_get_vring_group(v->device_fd, cvq_index); -+ cvq_group = vhost_vdpa_get_vring_group(v->device_fd, -+ v->dev->vq_index_end - 1, -+ &err); - if (unlikely(cvq_group < 0)) { -+ error_report_err(err); - return cvq_group; - } -- for (int i = 0; i < cvq_index; ++i) { -- int64_t group = vhost_vdpa_get_vring_group(v->device_fd, i); -- -- if (unlikely(group < 0)) { -- return group; -- } -- -- if (group == cvq_group) { -- return 0; -- } -- } - - r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID); - if (unlikely(r < 0)) { -@@ -806,6 +783,87 @@ static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { - .avail_handler = vhost_vdpa_net_handle_ctrl_avail, - }; - -+/** -+ * Probe if CVQ is isolated -+ * -+ * @device_fd The vdpa device fd -+ * @features Features offered by the device. -+ * @cvq_index The control vq pair index -+ * -+ * Returns <0 in case of failure, 0 if false and 1 if true. -+ */ -+static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features, -+ int cvq_index, Error **errp) -+{ -+ uint64_t backend_features; -+ int64_t cvq_group; -+ uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE | -+ VIRTIO_CONFIG_S_DRIVER | -+ VIRTIO_CONFIG_S_FEATURES_OK; -+ int r; -+ -+ ERRP_GUARD(); -+ -+ r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features); -+ if (unlikely(r < 0)) { -+ error_setg_errno(errp, errno, "Cannot get vdpa backend_features"); -+ return r; -+ } -+ -+ if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) { -+ return 0; -+ } -+ -+ r = ioctl(device_fd, VHOST_SET_FEATURES, &features); -+ if (unlikely(r)) { -+ error_setg_errno(errp, errno, "Cannot set features"); -+ } -+ -+ r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ if (unlikely(r)) { -+ error_setg_errno(errp, -r, "Cannot set device features"); -+ goto out; -+ } -+ -+ cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp); -+ if (unlikely(cvq_group < 0)) { -+ if 
(cvq_group != -ENOTSUP) { -+ r = cvq_group; -+ goto out; -+ } -+ -+ /* -+ * The kernel report VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend -+ * support ASID even if the parent driver does not. The CVQ cannot be -+ * isolated in this case. -+ */ -+ error_free(*errp); -+ *errp = NULL; -+ r = 0; -+ goto out; -+ } -+ -+ for (int i = 0; i < cvq_index; ++i) { -+ int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp); -+ if (unlikely(group < 0)) { -+ r = group; -+ goto out; -+ } -+ -+ if (group == (int64_t)cvq_group) { -+ r = 0; -+ goto out; -+ } -+ } -+ -+ r = 1; -+ -+out: -+ status = 0; -+ ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status); -+ return r; -+} -+ - static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - const char *device, - const char *name, -@@ -815,16 +873,26 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - bool is_datapath, - bool svq, - struct vhost_vdpa_iova_range iova_range, -- uint64_t features) -+ uint64_t features, -+ Error **errp) - { - NetClientState *nc = NULL; - VhostVDPAState *s; - int ret = 0; - assert(name); -+ int cvq_isolated; -+ - if (is_datapath) { - nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device, - name); - } else { -+ cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features, -+ queue_pair_index * 2, -+ errp); -+ if (unlikely(cvq_isolated < 0)) { -+ return NULL; -+ } -+ - nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer, - device, name); - } -@@ -851,6 +919,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; -+ s->cvq_isolated = cvq_isolated; - - /* - * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -@@ -982,7 +1051,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - for (i = 0; i < queue_pairs; i++) { - ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 2, true, 
opts->x_svq, -- iova_range, features); -+ iova_range, features, errp); - if (!ncs[i]) - goto err; - } -@@ -990,7 +1059,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, - if (has_cvq) { - nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, - vdpa_device_fd, i, 1, false, -- opts->x_svq, iova_range, features); -+ opts->x_svq, iova_range, features, errp); - if (!nc) - goto err; - } --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch b/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch deleted file mode 100644 index 4ebd8bd..0000000 --- a/SOURCES/kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 09bf0febef2512f00e71edca0fcbaf452652c2c7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 11:27:28 +0200 -Subject: [PATCH 6/7] vdpa: move vhost_vdpa_set_vring_ready to the caller -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [6/7] cf4fd1071ca127914c8e8d6aefec451cad97ecc1 (eperezmartin/qemu-kvm) - -Doing that way allows CVQ to be enabled before the dataplane vqs, -restoring the state as MQ or MAC addresses properly in the case of a -migration. - -The patch does it by defining a ->load NetClientInfo callback also for -dataplane. Ideally, this should be done by an independent patch, but -the function is already static so it would only add an empty -vhost_vdpa_net_data_load stub. 
- -Signed-off-by: Eugenio Pérez ---- -v3: -* Fix subject typo -* Expand patch message so it explains why ---- - hw/virtio/vdpa-dev.c | 3 +++ - hw/virtio/vhost-vdpa.c | 3 --- - net/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++---------- - 3 files changed, 34 insertions(+), 13 deletions(-) - -diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c -index 01b41eb0f1..8c47d643bf 100644 ---- a/hw/virtio/vdpa-dev.c -+++ b/hw/virtio/vdpa-dev.c -@@ -256,6 +256,9 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp) - error_setg_errno(errp, -ret, "Error starting vhost"); - goto err_guest_notifiers; - } -+ for (i = 0; i < s->dev.nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(&s->vdpa, i); -+ } - s->started = true; - - /* -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index e4d0101327..0d9d311abd 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -1154,9 +1154,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) - if (unlikely(!ok)) { - return -1; - } -- for (int i = 0; i < dev->nvqs; ++i) { -- vhost_vdpa_set_vring_ready(v, dev->vq_index + i); -- } - } else { - vhost_vdpa_suspend(dev); - vhost_vdpa_svqs_stop(dev); -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index a1b16bbc52..47b87bf80d 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -344,6 +344,22 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) - return 0; - } - -+static int vhost_vdpa_net_data_load(NetClientState *nc) -+{ -+ VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -+ struct vhost_vdpa *v = &s->vhost_vdpa; -+ bool has_cvq = v->dev->vq_index_end % 2; -+ -+ if (has_cvq) { -+ return 0; -+ } -+ -+ for (int i = 0; i < v->dev->nvqs; ++i) { -+ vhost_vdpa_set_vring_ready(v, i + v->dev->vq_index); -+ } -+ return 0; -+} -+ - static void vhost_vdpa_net_client_stop(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); -@@ -366,6 +382,7 @@ static NetClientInfo net_vhost_vdpa_info = { - .size = 
sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_data_start, -+ .load = vhost_vdpa_net_data_load, - .stop = vhost_vdpa_net_client_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, -@@ -682,18 +699,22 @@ static int vhost_vdpa_net_cvq_load(NetClientState *nc) - - assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); - -- if (!v->shadow_vqs_enabled) { -- return 0; -- } -+ vhost_vdpa_set_vring_ready(v, v->dev->vq_index); - -- n = VIRTIO_NET(v->dev->vdev); -- r = vhost_vdpa_net_load_mac(s, n); -- if (unlikely(r < 0)) { -- return r; -+ if (v->shadow_vqs_enabled) { -+ n = VIRTIO_NET(v->dev->vdev); -+ r = vhost_vdpa_net_load_mac(s, n); -+ if (unlikely(r < 0)) { -+ return r; -+ } -+ r = vhost_vdpa_net_load_mq(s, n); -+ if (unlikely(r)) { -+ return r; -+ } - } -- r = vhost_vdpa_net_load_mq(s, n); -- if (unlikely(r)) { -- return r; -+ -+ for (int i = 0; i < v->dev->vq_index; ++i) { -+ vhost_vdpa_set_vring_ready(v, i); - } - - return 0; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch b/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch deleted file mode 100644 index 9388d75..0000000 --- a/SOURCES/kvm-vdpa-remove-net-cvq-migration-blocker.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 46d5b861a39b7d0d3222162e6b7707526c131230 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 24 Mar 2023 13:28:15 +0100 -Subject: [PATCH 7/7] vdpa: remove net cvq migration blocker -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [7/7] 9542e305c7ea3a47e0f1fe0629281238b0bb2111 (eperezmartin/qemu-kvm) - -Now that we have add migration blockers if the device does not support -all the needed features, remove the general blocker applied to all net -devices with CVQ. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 12 ------------ - 1 file changed, 12 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 47b87bf80d..6e03db4afa 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -941,18 +941,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, - s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; - s->vhost_vdpa.shadow_vq_ops_opaque = s; - s->cvq_isolated = cvq_isolated; -- -- /* -- * TODO: We cannot migrate devices with CVQ and no x-svq enabled as -- * there is no way to set the device state (MAC, MQ, etc) before -- * starting the datapath. -- * -- * Migration blocker ownership now belongs to s->vhost_vdpa. -- */ -- if (!svq) { -- error_setg(&s->vhost_vdpa.migration_blocker, -- "net vdpa cannot migrate with CVQ feature"); -- } - } - ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); - if (ret) { --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch b/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch deleted file mode 100644 index 15dc410..0000000 --- a/SOURCES/kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch +++ /dev/null @@ -1,49 +0,0 @@ -From db7ca7692e264e8bf1bd9e08e3de7a92fc76a363 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Wed, 9 Aug 2023 18:07:26 +0200 -Subject: [PATCH 5/7] vdpa: rename vhost_vdpa_net_load to - vhost_vdpa_net_cvq_load -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [5/7] aea91f3274786665725af892eb905818eb0f44f1 (eperezmartin/qemu-kvm) - -Next patches will add the corresponding data load. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 29d3fd3ca6..a1b16bbc52 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -673,7 +673,7 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, - return *s->status != VIRTIO_NET_OK; - } - --static int vhost_vdpa_net_load(NetClientState *nc) -+static int vhost_vdpa_net_cvq_load(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_vdpa *v = &s->vhost_vdpa; -@@ -704,7 +704,7 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { - .size = sizeof(VhostVDPAState), - .receive = vhost_vdpa_receive, - .start = vhost_vdpa_net_cvq_start, -- .load = vhost_vdpa_net_load, -+ .load = vhost_vdpa_net_cvq_load, - .stop = vhost_vdpa_net_cvq_stop, - .cleanup = vhost_vdpa_cleanup, - .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch b/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch deleted file mode 100644 index c8b4913..0000000 --- a/SOURCES/kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 09583f39d51d16079c9fda32545d7a44b6f5c8c6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Fri, 26 May 2023 17:31:42 +0200 -Subject: [PATCH 1/7] vdpa: return errno in vhost_vdpa_get_vring_group error -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [1/7] 89745b1828a1af535c40657022d385250688d11d (eperezmartin/qemu-kvm) - -We need to tell in the caller, as some errors are expected in a normal -workflow. 
In particular, parent drivers in recent kernels with -VHOST_BACKEND_F_IOTLB_ASID may not support vring groups. In that case, --ENOTSUP is returned. - -This is the case of vp_vdpa in Linux 6.2. - -Next patches in this series will use that information to know if it must -abort or not. Also, next patches return properly an errp instead of -printing with error_report. - -Reviewed-by: Stefano Garzarella -Acked-by: Jason Wang -Signed-off-by: Eugenio Pérez -Message-Id: <20230526153143.470745-2-eperezma@redhat.com> -Tested-by: Lei Yang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin ---- - net/vhost-vdpa.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1ae839da34..801d4e0422 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -369,6 +369,14 @@ static NetClientInfo net_vhost_vdpa_info = { - .check_peer_type = vhost_vdpa_check_peer_type, - }; - -+/** -+ * Get vring virtqueue group -+ * -+ * @device_fd vdpa device fd -+ * @vq_index Virtqueue index -+ * -+ * Return -errno in case of error, or vq group if success. 
-+ */ - static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - { - struct vhost_vring_state state = { -@@ -377,6 +385,7 @@ static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index) - int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state); - - if (unlikely(r < 0)) { -+ r = -errno; - error_report("Cannot get VQ %u group: %s", vq_index, - g_strerror(errno)); - return r; --- -2.39.3 - diff --git a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch b/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch deleted file mode 100644 index bfb1b8e..0000000 --- a/SOURCES/kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 726662aee0bc295f6931b7aba1bd68f033e949aa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= -Date: Thu, 10 Aug 2023 16:08:18 +0200 -Subject: [PATCH 3/7] vdpa: use first queue SVQ state for CVQ default -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Eugenio Pérez -RH-MergeRequest: 199: CVQ migration support -RH-Jira: RHEL-923 -RH-Acked-by: Jason Wang -RH-Acked-by: Cindy Lu -RH-Commit: [3/7] 5c98f11b5080552a62c8e37ff2c23339455b7b86 (eperezmartin/qemu-kvm) - -Previous to this patch the only way CVQ would be shadowed is if it does -support to isolate CVQ group or if all vqs were shadowed from the -beginning. The second condition was checked at the beginning, and no -more configuration was done. - -After this series we need to check if data queues are shadowed because -they are in the middle of the migration. As checking if they are -shadowed already covers the previous case, let's just mimic it. 
- -Signed-off-by: Eugenio Pérez -Acked-by: Jason Wang ---- - net/vhost-vdpa.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index ce17e4416a..29d3fd3ca6 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -494,7 +494,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) - - s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; -- v->shadow_vqs_enabled = s->always_svq; -+ v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; - s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - - if (s->vhost_vdpa.shadow_data) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch deleted file mode 100644 index 1e00427..0000000 --- a/SOURCES/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in - vfio_bars_finalize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8af87a3ec7e4 -Author: Avihai Horon -Date: Tue Jul 4 16:39:27 2023 +0300 - - vfio: Fix null pointer dereference bug in vfio_bars_finalize() - - vfio_realize() has the following flow: - 1. vfio_bars_prepare() -- sets VFIOBAR->size. - 2. msix_early_setup(). - 3. vfio_bars_register() -- allocates VFIOBAR->mr. - - After vfio_bars_prepare() is called msix_early_setup() can fail. 
If it - does fail, vfio_bars_register() is never called and VFIOBAR->mr is not - allocated. - - In this case, vfio_bars_finalize() is called as part of the error flow - to free the bars' resources. However, vfio_bars_finalize() calls - object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and - thus we get a null pointer dereference. - - Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). - - Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") - Signed-off-by: Avihai Horon - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index ba40ca8784..9189459a38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) - - vfio_bar_quirk_finalize(vdev, i); - vfio_region_finalize(&bar->region); -- if (bar->size) { -+ if (bar->mr) { -+ assert(bar->size); - object_unparent(OBJECT(bar->mr)); - g_free(bar->mr); -+ bar->mr = NULL; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch b/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch deleted file mode 100644 index 78a554d..0000000 --- a/SOURCES/kvm-vfio-Implement-a-common-device-info-helper.patch +++ /dev/null @@ -1,196 +0,0 @@ -From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 17/37] vfio: Implement a common device info helper -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [15/28] 
9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 634f38f0f73f -Author: Alex Williamson -Date: Thu Jun 1 08:45:06 2023 -0600 - - vfio: Implement a common device info helper - - A common helper implementing the realloc algorithm for handling - capabilities. - - Reviewed-by: Philippe Mathieu-Daudé - Reviewed-by: Cédric Le Goater - Signed-off-by: Alex Williamson - Reviewed-by: Robin Voetter - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ - hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 41 insertions(+), 43 deletions(-) - -diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c -index f51190d466..59a2e03873 100644 ---- a/hw/s390x/s390-pci-vfio.c -+++ b/hw/s390x/s390-pci-vfio.c -@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, - memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); - } - --static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, -- uint32_t argsz) -+static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) - { -- struct vfio_device_info *info = g_malloc0(argsz); -- VFIOPCIDevice *vfio_pci; -- int fd; -+ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); - -- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); -- fd = vfio_pci->vbasedev.fd; -- -- /* -- * If the specified argsz is not large enough to contain all capabilities -- * it will be updated upon return from the ioctl. Retry until we have -- * a big enough buffer to hold the entire capability chain. On error, -- * just exit and rely on CLP defaults. 
-- */ --retry: -- info->argsz = argsz; -- -- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); -- g_free(info); -- return NULL; -- } -- -- if (info->argsz > argsz) { -- argsz = info->argsz; -- info = g_realloc(info, argsz); -- goto retry; -- } -- -- return info; -+ return vfio_get_device_info(vfio_pci->vbasedev.fd); - } - - /* -@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) - - assert(fh); - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return false; - } -@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) - { - g_autofree struct vfio_device_info *info = NULL; - -- info = get_device_info(pbdev, sizeof(*info)); -+ info = get_device_info(pbdev); - if (!info) { - return; - } -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index b73086e17a..3b4ac53f15 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) - } - } - -+struct vfio_device_info *vfio_get_device_info(int fd) -+{ -+ struct vfio_device_info *info; -+ uint32_t argsz = sizeof(*info); -+ -+ info = g_malloc0(argsz); -+ -+retry: -+ info->argsz = argsz; -+ -+ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { -+ g_free(info); -+ return NULL; -+ } -+ -+ if (info->argsz > argsz) { -+ argsz = info->argsz; -+ info = g_realloc(info, argsz); -+ goto retry; -+ } -+ -+ return info; -+} -+ - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp) - { -- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; -- int ret, fd; -+ g_autofree struct vfio_device_info *info = NULL; -+ int fd; - - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); - if (fd < 0) { -@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup *group, const char *name, - return fd; - } - -- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); -- if (ret) { -+ info = 
vfio_get_device_info(fd); -+ if (!info) { - error_setg_errno(errp, errno, "error getting device info"); - close(fd); -- return ret; -+ return -1; - } - - /* -@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, - vbasedev->group = group; - QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); - -- vbasedev->num_irqs = dev_info.num_irqs; -- vbasedev->num_regions = dev_info.num_regions; -- vbasedev->flags = dev_info.flags; -+ vbasedev->num_irqs = info->num_irqs; -+ vbasedev->num_regions = info->num_regions; -+ vbasedev->flags = info->flags; -+ -+ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); - -- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, -- dev_info.num_irqs); -+ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); - -- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); - return 0; - } - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 3dc5f2104c..6d1b8487c3 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); - void vfio_reset_handler(void *opaque); - VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); - void vfio_put_group(VFIOGroup *group); -+struct vfio_device_info *vfio_get_device_info(int fd); - int vfio_get_device(VFIOGroup *group, const char *name, - VFIODevice *vbasedev, Error **errp); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch new file mode 100644 index 0000000..040288f --- /dev/null +++ b/SOURCES/kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch @@ -0,0 +1,154 @@ +From f554328f6f4702743af71befcb83c25c36e4fa4d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:25 +0800 +Subject: [PATCH 046/101] vfio: Introduce a helper function 
to initialize + VFIODevice +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [45/67] 73225f394540bf5aeb70c0bdb89771f19a6d286d (eauger1/centos-qemu-kvm) + +Introduce a helper function to replace the common code to initialize +VFIODevice in pci, platform, ap and ccw VFIO device. + +No functional change intended. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6106a329141af7d47bdc3346ce9820d4714e0e5d) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 8 ++------ + hw/vfio/ccw.c | 8 ++------ + hw/vfio/helpers.c | 11 +++++++++++ + hw/vfio/pci.c | 6 ++---- + hw/vfio/platform.c | 6 ++---- + include/hw/vfio/vfio-common.h | 2 ++ + 6 files changed, 21 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 95fe7cd98b..e157aa1ff7 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->dev = DEVICE(vapdev); +- vbasedev->fd = -1; +- + /* + * vfio-ap devices operate in a way compatible with discarding of + * memory in RAM blocks, as no pages are pinned in the host. + * This needs to be set before vfio_get_device() for vfio common to + * handle ram_block_discard_disable(). 
+ */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops, ++ DEVICE(vapdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 6305a4c1b8..90e4a53437 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj) + VFIOCCWDevice *vcdev = VFIO_CCW(obj); + VFIODevice *vbasedev = &vcdev->vdev; + +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->dev = DEVICE(vcdev); +- vbasedev->fd = -1; +- + /* + * All vfio-ccw devices are believed to operate in a way compatible with + * discarding of memory in RAM blocks, ie. pages pinned in the host are +@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj) + * needs to be set before vfio_get_device() for vfio common to handle + * ram_block_discard_disable(). + */ +- vbasedev->ram_block_discard_allowed = true; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops, ++ DEVICE(vcdev), true); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 3592c3d54e..6789870802 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) + } + vbasedev->fd = fd; + } ++ ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard) ++{ ++ vbasedev->type = type; ++ vbasedev->ops = ops; ++ vbasedev->dev = dev; ++ vbasedev->fd = -1; ++ ++ vbasedev->ram_block_discard_allowed = ram_discard; ++} +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3f5900cc46..83c3238608 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3353,10 +3353,8 @@ static void vfio_instance_init(Object *obj) + vdev->host.slot = ~0U; + vdev->host.function = ~0U; + +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd 
= -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops, ++ DEVICE(vdev), false); + + vdev->nv_gpudirect_clique = 0xFF; + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 506eb8193f..a8d9b7da63 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj) + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); + VFIODevice *vbasedev = &vdev->vbasedev; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->ops = &vfio_platform_ops; +- vbasedev->dev = DEVICE(vdev); +- vbasedev->fd = -1; ++ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops, ++ DEVICE(vdev), false); + } + + #ifdef CONFIG_IOMMUFD +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index efcba19f66..b8aa8a5495 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -257,4 +257,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + /* Returns 0 on success, or a negative errno. 
*/ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); + void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); ++void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, ++ DeviceState *dev, bool ram_discard); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch new file mode 100644 index 0000000..d41e8fb --- /dev/null +++ b/SOURCES/kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch @@ -0,0 +1,129 @@ +From 7f392385d1b865904eae4b6681e3e7a87eb3af3d Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:27 +0800 +Subject: [PATCH 002/101] vfio: Introduce base object for VFIOContainer and + targeted interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [1/67] e63af50c2cb94f286b2d91f58c2d19dd862e019d (eauger1/centos-qemu-kvm) + +Introduce a dumb VFIOContainerBase object and its targeted interface. +This is willingly not a QOM object because we don't want it to be +visible from the user interface. The VFIOContainerBase will be +smoothly populated in subsequent patches as well as interfaces. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit f61dddd73232e3d82d560d1e1bca120446021f2f) +Signed-off-by: Eric Auger +--- + include/hw/vfio/vfio-common.h | 8 ++--- + include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++ + 2 files changed, 52 insertions(+), 6 deletions(-) + create mode 100644 include/hw/vfio/vfio-container-base.h + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a4a22accb9..586d153c12 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -30,6 +30,7 @@ + #include + #endif + #include "sysemu/sysemu.h" ++#include "hw/vfio/vfio-container-base.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -81,6 +82,7 @@ typedef struct VFIOAddressSpace { + struct VFIOGroup; + + typedef struct VFIOContainer { ++ VFIOContainerBase bcontainer; + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; +@@ -201,12 +203,6 @@ typedef struct VFIODisplay { + } dmabuf; + } VFIODisplay; + +-typedef struct { +- unsigned long *bitmap; +- hwaddr size; +- hwaddr pages; +-} VFIOBitmap; +- + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +new file mode 100644 +index 0000000000..1d6daaea5d +--- /dev/null ++++ b/include/hw/vfio/vfio-container-base.h +@@ -0,0 +1,50 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H ++#define HW_VFIO_VFIO_CONTAINER_BASE_H ++ ++#include "exec/memory.h" ++ ++typedef struct VFIODevice VFIODevice; ++typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++ ++typedef struct { ++ unsigned long *bitmap; ++ hwaddr size; ++ hwaddr pages; ++} VFIOBitmap; ++ ++/* ++ * This is the base object for vfio container backends ++ */ ++typedef struct VFIOContainerBase { ++ const VFIOIOMMUOps *ops; ++} VFIOContainerBase; ++ ++struct VFIOIOMMUOps { ++ /* basic feature */ ++ int (*dma_map)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++ int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ int (*attach_device)(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp); ++ void (*detach_device)(VFIODevice *vbasedev); ++ /* migration feature */ ++ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); ++ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); ++}; ++#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch new file mode 100644 index 0000000..03fb220 --- /dev/null +++ b/SOURCES/kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch @@ -0,0 +1,276 @@ +From 84b15fad1af781d06d0206d362de0801d7a18d0b Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:17 +0800 +Subject: [PATCH 038/101] vfio: Make VFIOContainerBase poiner parameter const + in VFIOIOMMUOps callbacks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 
RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [37/67] 95eb9edc7fcfefbd4b075f6f04941ed4a19ff87d (eauger1/centos-qemu-kvm) + +Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase poiner, +those callbacks only need read access to the sub object of VFIOContainerBase. +So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const +in these callbacks. + +Local functions called by those callbacks also need same changes to avoid +build error. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4517c33c31d392f08fa96a9db911da1e3507be94) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 9 +++---- + hw/vfio/container-base.c | 2 +- + hw/vfio/container.c | 34 ++++++++++++++------------- + hw/vfio/iommufd.c | 8 +++---- + include/hw/vfio/vfio-common.h | 12 ++++++---- + include/hw/vfio/vfio-container-base.h | 12 ++++++---- + 6 files changed, 42 insertions(+), 35 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 6569732b7a..08a3e57672 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) ++bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. 
+ */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer) + { + VFIODevice *vbasedev; + +@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index eee2dcfe76..1ffd25bbfa 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } + +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 1dbf9b9a17..b22feb8ded 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -61,11 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state) + } + } + +-static int vfio_dma_unmap_bitmap(VFIOContainer *container, ++static int vfio_dma_unmap_bitmap(const VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ const VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap 
*unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -117,11 +117,12 @@ unmap_exit: + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -174,11 +175,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + return 0; + } + +-static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -207,11 +208,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, +- bool start) ++static int ++vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer, ++ bool start) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), +@@ -233,12 +235,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + 
return ret; + } + +-static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size) + { +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); ++ const VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 5accd26484..87a561c545 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -26,10 +26,10 @@ + #include "qemu/chardev_open.h" + #include "pci.h" + +-static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + return iommufd_backend_map_dma(container->be, +@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + iova, size, vaddr, readonly); + } + +-static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { +- VFIOIOMMUFDContainer *container = ++ const VFIOIOMMUFDContainer *container = + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); + + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 697bf24a35..efcba19f66 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -244,13 +244,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int 
vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); +-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++bool ++vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer); ++bool ++vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, +- uint64_t size, ram_addr_t ram_addr); ++int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova, ++ uint64_t size, ram_addr_t ram_addr); + + /* Returns 0 on success, or a negative errno. */ + int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 45bb19c767..2ae297ccda 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); +-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + +@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + struct VFIOIOMMUOps { + /* basic feature */ +- int (*dma_map)(VFIOContainerBase *bcontainer, ++ int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +- int (*dma_unmap)(VFIOContainerBase *bcontainer, ++ int (*dma_unmap)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + 
IOMMUTLBEntry *iotlb); + int (*attach_device)(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp); + void (*detach_device)(VFIODevice *vbasedev); + /* migration feature */ +- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); +- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, ++ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, ++ bool start); ++ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); + /* PCI specific */ + int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch new file mode 100644 index 0000000..ffd8b9f --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch @@ -0,0 +1,75 @@ +From 57bdfc821d6f4b4f9c6b1ff05bf0114e5cabc77e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:13 +0800 +Subject: [PATCH 034/101] vfio/ap: Allow the selection of a given iommu backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [33/67] a12bb86e5b627ccf246fb9ce60820595589ff8e5 (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ap device: + +if the user wants to use the legacy backend, it shall not +link the vfio-ap device with any iommufd object: + + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ap device options: + + -object iommufd,id=iommufd0 + -device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 336f308958d598f3db351bb7d94cc57b4b2d448d) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index bbf69ff55a..80629609ae 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -11,10 +11,12 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/ap-device.h" + #include "qemu/error-report.h" + #include "qemu/event_notifier.h" +@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev) + + static Property vfio_ap_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch new file mode 100644 index 0000000..1055329 --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch @@ -0,0 +1,87 @@ +From db09b7c60c01ee75d602261ee959a96fa0d89d68 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:14 +0800 +Subject: [PATCH 035/101] vfio/ap: Make vfio cdev pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit 
+ +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [34/67] aaafa6088a9b0302d53aa539f67792d02ea0f663 (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 5e7ba401b71d18544a3e44b2a58b9e63fd5148d5) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 23 ++++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index 80629609ae..f180e4a32a 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); + VFIODevice *vbasedev = &vapdev->vdev; + +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ap_ops; + vbasedev->type = VFIO_DEVICE_TYPE_AP; + vbasedev->dev = dev; +@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ap_instance_init(Object *obj) ++{ ++ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ ++ vapdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ap_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ap_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd); ++#endif + dc->vmsd 
= &vfio_ap_vmstate; + dc->desc = "VFIO-based AP device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = { + .name = TYPE_VFIO_AP_DEVICE, + .parent = TYPE_AP_DEVICE, + .instance_size = sizeof(VFIOAPDevice), ++ .instance_init = vfio_ap_instance_init, + .class_init = vfio_ap_class_init, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch new file mode 100644 index 0000000..ed60920 --- /dev/null +++ b/SOURCES/kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch @@ -0,0 +1,81 @@ +From b8630ecb698e31311089ba4e224d5e2c08c8e665 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:23 +0800 +Subject: [PATCH 044/101] vfio/ap: Move VFIODevice initializations in + vfio_ap_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [43/67] 95a527f649b28c5c78903e99735107667e8468b1 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_ap_realize, +move all of them in vfio_ap_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit cbbcc2f1706aa1a08637142744d2f5f6515ac93f) +Signed-off-by: Eric Auger +--- + hw/vfio/ap.c | 26 +++++++++++++------------- + 1 file changed, 13 insertions(+), 13 deletions(-) + +diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c +index f180e4a32a..95fe7cd98b 100644 +--- a/hw/vfio/ap.c ++++ b/hw/vfio/ap.c +@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ap_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_AP; +- vbasedev->dev = dev; +- +- /* +- * vfio-ap devices operate in a way compatible with discarding of +- * memory in RAM blocks, as no pages are pinned in the host. +- * This needs to be set before vfio_get_device() for vfio common to +- * handle ram_block_discard_disable(). +- */ +- vapdev->vdev.ram_block_discard_allowed = true; +- + ret = vfio_attach_device(vbasedev->name, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = { + static void vfio_ap_instance_init(Object *obj) + { + VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj); ++ VFIODevice *vbasedev = &vapdev->vdev; + +- vapdev->vdev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_AP; ++ vbasedev->ops = &vfio_ap_ops; ++ vbasedev->dev = DEVICE(vapdev); ++ vbasedev->fd = -1; ++ ++ /* ++ * vfio-ap devices operate in a way compatible with discarding of ++ * memory in RAM blocks, as no pages are pinned in the host. ++ * This needs to be set before vfio_get_device() for vfio common to ++ * handle ram_block_discard_disable(). 
++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000..ff64a91 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,79 @@ +From 732115c80eb0dd672925a0737e09643d8a889abd Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:15 +0800 +Subject: [PATCH 036/101] vfio/ccw: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [35/67] 1701de023a9f3b3f0420689bf851e11aee88800d (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-ccw device: + +If the user wants to use the legacy backend, it shall not +link the vfio-ccw device with any iommufd object: + + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX + +This is called the legacy mode/backend. 
+ +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-ccw device options: + + -object iommufd,id=iommufd0 + -device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit e70f971a6c1230138843d7ab82267e4a5aaf6bda) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d857bb8d0f..d2d58bb677 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -15,12 +15,14 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + #include + + #include "qapi/error.h" + #include "hw/vfio/vfio-common.h" ++#include "sysemu/iommufd.h" + #include "hw/s390x/s390-ccw.h" + #include "hw/s390x/vfio-ccw.h" + #include "hw/qdev-properties.h" +@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev) + static Property vfio_ccw_properties[] = { + DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev), + DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000..6c91d85 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,93 @@ +From 0ff08afdec19f4decaf750fa7d158e0ea498ff28 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:16 +0800 +Subject: [PATCH 037/101] vfio/ccw: Make vfio cdev 
pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [36/67] cc0d8f51cffa5d5a7aebc2334b908b9877179ae7 (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Matthew Rosato +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 909a6254edaa8d0b0e3f1c0a623862e73d1842e9) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 25 ++++++++++++++++++++++--- + 1 file changed, 22 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index d2d58bb677..2afdf17dbe 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + } + } + ++ if (vfio_device_get_name(vbasedev, errp) < 0) { ++ return; ++ } ++ + vbasedev->ops = &vfio_ccw_ops; + vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid, +- vcdev->cdev.hostid.ssid, +- vcdev->cdev.hostid.devid); + vbasedev->dev = dev; + + /* +@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = { + .unmigratable = 1, + }; + ++static void vfio_ccw_instance_init(Object *obj) ++{ ++ VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ ++ vcdev->vdev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp); ++} ++#endif ++ + static void vfio_ccw_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + 
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass); + + device_class_set_props(dc, vfio_ccw_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd); ++#endif + dc->vmsd = &vfio_ccw_vmstate; + dc->desc = "VFIO-based subchannel assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = { + .name = TYPE_VFIO_CCW, + .parent = TYPE_S390_CCW, + .instance_size = sizeof(VFIOCCWDevice), ++ .instance_init = vfio_ccw_instance_init, + .class_init = vfio_ccw_class_init, + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch new file mode 100644 index 0000000..95b85f9 --- /dev/null +++ b/SOURCES/kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch @@ -0,0 +1,85 @@ +From 2ef1c050722115247962e3cd4d8fcf73727e597e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:24 +0800 +Subject: [PATCH 045/101] vfio/ccw: Move VFIODevice initializations in + vfio_ccw_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [44/67] 3345ed58f491aba8fd51bcc172af267ae53e6c8c (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_ccw_realize, +move all of them in vfio_ccw_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Eric Farman +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit c12b55ad6f9d3b4792b590e9211bd7319e4a2d70) +Signed-off-by: Eric Auger +--- + hw/vfio/ccw.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c +index 2afdf17dbe..6305a4c1b8 100644 +--- a/hw/vfio/ccw.c ++++ b/hw/vfio/ccw.c +@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp) + return; + } + +- vbasedev->ops = &vfio_ccw_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_CCW; +- vbasedev->dev = dev; +- +- /* +- * All vfio-ccw devices are believed to operate in a way compatible with +- * discarding of memory in RAM blocks, ie. pages pinned in the host are +- * in the current working set of the guest driver and therefore never +- * overlap e.g., with pages available to the guest balloon driver. This +- * needs to be set before vfio_get_device() for vfio common to handle +- * ram_block_discard_disable(). +- */ +- vbasedev->ram_block_discard_allowed = true; +- + ret = vfio_attach_device(cdev->mdevid, vbasedev, + &address_space_memory, errp); + if (ret) { +@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = { + static void vfio_ccw_instance_init(Object *obj) + { + VFIOCCWDevice *vcdev = VFIO_CCW(obj); ++ VFIODevice *vbasedev = &vcdev->vdev; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_CCW; ++ vbasedev->ops = &vfio_ccw_ops; ++ vbasedev->dev = DEVICE(vcdev); ++ vbasedev->fd = -1; + +- vcdev->vdev.fd = -1; ++ /* ++ * All vfio-ccw devices are believed to operate in a way compatible with ++ * discarding of memory in RAM blocks, ie. pages pinned in the host are ++ * in the current working set of the guest driver and therefore never ++ * overlap e.g., with pages available to the guest balloon driver. 
This ++ * needs to be set before vfio_get_device() for vfio common to handle ++ * ram_block_discard_disable(). ++ */ ++ vbasedev->ram_block_discard_allowed = true; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch new file mode 100644 index 0000000..8615b6d --- /dev/null +++ b/SOURCES/kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch @@ -0,0 +1,98 @@ +From 7de36998dd6177380e46b8c5f3a91c3fad75483c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:30 +0800 +Subject: [PATCH 005/101] vfio/common: Introduce vfio_container_init/destroy + helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [4/67] 8287f687ef19cd84afede1e8f3b16ac3caf29a1d (eauger1/centos-qemu-kvm) + +This adds two helper functions vfio_container_init/destroy which will be +used by both legacy and iommufd containers to do base container specific +initialization and release. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit ed2f7f80170251e7cdd2965a13ee97527d1fbec8) +Signed-off-by: Eric Auger +--- + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 4 +++- + include/hw/vfio/vfio-container-base.h | 4 ++++ + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 55d3a35fa4..e929435751 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + g_assert(bcontainer->ops->dma_unmap); + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } ++ ++void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++{ ++ bcontainer->ops = ops; ++} ++ ++void vfio_container_destroy(VFIOContainerBase *bcontainer) ++{ ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c04df26323..32a0251dd1 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -559,7 +559,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +- bcontainer->ops = &vfio_legacy_ops; ++ vfio_container_init(bcontainer, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -661,6 +661,7 @@ put_space_exit: + static void vfio_disconnect_container(VFIOGroup *group) + { + VFIOContainer *container = group->container; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + QLIST_REMOVE(group, container_next); + group->container = NULL; +@@ -695,6 +696,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); + close(container->fd); 
+diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 56b033f59f..577f52ccbc 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); + ++void vfio_container_init(VFIOContainerBase *bcontainer, ++ const VFIOIOMMUOps *ops); ++void vfio_container_destroy(VFIOContainerBase *bcontainer); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch new file mode 100644 index 0000000..eec555b --- /dev/null +++ b/SOURCES/kvm-vfio-common-Move-giommu_list-in-base-container.patch @@ -0,0 +1,221 @@ +From 36f4005c3dbb4c8b63a975494c75281de51c25f9 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:31 +0800 +Subject: [PATCH 006/101] vfio/common: Move giommu_list in base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [5/67] ba5898e96c16c7f6e8108ae461b454d3c8c35404 (eauger1/centos-qemu-kvm) + +Move the giommu_list field in the base container and store +the base container in the VFIOGuestIOMMU. + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dddf83ab99eb832c449249397a1c302c6ed746bf) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 17 +++++++++++------ + hw/vfio/container-base.c | 9 +++++++++ + hw/vfio/container.c | 8 -------- + include/hw/vfio/vfio-common.h | 9 --------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 5 files changed, 29 insertions(+), 23 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e610771888..43580bcc43 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainerBase *bcontainer = &giommu->container->bcontainer; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + giommu->iommu_mr = iommu_mr; + giommu->iommu_offset = section->offset_within_address_space - + section->offset_within_region; +- giommu->container = container; ++ giommu->bcontainer = bcontainer; + llend = int128_add(int128_make64(section->offset_within_region), + section->size); + llend = int128_sub(llend, int128_one()); +@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + g_free(giommu); + goto fail; + } +- 
QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next); ++ QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next); + memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); + + return; +@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + memory_region_unregister_iommu_notifier(section->mr, +@@ -1206,7 +1208,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier *gdn = container_of(n, + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = giommu->bcontainer; ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1284,12 +1288,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { + VFIOGuestIOMMU *giommu; + +- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if 
(MEMORY_REGION(giommu->iommu_mr) == section->mr && + giommu->n.start == section->offset_within_region) { + Int128 llend; +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index e929435751..20bcb9669a 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ QLIST_INIT(&bcontainer->giommu_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) + { ++ VFIOGuestIOMMU *giommu, *tmp; ++ ++ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { ++ memory_region_unregister_iommu_notifier( ++ MEMORY_REGION(giommu->iommu_mr), &giommu->n); ++ QLIST_REMOVE(giommu, giommu_next); ++ g_free(giommu); ++ } + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 32a0251dd1..133d3c8f5c 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -556,7 +556,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; +- QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, &vfio_legacy_ops); +@@ -686,16 +685,9 @@ static void vfio_disconnect_container(VFIOGroup *group) + + if (QLIST_EMPTY(&container->group_list)) { + VFIOAddressSpace *space = container->space; +- VFIOGuestIOMMU *giommu, *tmp; + + QLIST_REMOVE(container, next); + +- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) { +- memory_region_unregister_iommu_notifier( +- MEMORY_REGION(giommu->iommu_mr), &giommu->n); +- QLIST_REMOVE(giommu, giommu_next); +- g_free(giommu); +- } + vfio_container_destroy(bcontainer); + + trace_vfio_disconnect_container(container->fd); +diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h +index 24a26345e5..6be082b8f2 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -95,7 +95,6 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; +- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +@@ -104,14 +103,6 @@ typedef struct VFIOContainer { + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIOGuestIOMMU { +- VFIOContainer *container; +- IOMMUMemoryRegion *iommu_mr; +- hwaddr iommu_offset; +- IOMMUNotifier n; +- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; +-} VFIOGuestIOMMU; +- + typedef struct VFIORamDiscardListener { + VFIOContainer *container; + MemoryRegion *mr; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 577f52ccbc..a11aec5755 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -29,8 +29,17 @@ typedef struct { + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + } VFIOContainerBase; + ++typedef struct VFIOGuestIOMMU { ++ VFIOContainerBase *bcontainer; ++ IOMMUMemoryRegion *iommu_mr; ++ hwaddr iommu_offset; ++ IOMMUNotifier n; ++ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; ++} VFIOGuestIOMMU; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch new file mode 100644 index 0000000..261807a --- /dev/null +++ b/SOURCES/kvm-vfio-common-return-early-if-space-isn-t-empty.patch @@ -0,0 +1,55 @@ +From e9476ee64edd81fafd409fb3ceaad80668446bff Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 
Nov 2023 16:44:02 +0800 +Subject: [PATCH 023/101] vfio/common: return early if space isn't empty +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [22/67] 239c21ae7cddc8efabc041b9c7774f15b4964631 (eauger1/centos-qemu-kvm) + +This is a trivial optimization. If there is an active container in the space, +vfio_reset_handler will never be unregistered. So invert the check of +space->containers and return early. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 1eae5b7bd3ddd03b5591e9122b011c6520064a5a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 572ae7c934..934f4f5446 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1462,10 +1462,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as) + + void vfio_put_address_space(VFIOAddressSpace *space) + { +- if (QLIST_EMPTY(&space->containers)) { +- QLIST_REMOVE(space, list); +- g_free(space); ++ if (!QLIST_EMPTY(&space->containers)) { ++ return; + } ++ ++ QLIST_REMOVE(space, list); ++ g_free(space); ++ + if (QLIST_EMPTY(&vfio_address_spaces)) { + qemu_unregister_reset(vfio_reset_handler, NULL); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch new file mode 100644 index 0000000..62caf8a --- /dev/null +++ b/SOURCES/kvm-vfio-container-Convert-functions-to-base-container.patch @@ -0,0 +1,257 @@ +From facad966c42b1ec38b12e45f2b84bd059542b60c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:35
+0800 +Subject: [PATCH 010/101] vfio/container: Convert functions to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [9/67] a0002d6e9cb0ca76e3e2f25208ecba22dd9f9a88 (eauger1/centos-qemu-kvm) + +In the prospect to get rid of VFIOContainer refs +in common.c lets convert misc functions to use the base +container object instead: + +vfio_devices_all_dirty_tracking +vfio_devices_all_device_dirty_tracking +vfio_devices_all_running_and_mig_active +vfio_devices_query_dirty_bitmap +vfio_get_dirty_bitmap + +Signed-off-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit e1cac6b203f45b5322e831e8d50edfdf18609b09) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 42 +++++++++++++++-------------------- + hw/vfio/container.c | 6 ++--- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 9 ++++---- + 4 files changed, 26 insertions(+), 33 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 9415395ed9..cf6618f6ed 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P; + } + +-static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) ++static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return true; + } + +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase 
*bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { +@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + * Check if all VFIO devices are running and migration is active, which is + * essentially equivalent to the migration being in pre-copy phase. + */ +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { +@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + ret = vfio_devices_dma_logging_start(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + VFIOContainer *container = container_of(listener, VFIOContainer, listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(container)) { ++ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { + vfio_devices_dma_logging_stop(container); + } else { + ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova, + return 0; + } + +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; 
+ +@@ -1165,17 +1161,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + return 0; + } + +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr) + { + bool all_device_dirty_tracking = +- vfio_devices_all_device_dirty_tracking(container); ++ vfio_devices_all_device_dirty_tracking(bcontainer); + uint64_t dirty_pages; + VFIOBitmap vbmap; + int ret; + +- if (!container->bcontainer.dirty_pages_supported && +- !all_device_dirty_tracking) { ++ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1188,10 +1183,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + } + + if (all_device_dirty_tracking) { +- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } else { +- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, +- iova, size); ++ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size); + } + + if (ret) { +@@ -1201,8 +1195,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr, + vbmap.pages); + +- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, +- ram_addr, dirty_pages); ++ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages); + out: + g_free(vbmap.bitmap); + +@@ -1236,8 +1229,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, +- translated_addr); ++ ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, ++ 
iotlb->addr_mask + 1, translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +@@ -1266,7 +1259,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. + */ +- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr); ++ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, ++ ram_addr); + } + + static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +@@ -1335,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return vfio_get_dirty_bitmap(container, ++ return vfio_get_dirty_bitmap(&container->bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1350,7 +1344,7 @@ static void vfio_listener_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_dirty_tracking(container)) { ++ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { + ret = vfio_sync_dirty_bitmap(container, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 63a906de93..7bd81eab09 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -129,8 +129,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + bool need_dirty_sync = false; + int ret; + +- if (iotlb && vfio_devices_all_running_and_mig_active(container)) { +- if (!vfio_devices_all_device_dirty_tracking(container) && ++ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { ++ if (!vfio_devices_all_device_dirty_tracking(bcontainer) && + container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } 
+@@ -162,7 +162,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + } + + if (need_dirty_sync) { +- ret = vfio_get_dirty_bitmap(container, iova, size, ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, size, + iotlb->translated_addr); + if (ret) { + return ret; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9f7fedee98..08a1f9dfa4 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" + vfio_legacy_dma_unmap_overflow_workaround(void) "" +-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 ++vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + + # platform.c +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9740cf9fbc..bc67e1316c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -186,7 +186,6 @@ typedef struct VFIODisplay { + + VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); +-bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* SPAPR specific */ + int vfio_container_add_section_window(VFIOContainer *container, +@@ -260,11 +259,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error 
**errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size); +-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container); +-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container); +-int vfio_devices_query_dirty_bitmap(VFIOContainer *container, ++bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer); ++bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer); ++int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size); +-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch new file mode 100644 index 0000000..92e9a38 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Implement-attach-detach_device.patch @@ -0,0 +1,97 @@ +From a5d19bfbfddb36fa6d68ca6282a5acd9b245d48a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:41 +0800 +Subject: [PATCH 016/101] vfio/container: Implement attach/detach_device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [15/67] e233c90e4af2061dc0612bc1b1d17be1a47daeae (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 1eb31f13b24c49884d8256f96a6664df2dd0824d) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 16 ++++++++++++++++ + hw/vfio/container.c | 12 +++++------- + 2 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8ef2e7967d..483ba82089 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1498,3 +1498,19 @@ retry: + + return info; + } ++ ++int vfio_attach_device(char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ ++ return ops->attach_device(name, vbasedev, as, errp); ++} ++ ++void vfio_detach_device(VFIODevice *vbasedev) ++{ ++ if (!vbasedev->bcontainer) { ++ return; ++ } ++ vbasedev->bcontainer->ops->detach_device(vbasedev); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 721c0d7375..6bacf38222 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -873,8 +873,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + * @name and @vbasedev->name are likely to be different depending + * on the type of the device, hence the need for passing @name + */ +-int vfio_attach_device(char *name, VFIODevice *vbasedev, +- AddressSpace *as, Error **errp) ++static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) + { + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; +@@ -914,14 +914,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +-void vfio_detach_device(VFIODevice *vbasedev) ++static void vfio_legacy_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->bcontainer) { +- return; +- } +- + QLIST_REMOVE(vbasedev, global_next); + 
QLIST_REMOVE(vbasedev, container_next); + vbasedev->bcontainer = NULL; +@@ -933,6 +929,8 @@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .attach_device = vfio_legacy_attach_device, ++ .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch new file mode 100644 index 0000000..42b406b --- /dev/null +++ b/SOURCES/kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch @@ -0,0 +1,65 @@ +From c3c9f366c356032fa57ff7cc664732ba87ceb3fb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:18 +0100 +Subject: [PATCH 051/101] vfio/container: Initialize VFIOIOMMUOps under + vfio_init_container() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [50/67] f325136391b22babadb1be3394c527deecdcd3ca (eauger1/centos-qemu-kvm) + +vfio_init_container() already defines the IOMMU type of the container. +Do the same for the VFIOIOMMUOps struct. This prepares ground for the +following patches that will deduce the associated VFIOIOMMUOps struct +from the IOMMU type. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit bffe92af0e7571868d47a1d1cd2205e13054d492) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index afcfe80488..f4a0434a52 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -370,7 +370,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + } + + static int vfio_init_container(VFIOContainer *container, int group_fd, +- Error **errp) ++ VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; + +@@ -401,6 +401,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; ++ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); + return 0; + } + +@@ -583,9 +584,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +- ret = vfio_init_container(container, group->fd, errp); ++ ret = vfio_init_container(container, group->fd, space, errp); + if (ret) { + goto free_container_exit; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch new file mode 100644 index 0000000..3411ecb --- /dev/null +++ b/SOURCES/kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch @@ -0,0 +1,55 @@ +From 29f13011e62f5370ef7fb3248dc85c90ae5bb042 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:21 +0100 +Subject: [PATCH 054/101] vfio/container: Intoduce a new VFIOIOMMUClass::setup + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [53/67] 8641161afc33d68795bcf51a47e89061b34d50a8 (eauger1/centos-qemu-kvm) + +This will help in converting the sPAPR IOMMU backend to a QOM interface. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 61d893f2cdb34a2b0255f9b5fbba6b49b94ff730) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 1 + + include/hw/vfio/vfio-container-base.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 220e838a91..c22bdd3216 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -1129,6 +1129,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) + { + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); + ++ vioc->setup = vfio_legacy_setup; + vioc->dma_map = vfio_legacy_dma_map; + vioc->dma_unmap = vfio_legacy_dma_unmap; + vioc->attach_device = vfio_legacy_attach_device; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index c60370fc5e..ce8b1fba88 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -109,6 +109,7 @@ struct VFIOIOMMUClass { + InterfaceClass parent_class; + + /* basic feature */ ++ int (*setup)(VFIOContainerBase *bcontainer, Error **errp); + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000..7139e64 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,143 @@ +From 5b63e4595e106196ef922b7f762c8f4150d73979 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:19 +0100 +Subject: [PATCH 052/101] vfio/container: Introduce a VFIOIOMMU QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [51/67] 7c06e2165efe94dcd203d44e422a7aa9fac9816c (eauger1/centos-qemu-kvm) + +VFIOContainerBase was not introduced as an abstract QOM object because +it felt unnecessary to expose all the IOMMU backends to the QEMU +machine and human interface. However, we can still abstract the IOMMU +backend handlers using a QOM interface class. This provides more +flexibility when referencing the various implementations. + +Simply transform the VFIOIOMMUOps struct in an InterfaceClass and do +some initial name replacements. Next changes will start converting +VFIOIOMMUOps. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit fdaa774e67435a328c0e28006c4d749f2198294a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 2 +- + hw/vfio/container-base.c | 12 +++++++++++- + hw/vfio/pci.c | 2 +- + include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++---- + 4 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 08a3e57672..49dab41566 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1503,7 +1503,7 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUOps *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = &vfio_legacy_ops; + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 1ffd25bbfa..913ae49077 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -72,7 +72,7 @@ int 
vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + } + + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops) ++ const VFIOIOMMUClass *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; +@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + + g_list_free_full(bcontainer->iova_ranges, g_free); + } ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU, ++ .parent = TYPE_INTERFACE, ++ .class_size = sizeof(VFIOIOMMUClass), ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 83c3238608..adb7c09367 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2491,7 +2491,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { + VFIODevice *vbasedev = &vdev->vbasedev; +- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; ++ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops; + + return ops->pci_hot_reset(vbasedev, single); + } +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 5c9594b6c7..d6147b4aee 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -16,7 +16,8 @@ + #include "exec/memory.h" + + typedef struct VFIODevice VFIODevice; +-typedef struct VFIOIOMMUOps VFIOIOMMUOps; ++typedef struct VFIOIOMMUClass VFIOIOMMUClass; ++#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace { + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { +- const VFIOIOMMUOps *ops; ++ const VFIOIOMMUClass *ops; + VFIOAddressSpace *space; + MemoryListener listener; + Error *error; +@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, + + void vfio_container_init(VFIOContainerBase 
*bcontainer, + VFIOAddressSpace *space, +- const VFIOIOMMUOps *ops); ++ const VFIOIOMMUClass *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-struct VFIOIOMMUOps { ++ ++#define TYPE_VFIO_IOMMU "vfio-iommu" ++ ++/* ++ * VFIOContainerBase is not an abstract QOM object because it felt ++ * unnecessary to expose all the IOMMU backends to the QEMU machine ++ * and human interface. However, we can still abstract the IOMMU ++ * backend handlers using a QOM interface class. This provides more ++ * flexibility when referencing the various implementations. ++ */ ++DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU) ++ ++struct VFIOIOMMUClass { ++ InterfaceClass parent_class; ++ + /* basic feature */ + int (*dma_map)(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch new file mode 100644 index 0000000..60439ff --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch @@ -0,0 +1,168 @@ +From 58927bf236541b9423f855eca1970f7a3cf864a9 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:20 +0100 +Subject: [PATCH 053/101] vfio/container: Introduce a VFIOIOMMU legacy QOM + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [52/67] a81f39d13305e84699313e17ae64d10ff4b09067 (eauger1/centos-qemu-kvm) + +Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM +interface. The set of operations for this backend can be referenced +with a literal typename instead of a C struct. This will simplify +support of multiple backends.
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 9812feefab3a4ff95a6cfd73aecb120b406bc98c) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 6 ++- + hw/vfio/container.c | 58 ++++++++++++++++++++++----- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 55 insertions(+), 11 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 49dab41566..2329d0efc8 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1503,13 +1503,17 @@ retry: + int vfio_attach_device(char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +- const VFIOIOMMUClass *ops = &vfio_legacy_ops; ++ const VFIOIOMMUClass *ops = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = &vfio_iommufd_ops; + } + #endif ++ ++ assert(ops); ++ + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index f4a0434a52..220e838a91 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -369,10 +369,30 @@ static int vfio_get_iommu_type(VFIOContainer *container, + return -EINVAL; + } + ++/* ++ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type ++ */ ++static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) ++{ ++ ObjectClass *klass = NULL; ++ ++ switch (iommu_type) { ++ case VFIO_TYPE1v2_IOMMU: ++ case VFIO_TYPE1_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); ++ break; ++ default: ++ g_assert_not_reached(); ++ }; ++ ++ return VFIO_IOMMU_CLASS(klass); ++} ++ + static int vfio_init_container(VFIOContainer *container, int group_fd, + VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; ++ const VFIOIOMMUClass *vioc; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -401,7 +421,14 @@ static int 
vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; +- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops); ++ ++ vioc = vfio_get_iommu_class(iommu_type, errp); ++ if (!vioc) { ++ error_setg(errp, "No available IOMMU models"); ++ return -EINVAL; ++ } ++ ++ vfio_container_init(&container->bcontainer, space, vioc); + return 0; + } + +@@ -1098,12 +1125,25 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_legacy_ops = { +- .dma_map = vfio_legacy_dma_map, +- .dma_unmap = vfio_legacy_dma_unmap, +- .attach_device = vfio_legacy_attach_device, +- .detach_device = vfio_legacy_detach_device, +- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, +- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, +- .pci_hot_reset = vfio_legacy_pci_hot_reset, ++static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = vfio_legacy_dma_map; ++ vioc->dma_unmap = vfio_legacy_dma_unmap; ++ vioc->attach_device = vfio_legacy_attach_device; ++ vioc->detach_device = vfio_legacy_detach_device; ++ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking; ++ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap; ++ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_LEGACY, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_legacy_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b8aa8a5495..14c497b6b0 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps 
vfio_legacy_ops; + extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index d6147b4aee..c60370fc5e 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + + #define TYPE_VFIO_IOMMU "vfio-iommu" ++#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch new file mode 100644 index 0000000..2840e2c --- /dev/null +++ b/SOURCES/kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch @@ -0,0 +1,71 @@ +From e56f961fbe95a53a52c5eca00b4fca17d825e860 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:28 +0800 +Subject: [PATCH 003/101] vfio/container: Introduce a empty VFIOIOMMUOps +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [2/67] 0de0afffce42fa4a17f6d33a10b6162cdfbe8150 (eauger1/centos-qemu-kvm) + +This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general +IOMMU ops of legacy container. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit d24668579184f4098779983724ec74cd3db62e10) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 5 +++++ + include/hw/vfio/vfio-common.h | 2 +- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 242010036a..4bc43ddfa4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -472,6 +472,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { + VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret, fd; + VFIOAddressSpace *space; + +@@ -552,6 +553,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); ++ bcontainer = &container->bcontainer; ++ bcontainer->ops = &vfio_legacy_ops; + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -933,3 +936,5 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_base_device(vbasedev); + vfio_put_group(group); + } ++ ++const VFIOIOMMUOps vfio_legacy_ops; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 586d153c12..678161f207 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -255,7 +255,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +- ++extern const VFIOIOMMUOps vfio_legacy_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch new file mode 100644 index 0000000..ae9ccd8 --- /dev/null +++ 
b/SOURCES/kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch @@ -0,0 +1,118 @@ +From 6c7546756e979e4f5ba29ae51a21c63fa90492cf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:17 +0100 +Subject: [PATCH 050/101] vfio/container: Introduce vfio_legacy_setup() for + further cleanups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [49/67] 3a621ba2605c98b7fbf7fd9f93a207f728f1202e (eauger1/centos-qemu-kvm) + +This will help subsequent patches to unify the initialization of type1 +and sPAPR IOMMU backends. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit d3764db87531cd53849ccee9b2f72aede90ccf5b) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 63 +++++++++++++++++++++++++-------------------- + 1 file changed, 35 insertions(+), 28 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 1e77a2929e..afcfe80488 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -474,6 +474,35 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + } + } + ++static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp) ++{ ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ g_autofree struct vfio_iommu_type1_info *info = NULL; ++ int ret; ++ ++ ret = vfio_get_iommu_info(container, &info); ++ if (ret) { ++ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); ++ return ret; ++ } ++ ++ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { ++ bcontainer->pgsizes = info->iova_pgsizes; ++ } else { ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } ++ ++ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { ++ 
bcontainer->dma_max_mappings = 65535; ++ } ++ ++ vfio_get_info_iova_range(info, bcontainer); ++ ++ vfio_get_iommu_info_migration(container, info); ++ return 0; ++} ++ + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -570,40 +599,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + switch (container->iommu_type) { + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: +- { +- struct vfio_iommu_type1_info *info; +- +- ret = vfio_get_iommu_info(container, &info); +- if (ret) { +- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info"); +- goto enable_discards_exit; +- } +- +- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- bcontainer->pgsizes = info->iova_pgsizes; +- } else { +- bcontainer->pgsizes = qemu_real_host_page_size(); +- } +- +- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { +- bcontainer->dma_max_mappings = 65535; +- } +- +- vfio_get_info_iova_range(info, bcontainer); +- +- vfio_get_iommu_info_migration(container, info); +- g_free(info); ++ ret = vfio_legacy_setup(bcontainer, errp); + break; +- } + case VFIO_SPAPR_TCE_v2_IOMMU: + case VFIO_SPAPR_TCE_IOMMU: +- { + ret = vfio_spapr_container_init(container, errp); +- if (ret) { +- goto enable_discards_exit; +- } + break; ++ default: ++ g_assert_not_reached(); + } ++ ++ if (ret) { ++ goto enable_discards_exit; + } + + vfio_kvm_device_add_group(group); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch new file mode 100644 index 0000000..3d46a06 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch @@ -0,0 +1,102 @@ +From 6a597d7c82a4538fa1f928db7e600ec2e5a44361 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:39 +0800 +Subject: [PATCH 014/101] vfio/container: Move dirty_pgsizes and + max_dirty_bitmap_size to base container +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [13/67] b9fe57174368e36788b017cc2ad13b748592cfc2 (eauger1/centos-qemu-kvm) + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4d6b95010c59127ac4f7230d6ee88b5d0e99738c) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 9 +++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 5c1dee8c9f..c8088a8174 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -64,6 +64,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_type1_dma_unmap *unmap; + struct vfio_bitmap *bitmap; + VFIOBitmap vbmap; +@@ -91,7 +92,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container, + bitmap->size = vbmap.size; + bitmap->data = (__u64 *)vbmap.bitmap; + +- if (vbmap.size > container->max_dirty_bitmap_size) { ++ if (vbmap.size > bcontainer->max_dirty_bitmap_size) { + error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size); + ret = -E2BIG; + goto unmap_exit; +@@ -131,7 +132,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) { + if (!vfio_devices_all_device_dirty_tracking(bcontainer) && +- container->bcontainer.dirty_pages_supported) { ++ bcontainer->dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); 
+ } + +@@ -469,8 +470,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { + bcontainer->dirty_pages_supported = true; +- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; +- container->dirty_pgsizes = cap_mig->pgsize_bitmap; ++ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; ++ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap; + } + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 922022cbc6..b1c9fe711b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -80,8 +80,6 @@ typedef struct VFIOContainer { + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener prereg_listener; + unsigned iommu_type; +- uint64_t dirty_pgsizes; +- uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + GList *iova_ranges; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 95f8d319e0..80e4a993c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase { + MemoryListener listener; + Error *error; + bool initialized; ++ uint64_t dirty_pgsizes; ++ uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch new file mode 100644 index 0000000..c9c79b6 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-iova_ranges-to-base-container.patch @@ -0,0 +1,168 @@ +From 882143ef30da4182f049eb8192e0fac317c372b3 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:40 +0800 +Subject: [PATCH 015/101] vfio/container: Move iova_ranges to base 
container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [14/67] 49f2e3c484b4c0c63be9aa4eb1bf08804dcb1ec3 (eauger1/centos-qemu-kvm) + +Meanwhile remove the helper function vfio_free_container as it +only calls g_free now. + +No functional change intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit f79baf8c9575ac3193ca86ec508791c86d96b13e) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 5 +++-- + hw/vfio/container-base.c | 3 +++ + hw/vfio/container.c | 19 ++++++------------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 1 + + 5 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index be623e544b..8ef2e7967d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener, + goto fail; + } + +- if (container->iova_ranges) { ++ if (bcontainer->iova_ranges) { + ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr, +- container->iova_ranges, &err); ++ bcontainer->iova_ranges, ++ &err); + if (ret) { + g_free(giommu); + goto fail; +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 7f508669f5..0177f43741 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; ++ bcontainer->iova_ranges = NULL; + QLIST_INIT(&bcontainer->giommu_list); + QLIST_INIT(&bcontainer->vrdl_list); + } +@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + 
QLIST_REMOVE(giommu, giommu_next); + g_free(giommu); + } ++ ++ g_list_free_full(bcontainer->iova_ranges, g_free); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c8088a8174..721c0d7375 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -308,7 +308,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info, + } + + static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_iova_range *cap; +@@ -326,8 +326,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info, + + range_set_bounds(range, cap->iova_ranges[i].start, + cap->iova_ranges[i].end); +- container->iova_ranges = +- range_list_insert(container->iova_ranges, range); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); + } + + return true; +@@ -475,12 +475,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + } + } + +-static void vfio_free_container(VFIOContainer *container) +-{ +- g_list_free_full(container->iova_ranges, g_free); +- g_free(container); +-} +- + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -560,7 +554,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->iova_ranges = NULL; + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +@@ -597,7 +590,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + bcontainer->dma_max_mappings = 65535; + } + +- vfio_get_info_iova_range(info, container); ++ vfio_get_info_iova_range(info, bcontainer); + + vfio_get_iommu_info_migration(container, info); + g_free(info); +@@ -649,7 +642,7 @@ enable_discards_exit: + vfio_ram_block_discard_disable(container, false); + + free_container_exit: 
+- vfio_free_container(container); ++ g_free(container); + + close_fd_exit: + close(fd); +@@ -693,7 +686,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + + trace_vfio_disconnect_container(container->fd); + close(container->fd); +- vfio_free_container(container); ++ g_free(container); + + vfio_put_address_space(space); + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b1c9fe711b..b9e5a0e64b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -82,7 +82,6 @@ typedef struct VFIOContainer { + unsigned iommu_type; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +- GList *iova_ranges; + } VFIOContainer; + + typedef struct VFIOHostDMAWindow { +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 80e4a993c5..9658ffb526 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase { + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; ++ GList *iova_ranges; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch new file mode 100644 index 0000000..3198bfd --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-listener-to-base-container.patch @@ -0,0 +1,522 @@ +From 36bc7782bb02f81368e3e43a3947d16ad362e137 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:38 +0800 +Subject: [PATCH 013/101] vfio/container: Move listener to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: 
Sebastian Ott +RH-Commit: [12/67] f469ab126c6366170aa2520f9b4d9969d3ae0a04 (eauger1/centos-qemu-kvm) + +Move listener to base container. Also error and initialized fields +are moved at the same time. + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit c7b313d300f161c650d011a5c9da469bcd5d34e4) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 110 +++++++++++++------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 19 +++-- + hw/vfio/spapr.c | 11 +-- + include/hw/vfio/vfio-common.h | 3 - + include/hw/vfio/vfio-container-base.h | 3 + + 6 files changed, 74 insertions(+), 73 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index f15665789f..be623e544b 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section, + return true; + } + +-static bool vfio_get_section_iova_range(VFIOContainer *container, ++static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + hwaddr *out_iova, hwaddr *out_end, + Int128 *out_llend) +@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container, + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, 
&llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + if (memory_region_is_ram_device(section->mr)) { + trace_vfio_listener_region_add_no_dma_map( + memory_region_name(section->mr), +@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + } + +- ret = vfio_container_dma_map(&container->bcontainer, +- iova, int128_get64(llsize), vaddr, +- section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), ++ vaddr, section->readonly); + if (ret) { + error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, int128_get64(llsize), vaddr, ret, ++ bcontainer, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); + if (memory_region_is_ram_device(section->mr)) { + /* Allow unexpected mappings not to be fatal for RAM devices */ +@@ -716,9 +718,9 @@ fail: + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. 
+ */ +- if (!container->initialized) { +- if (!container->error) { +- error_propagate_prepend(&container->error, err, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_propagate_prepend(&bcontainer->error, err, + "Region %s: ", + memory_region_name(section->mr)); + } else { +@@ -733,8 +735,10 @@ fail: + static void vfio_listener_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener, + */ + } + +- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) { ++ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end, ++ &llend)) { + return; + } + +@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ ret = vfio_container_dma_unmap(bcontainer, iova, + int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, int128_get64(llsize), ret, ++ bcontainer, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + } +@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges { + } VFIODirtyRanges; + + typedef struct VFIODirtyRangesListener { +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + VFIODirtyRanges ranges; + MemoryListener listener; + } VFIODirtyRangesListener; + + static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, +- VFIOContainer *container) ++ VFIOContainerBase *bcontainer) + { + VFIOPCIDevice *pcidev; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + +@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + hwaddr iova, end, *min, *max; + + if (!vfio_listener_valid_section(section, "tracking_update") || +- !vfio_get_section_iova_range(dirty->container, section, ++ !vfio_get_section_iova_range(dirty->bcontainer, section, + &iova, &end, NULL)) { + return; + } +@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener, + * The alternative would be an IOVATree but that has a much bigger runtime + * overhead and unnecessary complexity. 
+ */ +- if (vfio_section_is_vfio_pci(section, dirty->container) && ++ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) && + iova >= UINT32_MAX) { + min = &range->minpci64; + max = &range->maxpci64; +@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = { + .region_add = vfio_dirty_tracking_update, + }; + +-static void vfio_dirty_tracking_init(VFIOContainer *container, ++static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer, + VFIODirtyRanges *ranges) + { + VFIODirtyRangesListener dirty; +@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.ranges.min64 = UINT64_MAX; + dirty.ranges.minpci64 = UINT64_MAX; + dirty.listener = vfio_dirty_tracking_listener; +- dirty.container = container; ++ dirty.bcontainer = bcontainer; + + memory_listener_register(&dirty.listener, +- container->bcontainer.space->as); ++ bcontainer->space->as); + + *ranges = dirty.ranges; + +@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + memory_listener_unregister(&dirty.listener); + } + +-static void vfio_devices_dma_logging_stop(VFIOContainer *container) ++static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer) + { + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); +@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + } + + static struct vfio_device_feature * +-vfio_device_feature_dma_logging_start_create(VFIOContainer *container, ++vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer, + VFIODirtyRanges *tracking) + { + struct vfio_device_feature *feature; +@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy( + g_free(feature); + } + +-static 
int vfio_devices_dma_logging_start(VFIOContainer *container) ++static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; +- VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +- vfio_dirty_tracking_init(container, &ranges); +- feature = vfio_device_feature_dma_logging_start_create(container, ++ vfio_dirty_tracking_init(bcontainer, &ranges); ++ feature = vfio_device_feature_dma_logging_start_create(bcontainer, + &ranges); + if (!feature) { + return -errno; +@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + + out: + if (ret) { +- vfio_devices_dma_logging_stop(container); ++ vfio_devices_dma_logging_stop(bcontainer); + } + + vfio_device_feature_dma_logging_start_destroy(feature); +@@ -1077,14 +1079,14 @@ out: + + static void vfio_listener_log_global_start(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- ret = vfio_devices_dma_logging_start(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ ret = vfio_devices_dma_logging_start(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- true); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, true); + } + + if (ret) { +@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + + static void vfio_listener_log_global_stop(MemoryListener *listener) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret = 0; + +- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) { +- 
vfio_devices_dma_logging_stop(container); ++ if (vfio_devices_all_device_dirty_tracking(bcontainer)) { ++ vfio_devices_dma_logging_stop(bcontainer); + } else { +- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, +- false); ++ ret = vfio_container_set_dirty_page_tracking(bcontainer, false); + } + + if (ret) { +@@ -1214,8 +1216,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + vfio_giommu_dirty_notifier, n); + VFIOGuestIOMMU *giommu = gdn->giommu; + VFIOContainerBase *bcontainer = giommu->bcontainer; +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova = iotlb->iova + giommu->iommu_offset; + ram_addr_t translated_addr; + int ret = -EINVAL; +@@ -1230,12 +1230,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + + rcu_read_lock(); + if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { +- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova, +- iotlb->addr_mask + 1, translated_addr); ++ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, ++ translated_addr); + if (ret) { + error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, iotlb->addr_mask + 1, ret, ++ bcontainer, iova, iotlb->addr_mask + 1, ret, + strerror(-ret)); + } + } +@@ -1291,10 +1291,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, + &vrdl); + } + +-static int vfio_sync_dirty_bitmap(VFIOContainer *container, ++static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + ram_addr_t ram_addr; + + if (memory_region_is_iommu(section->mr)) { +@@ -1330,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +- return 
vfio_get_dirty_bitmap(&container->bcontainer, ++ return vfio_get_dirty_bitmap(bcontainer, + REAL_HOST_PAGE_ALIGN(section->offset_within_address_space), + int128_get64(section->size), ram_addr); + } +@@ -1338,15 +1337,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, ++ listener); + int ret; + + if (vfio_listener_skipped_section(section)) { + return; + } + +- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) { +- ret = vfio_sync_dirty_bitmap(container, section); ++ if (vfio_devices_all_dirty_tracking(bcontainer)) { ++ ret = vfio_sync_dirty_bitmap(bcontainer, section); + if (ret) { + error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret, + strerror(-ret)); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 584eee4ba1..7f508669f5 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->error = NULL; + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 6ba2e2f8c4..5c1dee8c9f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -453,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + { + struct vfio_info_cap_header *hdr; + struct vfio_iommu_type1_info_cap_migration *cap_mig; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + + hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); + if (!hdr) { +@@ -467,7 +468,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer 
*container, + * qemu_real_host_page_size to mark those dirty. + */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->bcontainer.dirty_pages_supported = true; ++ bcontainer->dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -558,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + + container = g_malloc0(sizeof(*container)); + container->fd = fd; +- container->error = NULL; + container->iova_ranges = NULL; + bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); +@@ -621,25 +621,24 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + +- container->listener = vfio_memory_listener; +- +- memory_listener_register(&container->listener, bcontainer->space->as); ++ bcontainer->listener = vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); + +- if (container->error) { ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "memory listener initialization failed: "); + goto listener_release_exit; + } + +- container->initialized = true; ++ bcontainer->initialized = true; + + return 0; + listener_release_exit: + QLIST_REMOVE(group, container_next); + QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +@@ -674,7 +673,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + * group. 
+ */ + if (QLIST_EMPTY(&container->group_list)) { +- memory_listener_unregister(&container->listener); ++ memory_listener_unregister(&bcontainer->listener); + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || + container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { + vfio_spapr_container_deinit(container); +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 4f76bdd3ca..7a50975f25 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + { + VFIOContainer *container = container_of(listener, VFIOContainer, + prereg_listener); ++ VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + * can gracefully fail. Runtime, there's not much we can do other + * than throw a hardware error. + */ +- if (!container->initialized) { +- if (!container->error) { +- error_setg_errno(&container->error, -ret, ++ if (!bcontainer->initialized) { ++ if (!bcontainer->error) { ++ error_setg_errno(&bcontainer->error, -ret, + "Memory registering failed"); + } + } else { +@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + memory_listener_register(&container->prereg_listener, + &address_space_memory); +- if (container->error) { ++ if (bcontainer->error) { + ret = -1; +- error_propagate_prepend(errp, container->error, ++ error_propagate_prepend(errp, bcontainer->error, + "RAM memory listener initialization failed: "); + goto listener_unregister_exit; + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 8a607a4c17..922022cbc6 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -78,11 +78,8 @@ struct VFIOGroup; + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached 
groups */ +- MemoryListener listener; + MemoryListener prereg_listener; + unsigned iommu_type; +- Error *error; +- bool initialized; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 8e05b5ac5a..95f8d319e0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ MemoryListener listener; ++ Error *error; ++ bool initialized; + unsigned long pgsizes; + unsigned int dma_max_mappings; + bool dirty_pages_supported; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch new file mode 100644 index 0000000..df483e3 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-per-container-device-list-in-bas.patch @@ -0,0 +1,230 @@ +From 0b3fbb6bf5c5bccec184829ff9454fd637c512b9 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:34 +0800 +Subject: [PATCH 009/101] vfio/container: Move per container device list in + base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [8/67] d546cc25f4424b2d42356765c860fdaf4a3ba652 (eauger1/centos-qemu-kvm) + +VFIO Device is also changed to point to base container instead of +legacy container. + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 3e6015d1117579324b456aa169dfca06da9922cf) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 23 +++++++++++++++-------- + hw/vfio/container.c | 12 ++++++------ + include/hw/vfio/vfio-common.h | 3 +-- + include/hw/vfio/vfio-container-base.h | 1 + + 4 files changed, 23 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b1a875ca93..9415395ed9 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->bcontainer.space->as != &address_space_memory; ++ return vbasedev->bcontainer->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev) + + static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { +@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + + bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_pages_supported) { + return false; + } +@@ -222,13 +224,14 @@ bool 
vfio_devices_all_device_dirty_tracking(VFIOContainer *container) + */ + bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + if (!migration_is_active(migrate_get_current())) { + return false; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { +@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section, + VFIOContainer *container) + { + VFIOPCIDevice *pcidev; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + Object *owner; + + owner = memory_region_owner(section->mr); + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { + continue; + } +@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container) + uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature), + sizeof(uint64_t))] = {}; + struct vfio_device_feature *feature = (struct vfio_device_feature *)buf; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_SET | + VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (!vbasedev->dirty_tracking) { + continue; + } +@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container) + { + struct vfio_device_feature *feature; + VFIODirtyRanges ranges; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret = 0; + +@@ -1046,7 +1052,7 @@ static int 
vfio_devices_dma_logging_start(VFIOContainer *container) + return -errno; + } + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + if (vbasedev->dirty_tracking) { + continue; + } +@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container, + VFIOBitmap *vbmap, hwaddr iova, + hwaddr size) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + VFIODevice *vbasedev; + int ret; + +- QLIST_FOREACH(vbasedev, &container->device_list, container_next) { ++ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) { + ret = vfio_device_dma_logging_report(vbasedev, iova, size, + vbmap->bitmap); + if (ret) { +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 3ab74e2615..63a906de93 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -888,7 +888,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + int groupid = vfio_device_groupid(vbasedev, errp); + VFIODevice *vbasedev_iter; + VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + int ret; + + if (groupid < 0) { +@@ -915,9 +915,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + return ret; + } + +- container = group->container; +- vbasedev->container = container; +- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next); ++ bcontainer = &group->container->bcontainer; ++ vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); + QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); + + return ret; +@@ -927,13 +927,13 @@ void vfio_detach_device(VFIODevice *vbasedev) + { + VFIOGroup *group = vbasedev->group; + +- if (!vbasedev->container) { ++ if (!vbasedev->bcontainer) { + return; + } + + QLIST_REMOVE(vbasedev, global_next); + QLIST_REMOVE(vbasedev, container_next); +- vbasedev->container = NULL; ++ vbasedev->bcontainer = NULL; + 
trace_vfio_detach_device(vbasedev->name, group->groupid); + vfio_put_base_device(vbasedev); + vfio_put_group(group); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 60f2785fe0..9740cf9fbc 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -90,7 +90,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +- QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; + +@@ -118,7 +117,7 @@ typedef struct VFIODevice { + QLIST_ENTRY(VFIODevice) container_next; + QLIST_ENTRY(VFIODevice) global_next; + struct VFIOGroup *group; +- VFIOContainer *container; ++ VFIOContainerBase *bcontainer; + char *sysfsdev; + char *name; + DeviceState *dev; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f244f003d0..7090962496 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase { + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; ++ QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch new file mode 100644 index 0000000..0db20c2 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch @@ -0,0 +1,242 @@ +From d798939fbbe6c27200c165edd6f3771413821b34 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:36 +0800 +Subject: [PATCH 011/101] vfio/container: Move pgsizes and dma_max_mappings to + base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger 
+RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [10/67] e80696175aba159a17ce9a869535db66682deb08 (eauger1/centos-qemu-kvm) + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 7ab1cb74ffdbf92ef237243b41bde5c7067d5298) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 17 +++++++++-------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 11 +++++------ + hw/vfio/spapr.c | 10 ++++++---- + include/hw/vfio/vfio-common.h | 2 -- + include/hw/vfio/vfio-container-base.h | 2 ++ + 6 files changed, 23 insertions(+), 20 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index cf6618f6ed..1cb53d369e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + static void vfio_register_ram_discard_listener(VFIOContainer *container, + MemoryRegionSection *section) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + section->mr); + + g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity)); +- g_assert(container->pgsizes && +- vrdl->granularity >= 1ULL << ctz64(container->pgsizes)); ++ g_assert(bcontainer->pgsizes && ++ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes)); + + ram_discard_listener_init(&vrdl->listener, + vfio_ram_discard_notify_populate, +@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + * number of sections in the address space we could have over time, + * also consuming DMA mappings. 
+ */ +- if (container->dma_max_mappings) { ++ if (bcontainer->dma_max_mappings) { + unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512; + + #ifdef CONFIG_KVM +@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + + if (vrdl_mappings + max_memslots - vrdl_count > +- container->dma_max_mappings) { ++ bcontainer->dma_max_mappings) { + warn_report("%s: possibly running out of DMA mappings. E.g., try" + " increasing the 'block-size' of virtio-mem devies." + " Maximum possible DMA mappings: %d, Maximum possible" +- " memslots: %d", __func__, container->dma_max_mappings, ++ " memslots: %d", __func__, bcontainer->dma_max_mappings, + max_memslots); + } + } +@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + iommu_idx); + + ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr, +- container->pgsizes, ++ bcontainer->pgsizes, + &err); + if (ret) { + g_free(giommu); +@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + llsize = int128_sub(llend, int128_make64(iova)); + + if (memory_region_is_ram_device(section->mr)) { +- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + + if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) { + trace_vfio_listener_region_add_no_dma_map( +@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (memory_region_is_ram_device(section->mr)) { + hwaddr pgmask; + +- pgmask = (1ULL << ctz64(container->pgsizes)) - 1; ++ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { + vfio_unregister_ram_discard_listener(container, section); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 5d654ae172..dcce111349 100644 +--- a/hw/vfio/container-base.c ++++ 
b/hw/vfio/container-base.c +@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->ops = ops; + bcontainer->space = space; + bcontainer->dirty_pages_supported = false; ++ bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 7bd81eab09..c5a6262882 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -154,7 +154,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { + trace_vfio_legacy_dma_unmap_overflow_workaround(); +- unmap.size -= 1ULL << ctz64(container->pgsizes); ++ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes); + continue; + } + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno)); +@@ -559,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +@@ -589,13 +588,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + if (info->flags & VFIO_IOMMU_INFO_PGSIZES) { +- container->pgsizes = info->iova_pgsizes; ++ bcontainer->pgsizes = info->iova_pgsizes; + } else { +- container->pgsizes = qemu_real_host_page_size(); ++ bcontainer->pgsizes = qemu_real_host_page_size(); + } + +- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) { +- container->dma_max_mappings = 65535; ++ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) { ++ bcontainer->dma_max_mappings = 65535; + } + + vfio_get_info_iova_range(info, container); +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 83da2f7ec2..4f76bdd3ca 100644 +--- a/hw/vfio/spapr.c ++++ 
b/hw/vfio/spapr.c +@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container, + hwaddr *pgsize) + { + int ret = 0; ++ VFIOContainerBase *bcontainer = &container->bcontainer; + IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr); + uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask; + unsigned entries, bits_total, bits_per_level, max_levels; +@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + if (pagesize > rampagesize) { + pagesize = rampagesize; + } +- pgmask = container->pgsizes & (pagesize | (pagesize - 1)); ++ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1)); + pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0; + if (!pagesize) { + error_report("Host doesn't support page size 0x%"PRIx64 + ", the supported mask is 0x%lx", + memory_region_iommu_get_min_page_size(iommu_mr), +- container->pgsizes); ++ bcontainer->pgsizes); + return -EINVAL; + } + +@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container, + + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { ++ VFIOContainerBase *bcontainer = &container->bcontainer; + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + + if (v2) { +- container->pgsizes = info.ddw.pgsizes; ++ bcontainer->pgsizes = info.ddw.pgsizes; + /* + * There is a default window in just created container. 
+ * To make region_add/del simpler, we better remove this +@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } + } else { + /* The default table uses 4K pages */ +- container->pgsizes = 0x1000; ++ bcontainer->pgsizes = 0x1000; + vfio_host_win_add(container, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index bc67e1316c..d3dc2f9dcb 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -85,8 +85,6 @@ typedef struct VFIOContainer { + bool initialized; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; +- unsigned long pgsizes; +- unsigned int dma_max_mappings; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 7090962496..85ec7e1a56 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ unsigned long pgsizes; ++ unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch new file mode 100644 index 0000000..edd4538 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-space-field-to-base-container.patch @@ -0,0 +1,265 @@ +From 3ba43cbc5b096feed6272e070cf152d5fc74df01 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:32 +0800 +Subject: [PATCH 007/101] vfio/container: Move space field to base container +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [6/67] b0aa17d9ec4588bd64373452a30306e826234d0b (eauger1/centos-qemu-kvm) + +Move the space field to the base object. Also the VFIOAddressSpace +now contains a list of base containers. + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit e5597063386a0c76308ad16da31726d23f489945) +Signed-off-by: Eric Auger +--- + hw/ppc/spapr_pci_vfio.c | 10 +++++----- + hw/vfio/common.c | 4 ++-- + hw/vfio/container-base.c | 6 +++++- + hw/vfio/container.c | 18 ++++++++---------- + include/hw/vfio/vfio-common.h | 8 -------- + include/hw/vfio/vfio-container-base.h | 9 +++++++++ + 6 files changed, 29 insertions(+), 26 deletions(-) + +diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c +index f283f7e38d..d1d07bec46 100644 +--- a/hw/ppc/spapr_pci_vfio.c ++++ b/hw/ppc/spapr_pci_vfio.c +@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op) + static VFIOContainer *vfio_eeh_as_container(AddressSpace *as) + { + VFIOAddressSpace *space = vfio_get_address_space(as); +- VFIOContainer *container = NULL; ++ VFIOContainerBase *bcontainer = NULL; + + if (QLIST_EMPTY(&space->containers)) { + /* No containers to act on */ + goto out; + } + +- container = QLIST_FIRST(&space->containers); ++ bcontainer = QLIST_FIRST(&space->containers); + +- if (QLIST_NEXT(container, next)) { ++ if (QLIST_NEXT(bcontainer, next)) { + /* + * We don't yet have logic to synchronize EEH state across + * multiple containers + */ +- container = NULL; ++ bcontainer = NULL; + goto out; + } + + out: + vfio_put_address_space(space); +- return container; ++ return container_of(bcontainer, 
VFIOContainer, bcontainer); + } + + static bool vfio_eeh_as_ok(AddressSpace *as) +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 43580bcc43..1d8202537e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void) + + bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- return vbasedev->container->space->as != &address_space_memory; ++ return vbasedev->container->bcontainer.space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container, + dirty.container = container; + + memory_listener_register(&dirty.listener, +- container->space->as); ++ container->bcontainer.space->as); + + *ranges = dirty.ranges; + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 20bcb9669a..3933391e0d 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + +-void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops) ++void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, ++ const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; ++ bcontainer->space = space; + QLIST_INIT(&bcontainer->giommu_list); + } + +@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer) + { + VFIOGuestIOMMU *giommu, *tmp; + ++ QLIST_REMOVE(bcontainer, next); ++ + QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) { + memory_region_unregister_iommu_notifier( + MEMORY_REGION(giommu->iommu_mr), &giommu->n); +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 133d3c8f5c..f12fcb6fe1 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -514,7 +514,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + * details once we know 
which type of IOMMU we are using. + */ + +- QLIST_FOREACH(container, &space->containers, next) { ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + ret = vfio_ram_block_discard_disable(container, true); + if (ret) { +@@ -550,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + } + + container = g_malloc0(sizeof(*container)); +- container->space = space; + container->fd = fd; + container->error = NULL; + container->dirty_pages_supported = false; +@@ -558,7 +558,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, &vfio_legacy_ops); ++ vfio_container_init(bcontainer, space, &vfio_legacy_ops); + + ret = vfio_init_container(container, group->fd, errp); + if (ret) { +@@ -613,14 +613,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + vfio_kvm_device_add_group(group); + + QLIST_INIT(&container->group_list); +- QLIST_INSERT_HEAD(&space->containers, container, next); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + group->container = container; + QLIST_INSERT_HEAD(&container->group_list, group, container_next); + + container->listener = vfio_memory_listener; + +- memory_listener_register(&container->listener, container->space->as); ++ memory_listener_register(&container->listener, bcontainer->space->as); + + if (container->error) { + ret = -1; +@@ -634,7 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + return 0; + listener_release_exit: + QLIST_REMOVE(group, container_next); +- QLIST_REMOVE(container, next); ++ QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); + memory_listener_unregister(&container->listener); + if (container->iommu_type == 
VFIO_SPAPR_TCE_v2_IOMMU || +@@ -684,9 +684,7 @@ static void vfio_disconnect_container(VFIOGroup *group) + } + + if (QLIST_EMPTY(&container->group_list)) { +- VFIOAddressSpace *space = container->space; +- +- QLIST_REMOVE(container, next); ++ VFIOAddressSpace *space = bcontainer->space; + + vfio_container_destroy(bcontainer); + +@@ -707,7 +705,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) + QLIST_FOREACH(group, &vfio_group_list, next) { + if (group->groupid == groupid) { + /* Found it. Now is it already in the right context? */ +- if (group->container->space->as == as) { ++ if (group->container->bcontainer.space->as == as) { + return group; + } else { + error_setg(errp, "group %d used in multiple address spaces", +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6be082b8f2..bd4de6cb3a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -73,17 +73,10 @@ typedef struct VFIOMigration { + bool initial_data_sent; + } VFIOMigration; + +-typedef struct VFIOAddressSpace { +- AddressSpace *as; +- QLIST_HEAD(, VFIOContainer) containers; +- QLIST_ENTRY(VFIOAddressSpace) list; +-} VFIOAddressSpace; +- + struct VFIOGroup; + + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; +- VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + MemoryListener listener; + MemoryListener prereg_listener; +@@ -98,7 +91,6 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; +- QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; + } VFIOContainer; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index a11aec5755..c7cc6ec9c5 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -24,12 +24,20 @@ 
typedef struct { + hwaddr pages; + } VFIOBitmap; + ++typedef struct VFIOAddressSpace { ++ AddressSpace *as; ++ QLIST_HEAD(, VFIOContainerBase) containers; ++ QLIST_ENTRY(VFIOAddressSpace) list; ++} VFIOAddressSpace; ++ + /* + * This is the base object for vfio container backends + */ + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; ++ VFIOAddressSpace *space; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; + + typedef struct VFIOGuestIOMMU { +@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + IOMMUTLBEntry *iotlb); + + void vfio_container_init(VFIOContainerBase *bcontainer, ++ VFIOAddressSpace *space, + const VFIOIOMMUOps *ops); + void vfio_container_destroy(VFIOContainerBase *bcontainer); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch new file mode 100644 index 0000000..5e31d07 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Move-vrdl_list-to-base-container.patch @@ -0,0 +1,255 @@ +From aadd055dcc06cb964ebfd2868b7e9b207d62ae0e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:37 +0800 +Subject: [PATCH 012/101] vfio/container: Move vrdl_list to base container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [11/67] 42da5389e39291839259f0e4c020c7461b7225cc (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dc74a4b0056c0c803d46612a2319294921097974) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 38 +++++++++++++-------------- + hw/vfio/container-base.c | 1 + + hw/vfio/container.c | 1 - + include/hw/vfio/vfio-common.h | 11 -------- + include/hw/vfio/vfio-container-base.h | 11 ++++++++ + 5 files changed, 31 insertions(+), 31 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 1cb53d369e..f15665789f 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr size = int128_get64(section->size); + const hwaddr iova = section->offset_within_address_space; + int ret; + + /* Unmap with a single call. */ +- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, +- iova, size , NULL); ++ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL); + if (ret) { + error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); +@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + { + VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener, + listener); ++ VFIOContainerBase *bcontainer = vrdl->bcontainer; + const hwaddr end = section->offset_within_region + + int128_get64(section->size); + hwaddr start, next, iova; +@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, +- next - start, vaddr, section->readonly); ++ ret = vfio_container_dma_map(bcontainer, iova, next - start, ++ vaddr, section->readonly); + if 
(ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); +@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + return 0; + } + +-static void vfio_register_ram_discard_listener(VFIOContainer *container, ++static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl; + +@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE)); + + vrdl = g_new0(VFIORamDiscardListener, 1); +- vrdl->container = container; ++ vrdl->bcontainer = bcontainer; + vrdl->mr = section->mr; + vrdl->offset_within_address_space = section->offset_within_address_space; + vrdl->size = int128_get64(section->size); +@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + vfio_ram_discard_notify_populate, + vfio_ram_discard_notify_discard, true); + ram_discard_manager_register_listener(rdm, &vrdl->listener, section); +- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next); ++ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next); + + /* + * Sanity-check if we have a theoretically problematic setup where we could +@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + #endif + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + hwaddr start, end; + + start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space, +@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container, + } + } + +-static void vfio_unregister_ram_discard_listener(VFIOContainer *container, ++static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer, + 
MemoryRegionSection *section) + { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + * about changes. + */ + if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_register_ram_discard_listener(container, section); ++ vfio_register_ram_discard_listener(bcontainer, section); + return; + } + +@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1; + try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask)); + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- vfio_unregister_ram_discard_listener(container, section); ++ vfio_unregister_ram_discard_listener(bcontainer, section); + /* Unregistering will trigger an unmap. */ + try_unmap = false; + } +@@ -1260,17 +1260,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section, + * Sync the whole mapped region (spanning multiple individual mappings) + * in one go. 
+ */ +- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size, +- ram_addr); ++ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr); + } + +-static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container, +- MemoryRegionSection *section) ++static int ++vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); + VFIORamDiscardListener *vrdl = NULL; + +- QLIST_FOREACH(vrdl, &container->vrdl_list, next) { ++ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { +@@ -1324,7 +1324,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + } + return 0; + } else if (memory_region_has_ram_discard_manager(section->mr)) { +- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section); ++ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section); + } + + ram_addr = memory_region_get_ram_addr(section->mr) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index dcce111349..584eee4ba1 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + bcontainer->dirty_pages_supported = false; + bcontainer->dma_max_mappings = 0; + QLIST_INIT(&bcontainer->giommu_list); ++ QLIST_INIT(&bcontainer->vrdl_list); + } + + void vfio_container_destroy(VFIOContainerBase *bcontainer) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c5a6262882..6ba2e2f8c4 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -560,7 +560,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + container->error = NULL; + container->iova_ranges = NULL; +- QLIST_INIT(&container->vrdl_list); + 
bcontainer = &container->bcontainer; + vfio_container_init(bcontainer, space, &vfio_legacy_ops); + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index d3dc2f9dcb..8a607a4c17 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -87,20 +87,9 @@ typedef struct VFIOContainer { + uint64_t max_dirty_bitmap_size; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + GList *iova_ranges; + } VFIOContainer; + +-typedef struct VFIORamDiscardListener { +- VFIOContainer *container; +- MemoryRegion *mr; +- hwaddr offset_within_address_space; +- hwaddr size; +- uint64_t granularity; +- RamDiscardListener listener; +- QLIST_ENTRY(VFIORamDiscardListener) next; +-} VFIORamDiscardListener; +- + typedef struct VFIOHostDMAWindow { + hwaddr min_iova; + hwaddr max_iova; +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 85ec7e1a56..8e05b5ac5a 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase { + unsigned int dma_max_mappings; + bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; ++ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; + QLIST_ENTRY(VFIOContainerBase) next; + QLIST_HEAD(, VFIODevice) device_list; + } VFIOContainerBase; +@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU { + QLIST_ENTRY(VFIOGuestIOMMU) giommu_next; + } VFIOGuestIOMMU; + ++typedef struct VFIORamDiscardListener { ++ VFIOContainerBase *bcontainer; ++ MemoryRegion *mr; ++ hwaddr offset_within_address_space; ++ hwaddr size; ++ uint64_t granularity; ++ RamDiscardListener listener; ++ QLIST_ENTRY(VFIORamDiscardListener) next; ++} VFIORamDiscardListener; ++ + int vfio_container_dma_map(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly); +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch new file mode 100644 index 0000000..f68be0b --- /dev/null +++ b/SOURCES/kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch @@ -0,0 +1,66 @@ +From edfc1ee2a1854d180ffad92e70212535a2ca668c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 21 Dec 2023 10:45:17 +0800 +Subject: [PATCH 062/101] vfio/container: Rename vfio_init_container to + vfio_set_iommu +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [61/67] 5e7f956379b54fe6fa7e078ec17e71325aa109af (eauger1/centos-qemu-kvm) + +vfio_container_init() and vfio_init_container() names are confusing +especially when we see vfio_init_container() calls vfio_container_init(). + +vfio_container_init() operates on base container which is consistent +with all routines handling 'VFIOContainerBase *' ops. + +vfio_init_container() operates on legacy container and setup IOMMU +context with ioctl(VFIO_SET_IOMMU). + +So choose to rename vfio_init_container to vfio_set_iommu to avoid +the confusion. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +(cherry picked from commit 9f734a117cbf63b03577b46c8cad8ad88ec6dced) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 8d334f52f2..bd25b9fbad 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -392,8 +392,8 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + return VFIO_IOMMU_CLASS(klass); + } + +-static int vfio_init_container(VFIOContainer *container, int group_fd, +- VFIOAddressSpace *space, Error **errp) ++static int vfio_set_iommu(VFIOContainer *container, int group_fd, ++ VFIOAddressSpace *space, Error **errp) + { + int iommu_type, ret; + const VFIOIOMMUClass *vioc; +@@ -616,7 +616,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->fd = fd; + bcontainer = &container->bcontainer; + +- ret = vfio_init_container(container, group->fd, space, errp); ++ ret = vfio_set_iommu(container, group->fd, space, errp); + if (ret) { + goto free_container_exit; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch new file mode 100644 index 0000000..77df179 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Replace-basename-with-g_path_get_base.patch @@ -0,0 +1,59 @@ +From 8d3857c7877da58ed0c6b62cf2714c4127350522 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 20 Dec 2023 14:53:02 +0100 +Subject: [PATCH 059/101] vfio/container: Replace basename with + g_path_get_basename +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott 
+RH-Commit: [58/67] 56a90f23dadc89271b1fff014fc64ade87c1a4cb (eauger1/centos-qemu-kvm) + +g_path_get_basename() is a portable utility function that has the +advantage of not modifying the string argument. It also fixes a compile +breakage with the Musl C library reported in [1]. + +[1] https://lore.kernel.org/all/20231212010228.2701544-1-raj.khem@gmail.com/ + +Reported-by: Khem Raj +Reviewed-by: Eric Auger +Reviewed-by: Zhao Liu +Reviewed-by: Zhenzhong Duan +Signed-off-by: Cédric Le Goater +(cherry picked from commit 213ae3ffda463c0503e39e0cf827511b5298c314) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 688cf23bab..8d334f52f2 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -869,7 +869,8 @@ static void vfio_put_base_device(VFIODevice *vbasedev) + + static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + { +- char *tmp, group_path[PATH_MAX], *group_name; ++ char *tmp, group_path[PATH_MAX]; ++ g_autofree char *group_name = NULL; + int ret, groupid; + ssize_t len; + +@@ -885,7 +886,7 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp) + + group_path[len] = 0; + +- group_name = basename(group_path); ++ group_name = g_path_get_basename(group_path); + if (sscanf(group_name, "%d", &groupid) != 1) { + error_setg_errno(errp, errno, "failed to read %s", group_path); + return -errno; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch new file mode 100644 index 0000000..5442688 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch @@ -0,0 +1,235 @@ +From a2c8aa64b1b21a3e1d4cf2a4fe7d84dc32f69284 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:33 +0800 +Subject: [PATCH 008/101] vfio/container: Switch to IOMMU BE +
set_dirty_page_tracking/query_dirty_bitmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [7/67] 88368809c7990e1d9b01406e48694fe3e3fb1397 (eauger1/centos-qemu-kvm) + +dirty_pages_supported field is also moved to the base container + +No functional change intended. + +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit bb424490edcef73d07f200d53f69415b203d81df) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 12 ++++++++---- + hw/vfio/container-base.c | 16 ++++++++++++++++ + hw/vfio/container.c | 21 ++++++++++++++------- + include/hw/vfio/vfio-common.h | 6 ------ + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 5 files changed, 44 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 1d8202537e..b1a875ca93 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener) + if (vfio_devices_all_device_dirty_tracking(container)) { + ret = vfio_devices_dma_logging_start(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, true); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ true); + } + + if (ret) { +@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener) + if (vfio_devices_all_device_dirty_tracking(container)) { + vfio_devices_dma_logging_stop(container); + } else { +- ret = vfio_set_dirty_page_tracking(container, false); ++ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer, ++ false); + } + + if (ret) { +@@ -1165,7 +1167,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t 
iova, + VFIOBitmap vbmap; + int ret; + +- if (!container->dirty_pages_supported && !all_device_dirty_tracking) { ++ if (!container->bcontainer.dirty_pages_supported && ++ !all_device_dirty_tracking) { + cpu_physical_memory_set_dirty_range(ram_addr, size, + tcg_enabled() ? DIRTY_CLIENTS_ALL : + DIRTY_CLIENTS_NOCODE); +@@ -1180,7 +1183,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); + } else { +- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size); ++ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap, ++ iova, size); + } + + if (ret) { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 3933391e0d..5d654ae172 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) ++{ ++ g_assert(bcontainer->ops->set_dirty_page_tracking); ++ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); ++} ++ ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) ++{ ++ g_assert(bcontainer->ops->query_dirty_bitmap); ++ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size); ++} ++ + void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space, + const VFIOIOMMUOps *ops) + { + bcontainer->ops = ops; + bcontainer->space = space; ++ bcontainer->dirty_pages_supported = false; + QLIST_INIT(&bcontainer->giommu_list); + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index f12fcb6fe1..3ab74e2615 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -131,7 +131,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase 
*bcontainer, hwaddr iova, + + if (iotlb && vfio_devices_all_running_and_mig_active(container)) { + if (!vfio_devices_all_device_dirty_tracking(container) && +- container->dirty_pages_supported) { ++ container->bcontainer.dirty_pages_supported) { + return vfio_dma_unmap_bitmap(container, iova, size, iotlb); + } + +@@ -205,14 +205,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, + return -errno; + } + +-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) ++static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + int ret; + struct vfio_iommu_type1_dirty_bitmap dirty = { + .argsz = sizeof(dirty), + }; + +- if (!container->dirty_pages_supported) { ++ if (!bcontainer->dirty_pages_supported) { + return 0; + } + +@@ -232,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start) + return ret; + } + +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size) ++static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dirty_bitmap *dbitmap; + struct vfio_iommu_type1_dirty_bitmap_get *range; + int ret; +@@ -461,7 +467,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container, + * qemu_real_host_page_size to mark those dirty. 
+ */ + if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) { +- container->dirty_pages_supported = true; ++ container->bcontainer.dirty_pages_supported = true; + container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; + container->dirty_pgsizes = cap_mig->pgsize_bitmap; + } +@@ -553,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->fd = fd; + container->error = NULL; +- container->dirty_pages_supported = false; + container->dma_max_mappings = 0; + container->iova_ranges = NULL; + QLIST_INIT(&container->vrdl_list); +@@ -937,4 +942,6 @@ void vfio_detach_device(VFIODevice *vbasedev) + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, ++ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, ++ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, + }; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index bd4de6cb3a..60f2785fe0 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -83,7 +83,6 @@ typedef struct VFIOContainer { + unsigned iommu_type; + Error *error; + bool initialized; +- bool dirty_pages_supported; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; +@@ -190,11 +189,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + +-/* container->fd */ +-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); +-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, +- hwaddr iova, hwaddr size); +- + /* SPAPR specific */ + int vfio_container_add_section_window(VFIOContainer *container, + MemoryRegionSection *section, +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 
c7cc6ec9c5..f244f003d0 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace { + typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + VFIOAddressSpace *space; ++ bool dirty_pages_supported; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_ENTRY(VFIOContainerBase) next; + } VFIOContainerBase; +@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, ++ bool start); ++int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, ++ VFIOBitmap *vbmap, ++ hwaddr iova, hwaddr size); + + void vfio_container_init(VFIOContainerBase *bcontainer, + VFIOAddressSpace *space, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch new file mode 100644 index 0000000..cfb5eb1 --- /dev/null +++ b/SOURCES/kvm-vfio-container-Switch-to-dma_map-unmap-API.patch @@ -0,0 +1,303 @@ +From 00daef8e3f4f64b1401b2e8945c256d27fbfa960 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 2 Nov 2023 15:12:29 +0800 +Subject: [PATCH 004/101] vfio/container: Switch to dma_map|unmap API +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [3/67] 9a20e2f2b277be65463f145df3309271493be6ac (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Yi Sun +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit b08501a999e2448f500a46d68da503be55186b04) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 45 +++++++++++++++------------ + hw/vfio/container-base.c | 32 +++++++++++++++++++ + hw/vfio/container.c | 22 ++++++++----- + hw/vfio/meson.build | 1 + + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 4 --- + include/hw/vfio/vfio-container-base.h | 7 +++++ + 7 files changed, 81 insertions(+), 32 deletions(-) + create mode 100644 hw/vfio/container-base.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e70fdf5e0c..e610771888 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, + static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + { + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); +- VFIOContainer *container = giommu->container; ++ VFIOContainerBase *bcontainer = &giommu->container->bcontainer; + hwaddr iova = iotlb->iova + giommu->iommu_offset; + void *vaddr; + int ret; +@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + * of vaddr will always be there, even if the memory object is + * destroyed and its backing memory munmap-ed. 
+ */ +- ret = vfio_dma_map(container, iova, +- iotlb->addr_mask + 1, vaddr, +- read_only); ++ ret = vfio_container_dma_map(bcontainer, iova, ++ iotlb->addr_mask + 1, vaddr, ++ read_only); + if (ret) { +- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, vaddr, ret, strerror(-ret)); + } + } else { +- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); ++ ret = vfio_container_dma_unmap(bcontainer, iova, ++ iotlb->addr_mask + 1, iotlb); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", +- container, iova, ++ bcontainer, iova, + iotlb->addr_mask + 1, ret, strerror(-ret)); + vfio_set_migration_error(ret); + } +@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl, + int ret; + + /* Unmap with a single call. 
*/ +- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL); ++ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer, ++ iova, size , NULL); + if (ret) { +- error_report("%s: vfio_dma_unmap() failed: %s", __func__, ++ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__, + strerror(-ret)); + } + } +@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, + section->offset_within_address_space; + vaddr = memory_region_get_ram_ptr(section->mr) + start; + +- ret = vfio_dma_map(vrdl->container, iova, next - start, +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova, ++ next - start, vaddr, section->readonly); + if (ret) { + /* Rollback */ + vfio_ram_discard_notify_discard(rdl, section); +@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener, + } + } + +- ret = vfio_dma_map(container, iova, int128_get64(llsize), +- vaddr, section->readonly); ++ ret = vfio_container_dma_map(&container->bcontainer, ++ iova, int128_get64(llsize), vaddr, ++ section->readonly); + if (ret) { +- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", " ++ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx", %p) = %d (%s)", + container, iova, int128_get64(llsize), vaddr, ret, + strerror(-ret)); +@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (int128_eq(llsize, int128_2_64())) { + /* The unmap ioctl doesn't accept a full 64-bit span. 
*/ + llsize = int128_rshift(llsize, 1); +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); + } + iova += int128_get64(llsize); + } +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); ++ ret = vfio_container_dma_unmap(&container->bcontainer, iova, ++ int128_get64(llsize), NULL); + if (ret) { +- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " ++ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%s)", + container, iova, int128_get64(llsize), ret, + strerror(-ret)); +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +new file mode 100644 +index 0000000000..55d3a35fa4 +--- /dev/null ++++ b/hw/vfio/container-base.c +@@ -0,0 +1,32 @@ ++/* ++ * VFIO BASE CONTAINER ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qapi/error.h" ++#include "qemu/error-report.h" ++#include "hw/vfio/vfio-container-base.h" ++ ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly) ++{ ++ g_assert(bcontainer->ops->dma_map); ++ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly); ++} ++ ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ g_assert(bcontainer->ops->dma_unmap); ++ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); ++} +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 4bc43ddfa4..c04df26323 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -115,9 +115,11 @@ unmap_exit: + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb) ++static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, IOMMUTLBEntry *iotlb) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = 0, +@@ -151,7 +153,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + */ + if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) && + container->iommu_type == VFIO_TYPE1v2_IOMMU) { +- trace_vfio_dma_unmap_overflow_workaround(); ++ trace_vfio_legacy_dma_unmap_overflow_workaround(); + unmap.size -= 1ULL << ctz64(container->pgsizes); + continue; + } +@@ -170,9 +172,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + return 0; + } + +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void *vaddr, bool readonly) ++static int 
vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_READ, +@@ -191,7 +195,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || +- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && ++ (errno == EBUSY && ++ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } +@@ -937,4 +942,7 @@ void vfio_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + +-const VFIOIOMMUOps vfio_legacy_ops; ++const VFIOIOMMUOps vfio_legacy_ops = { ++ .dma_map = vfio_legacy_dma_map, ++ .dma_unmap = vfio_legacy_dma_unmap, ++}; +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index 2a6912c940..eb6ce6229d 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -2,6 +2,7 @@ vfio_ss = ss.source_set() + vfio_ss.add(files( + 'helpers.c', + 'common.c', ++ 'container-base.c', + 'container.c', + 'spapr.c', + 'migration.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0eb2387cf2..9f7fedee98 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x" +-vfio_dma_unmap_overflow_workaround(void) "" ++vfio_legacy_dma_unmap_overflow_workaround(void) "" + vfio_get_dirty_bitmap(int fd, 
uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 678161f207..24a26345e5 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -208,10 +208,6 @@ void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* container->fd */ +-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, IOMMUTLBEntry *iotlb); +-int vfio_dma_map(VFIOContainer *container, hwaddr iova, +- ram_addr_t size, void *vaddr, bool readonly); + int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start); + int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 1d6daaea5d..56b033f59f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase { + const VFIOIOMMUOps *ops; + } VFIOContainerBase; + ++int vfio_container_dma_map(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ void *vaddr, bool readonly); ++int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb); ++ + struct VFIOIOMMUOps { + /* basic feature */ + int (*dma_map)(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch new file mode 100644 index 0000000..52e3d87 --- /dev/null +++ 
b/SOURCES/kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch @@ -0,0 +1,115 @@ +From 49435d4d592bc890f56b69c2290f890c87b5a103 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:05 +0800 +Subject: [PATCH 026/101] vfio/iommufd: Add support for iova_ranges and pgsizes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [25/67] 578af0547d97276ccd4936b574c12118fc70d468 (eauger1/centos-qemu-kvm) + +Some vIOMMU such as virtio-iommu use IOVA ranges from host side to +setup reserved ranges for passthrough device, so that guest will not +use an IOVA range beyond host support. + +Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to +vIOMMU just like the legacy backend, if this fails, fallback to +64bit IOVA range. + +Also use out_iova_alignment returned from uAPI as pgsizes instead of +qemu_real_host_page_size() as a fallback. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 714e9affa8ae1d84007c8afde7bb10fef9cb883d) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 55 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6d31aeac7b..01b448e840 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state) + return ram_block_uncoordinated_discard_disable(state); + } + ++static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container, ++ uint32_t ioas_id, Error **errp) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ struct iommu_ioas_iova_ranges *info; ++ struct iommu_iova_range *iova_ranges; ++ int ret, sz, fd = container->be->fd; ++ ++ info = g_malloc0(sizeof(*info)); ++ info->size = sizeof(*info); ++ info->ioas_id = ioas_id; ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret && errno != EMSGSIZE) { ++ goto error; ++ } ++ ++ sz = info->num_iovas * sizeof(struct iommu_iova_range); ++ info = g_realloc(info, sizeof(*info) + sz); ++ info->allowed_iovas = (uintptr_t)(info + 1); ++ ++ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info); ++ if (ret) { ++ goto error; ++ } ++ ++ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas; ++ ++ for (int i = 0; i < info->num_iovas; i++) { ++ Range *range = g_new(Range, 1); ++ ++ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last); ++ bcontainer->iova_ranges = ++ range_list_insert(bcontainer->iova_ranges, range); ++ } ++ bcontainer->pgsizes = info->out_iova_alignment; ++ ++ g_free(info); ++ return 0; ++ ++error: ++ ret = -errno; ++ g_free(info); ++ error_setg_errno(errp, errno, "Cannot get IOVA ranges"); ++ return ret; ++} ++ + static int 
iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + AddressSpace *as, Error **errp) + { +@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + goto err_discard_disable; + } + +- bcontainer->pgsizes = qemu_real_host_page_size(); ++ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err); ++ if (ret) { ++ error_append_hint(&err, ++ "Fallback to default 64bit IOVA range and 4K page size\n"); ++ warn_report_err(err); ++ err = NULL; ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ } + + bcontainer->listener = vfio_memory_listener; + memory_listener_register(&bcontainer->listener, bcontainer->space->as); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch new file mode 100644 index 0000000..48db196 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch @@ -0,0 +1,215 @@ +From e94700896dd8fcea149d9719eccde6f485440be2 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:08 +0800 +Subject: [PATCH 029/101] vfio/iommufd: Enable pci hot reset through iommufd + cdev interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [28/67] ca1ae970138ee4a6f4b3b49817e775f3159f4c97 (eauger1/centos-qemu-kvm) + +Implement the newly introduced pci_hot_reset callback named +iommufd_cdev_pci_hot_reset to do iommufd specific check and +reset operation. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 96d6f85ff012abd7aaa35b1a2bc48b8640c898d9) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + 2 files changed, 151 insertions(+) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 01b448e840..6e53e013ef 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "qemu/cutils.h" + #include "qemu/chardev_open.h" ++#include "pci.h" + + static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, + ram_addr_t size, void *vaddr, bool readonly) +@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + close(vbasedev->fd); + } + ++static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) ++{ ++ VFIODevice *vbasedev_iter; ++ ++ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { ++ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ continue; ++ } ++ if (devid == vbasedev_iter->devid) { ++ return vbasedev_iter; ++ } ++ } ++ return NULL; ++} ++ ++static VFIOPCIDevice * ++iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev, ++ VFIODevice *reset_dev) ++{ ++ VFIODevice *vbasedev_tmp; ++ ++ if (dep_dev->devid == reset_dev->devid || ++ dep_dev->devid == VFIO_PCI_DEVID_OWNED) { ++ return NULL; ++ } ++ ++ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid); ++ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized || ++ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) { ++ return NULL; ++ } ++ ++ return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev); ++} ++ ++static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct 
vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int ret, i; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ ++ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID); ++ ++ devices = &info->devices[0]; ++ ++ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) { ++ if (!vdev->has_pm_reset) { ++ for (i = 0; i < info->count; i++) { ++ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on device %04x:%02x:%02x.%x " ++ "which is not owned.", ++ vdev->vbasedev.name, devices[i].segment, ++ devices[i].bus, PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn)); ++ } ++ } ++ } ++ ret = -EPERM; ++ goto out_single; ++ } ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment, ++ devices[i].bus, ++ PCI_SLOT(devices[i].devfn), ++ PCI_FUNC(devices[i].devfn), ++ devices[i].devid); ++ ++ /* ++ * If a VFIO cdev device is resettable, all the dependent devices ++ * are either bound to same iommufd or within same iommu_groups as ++ * one of the iommufd bound devices. 
++ */ ++ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED); ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Use zero length array for hot reset with iommufd backend */ ++ reset = g_malloc0(sizeof(*reset)); ++ reset->argsz = sizeof(*reset); ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? strerror(errno) : "Success"); ++ ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ VFIOPCIDevice *tmp; ++ ++ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev); ++ if (!tmp) { ++ continue; ++ } ++ vfio_pci_post_reset(tmp); ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_iommufd_ops = { + .dma_map = iommufd_cdev_map, + .dma_unmap = iommufd_cdev_unmap, + .attach_device = iommufd_cdev_attach, + .detach_device = iommufd_cdev_detach, ++ .pci_hot_reset = iommufd_cdev_pci_hot_reset, + }; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 3340c93af0..8fdde54456 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ + iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" + iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" + iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" 
++iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d" +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch new file mode 100644 index 0000000..f00cbcd --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Implement-the-iommufd-backend.patch @@ -0,0 +1,561 @@ +From f018d0b686406256c2b5e823e4227316ee1394e9 Mon Sep 17 00:00:00 2001 +From: Yi Liu +Date: Tue, 21 Nov 2023 16:44:03 +0800 +Subject: [PATCH 024/101] vfio/iommufd: Implement the iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [23/67] d11046654117a690542a1e2b48b9d1994f778b2d (eauger1/centos-qemu-kvm) + +The iommufd backend is implemented based on the new /dev/iommu user API. +This backend obviously depends on CONFIG_IOMMUFD. + +So far, the iommufd backend doesn't support dirty page sync yet. 
+ +Co-authored-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 5ee3dc7af7859e7b8aa34c10c21778101c15e812) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 6 + + hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++ + hw/vfio/meson.build | 3 + + hw/vfio/trace-events | 10 + + include/hw/vfio/vfio-common.h | 11 + + 5 files changed, 452 insertions(+) + create mode 100644 hw/vfio/iommufd.c + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 934f4f5446..6569732b7a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1503,6 +1504,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + { + const VFIOIOMMUOps *ops = &vfio_legacy_ops; + ++#ifdef CONFIG_IOMMUFD ++ if (vbasedev->iommufd) { ++ ops = &vfio_iommufd_ops; ++ } ++#endif + return ops->attach_device(name, vbasedev, as, errp); + } + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +new file mode 100644 +index 0000000000..6d31aeac7b +--- /dev/null ++++ b/hw/vfio/iommufd.c +@@ -0,0 +1,422 @@ ++/* ++ * iommufd container backend ++ * ++ * Copyright (C) 2023 Intel Corporation. ++ * Copyright Red Hat, Inc. 
2023 ++ * ++ * Authors: Yi Liu ++ * Eric Auger ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++#include ++ ++#include "hw/vfio/vfio-common.h" ++#include "qemu/error-report.h" ++#include "trace.h" ++#include "qapi/error.h" ++#include "sysemu/iommufd.h" ++#include "hw/qdev-core.h" ++#include "sysemu/reset.h" ++#include "qemu/cutils.h" ++#include "qemu/chardev_open.h" ++ ++static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova, ++ ram_addr_t size, void *vaddr, bool readonly) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ return iommufd_backend_map_dma(container->be, ++ container->ioas_id, ++ iova, size, vaddr, readonly); ++} ++ ++static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ VFIOIOMMUFDContainer *container = ++ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ ++ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ ++ return iommufd_backend_unmap_dma(container->be, ++ container->ioas_id, iova, size); ++} ++ ++static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp) ++{ ++ return vfio_kvm_device_add_fd(vbasedev->fd, errp); ++} ++ ++static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev) ++{ ++ Error *err = NULL; ++ ++ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp) ++{ ++ IOMMUFDBackend *iommufd = vbasedev->iommufd; ++ struct vfio_device_bind_iommufd bind = { ++ .argsz = sizeof(bind), ++ .flags = 0, ++ }; ++ int ret; ++ ++ ret = iommufd_backend_connect(iommufd, errp); ++ if (ret) { ++ return ret; ++ } ++ ++ /* ++ * Add device to kvm-vfio to be prepared for the tracking ++ * in KVM. Especially for some emulated devices, it requires ++ * to have kvm information in the device open. 
++ */ ++ ret = iommufd_cdev_kvm_device_add(vbasedev, errp); ++ if (ret) { ++ goto err_kvm_device_add; ++ } ++ ++ /* Bind device to iommufd */ ++ bind.iommufd = iommufd->fd; ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind); ++ if (ret) { ++ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d", ++ vbasedev->fd, bind.iommufd); ++ goto err_bind; ++ } ++ ++ vbasedev->devid = bind.out_devid; ++ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name, ++ vbasedev->fd, vbasedev->devid); ++ return ret; ++err_bind: ++ iommufd_cdev_kvm_device_del(vbasedev); ++err_kvm_device_add: ++ iommufd_backend_disconnect(iommufd); ++ return ret; ++} ++ ++static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev) ++{ ++ /* Unbind is automatically conducted when device fd is closed */ ++ iommufd_cdev_kvm_device_del(vbasedev); ++ iommufd_backend_disconnect(vbasedev->iommufd); ++} ++ ++static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) ++{ ++ long int ret = -ENOTTY; ++ char *path, *vfio_dev_path = NULL, *vfio_path = NULL; ++ DIR *dir = NULL; ++ struct dirent *dent; ++ gchar *contents; ++ struct stat st; ++ gsize length; ++ int major, minor; ++ dev_t vfio_devt; ++ ++ path = g_strdup_printf("%s/vfio-dev", sysfs_path); ++ if (stat(path, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ goto out_free_path; ++ } ++ ++ dir = opendir(path); ++ if (!dir) { ++ error_setg_errno(errp, errno, "couldn't open directory %s", path); ++ goto out_free_path; ++ } ++ ++ while ((dent = readdir(dir))) { ++ if (!strncmp(dent->d_name, "vfio", 4)) { ++ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name); ++ break; ++ } ++ } ++ ++ if (!vfio_dev_path) { ++ error_setg(errp, "failed to find vfio-dev/vfioX/dev"); ++ goto out_close_dir; ++ } ++ ++ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) { ++ error_setg(errp, "failed to load \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ ++ if 
(sscanf(contents, "%d:%d", &major, &minor) != 2) { ++ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path); ++ goto out_free_dev_path; ++ } ++ g_free(contents); ++ vfio_devt = makedev(major, minor); ++ ++ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name); ++ ret = open_cdev(vfio_path, vfio_devt); ++ if (ret < 0) { ++ error_setg(errp, "Failed to open %s", vfio_path); ++ } ++ ++ trace_iommufd_cdev_getfd(vfio_path, ret); ++ g_free(vfio_path); ++ ++out_free_dev_path: ++ g_free(vfio_dev_path); ++out_close_dir: ++ closedir(dir); ++out_free_path: ++ if (*errp) { ++ error_prepend(errp, VFIO_MSG_PREFIX, path); ++ } ++ g_free(path); ++ ++ return ret; ++} ++ ++static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id, ++ Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_attach_iommufd_pt attach_data = { ++ .argsz = sizeof(attach_data), ++ .flags = 0, ++ .pt_id = id, ++ }; ++ ++ /* Attach device to an IOAS or hwpt within iommufd */ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, ++ "[iommufd=%d] error attach %s (%d) to id=%d", ++ iommufd, vbasedev->name, vbasedev->fd, id); ++ } else { ++ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name, ++ vbasedev->fd, id); ++ } ++ return ret; ++} ++ ++static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp) ++{ ++ int ret, iommufd = vbasedev->iommufd->fd; ++ struct vfio_device_detach_iommufd_pt detach_data = { ++ .argsz = sizeof(detach_data), ++ .flags = 0, ++ }; ++ ++ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data); ++ if (ret) { ++ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name); ++ } else { ++ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name); ++ } ++ return ret; ++} ++ ++static int iommufd_cdev_attach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container, ++ Error **errp) ++{ ++ return 
iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp); ++} ++ ++static void iommufd_cdev_detach_container(VFIODevice *vbasedev, ++ VFIOIOMMUFDContainer *container) ++{ ++ Error *err = NULL; ++ ++ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) { ++ error_report_err(err); ++ } ++} ++ ++static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container) ++{ ++ VFIOContainerBase *bcontainer = &container->bcontainer; ++ ++ if (!QLIST_EMPTY(&bcontainer->device_list)) { ++ return; ++ } ++ memory_listener_unregister(&bcontainer->listener); ++ vfio_container_destroy(bcontainer); ++ iommufd_backend_free_id(container->be, container->ioas_id); ++ g_free(container); ++} ++ ++static int iommufd_cdev_ram_block_discard_disable(bool state) ++{ ++ /* ++ * We support coordinated discarding of RAM via the RamDiscardManager. ++ */ ++ return ram_block_uncoordinated_discard_disable(state); ++} ++ ++static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, ++ AddressSpace *as, Error **errp) ++{ ++ VFIOContainerBase *bcontainer; ++ VFIOIOMMUFDContainer *container; ++ VFIOAddressSpace *space; ++ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; ++ int ret, devfd; ++ uint32_t ioas_id; ++ Error *err = NULL; ++ ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ ++ ret = iommufd_cdev_connect_and_bind(vbasedev, errp); ++ if (ret) { ++ goto err_connect_bind; ++ } ++ ++ space = vfio_get_address_space(as); ++ ++ /* try to attach to an existing container in this space */ ++ QLIST_FOREACH(bcontainer, &space->containers, next) { ++ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); ++ if (bcontainer->ops != &vfio_iommufd_ops || ++ vbasedev->iommufd != container->be) { ++ continue; ++ } ++ if (iommufd_cdev_attach_container(vbasedev, container, &err)) { ++ const char *msg = error_get_pretty(err); ++ ++ 
trace_iommufd_cdev_fail_attach_existing_container(msg); ++ error_free(err); ++ err = NULL; ++ } else { ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, ++ "Cannot set discarding of RAM broken (%d)", ret); ++ goto err_discard_disable; ++ } ++ goto found_container; ++ } ++ } ++ ++ /* Need to allocate a new dedicated container */ ++ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp); ++ if (ret < 0) { ++ goto err_alloc_ioas; ++ } ++ ++ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id); ++ ++ container = g_malloc0(sizeof(*container)); ++ container->be = vbasedev->iommufd; ++ container->ioas_id = ioas_id; ++ ++ bcontainer = &container->bcontainer; ++ vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ QLIST_INSERT_HEAD(&space->containers, bcontainer, next); ++ ++ ret = iommufd_cdev_attach_container(vbasedev, container, errp); ++ if (ret) { ++ goto err_attach_container; ++ } ++ ++ ret = iommufd_cdev_ram_block_discard_disable(true); ++ if (ret) { ++ goto err_discard_disable; ++ } ++ ++ bcontainer->pgsizes = qemu_real_host_page_size(); ++ ++ bcontainer->listener = vfio_memory_listener; ++ memory_listener_register(&bcontainer->listener, bcontainer->space->as); ++ ++ if (bcontainer->error) { ++ ret = -1; ++ error_propagate_prepend(errp, bcontainer->error, ++ "memory listener initialization failed: "); ++ goto err_listener_register; ++ } ++ ++ bcontainer->initialized = true; ++ ++found_container: ++ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info); ++ if (ret) { ++ error_setg_errno(errp, errno, "error getting device info"); ++ goto err_listener_register; ++ } ++ ++ /* ++ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level ++ * for discarding incompatibility check as well? 
++ */ ++ if (vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ vbasedev->group = 0; ++ vbasedev->num_irqs = dev_info.num_irqs; ++ vbasedev->num_regions = dev_info.num_regions; ++ vbasedev->flags = dev_info.flags; ++ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); ++ vbasedev->bcontainer = bcontainer; ++ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next); ++ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next); ++ ++ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs, ++ vbasedev->num_regions, vbasedev->flags); ++ return 0; ++ ++err_listener_register: ++ iommufd_cdev_ram_block_discard_disable(false); ++err_discard_disable: ++ iommufd_cdev_detach_container(vbasedev, container); ++err_attach_container: ++ iommufd_cdev_container_destroy(container); ++err_alloc_ioas: ++ vfio_put_address_space(space); ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++err_connect_bind: ++ close(vbasedev->fd); ++ return ret; ++} ++ ++static void iommufd_cdev_detach(VFIODevice *vbasedev) ++{ ++ VFIOContainerBase *bcontainer = vbasedev->bcontainer; ++ VFIOAddressSpace *space = bcontainer->space; ++ VFIOIOMMUFDContainer *container = container_of(bcontainer, ++ VFIOIOMMUFDContainer, ++ bcontainer); ++ QLIST_REMOVE(vbasedev, global_next); ++ QLIST_REMOVE(vbasedev, container_next); ++ vbasedev->bcontainer = NULL; ++ ++ if (!vbasedev->ram_block_discard_allowed) { ++ iommufd_cdev_ram_block_discard_disable(false); ++ } ++ ++ iommufd_cdev_detach_container(vbasedev, container); ++ iommufd_cdev_container_destroy(container); ++ vfio_put_address_space(space); ++ ++ iommufd_cdev_unbind_and_disconnect(vbasedev); ++ close(vbasedev->fd); ++} ++ ++const VFIOIOMMUOps vfio_iommufd_ops = { ++ .dma_map = iommufd_cdev_map, ++ .dma_unmap = iommufd_cdev_unmap, ++ .attach_device = iommufd_cdev_attach, ++ .detach_device = iommufd_cdev_detach, ++}; +diff --git a/hw/vfio/meson.build 
b/hw/vfio/meson.build +index eb6ce6229d..e5d98b6adc 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -7,6 +7,9 @@ vfio_ss.add(files( + 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( ++ 'iommufd.c', ++)) + vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files( + 'display.c', + 'pci-quirks.c', +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 08a1f9dfa4..3340c93af0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop + vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" + vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" ++ ++#iommufd.c ++ ++iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d" ++iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)" ++iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d" ++iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s" ++iommufd_cdev_fail_attach_existing_container(const char *msg) " %s" ++iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d" ++iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d" +diff 
--git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 24ecc0e7ee..3dac5c167e 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow { + QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next; + } VFIOHostDMAWindow; + ++typedef struct IOMMUFDBackend IOMMUFDBackend; ++ ++typedef struct VFIOIOMMUFDContainer { ++ VFIOContainerBase bcontainer; ++ IOMMUFDBackend *be; ++ uint32_t ioas_id; ++} VFIOIOMMUFDContainer; ++ + typedef struct VFIODeviceOps VFIODeviceOps; + + typedef struct VFIODevice { +@@ -116,6 +124,8 @@ typedef struct VFIODevice { + OnOffAuto pre_copy_dirty_page_tracking; + bool dirty_pages_supported; + bool dirty_tracking; ++ int devid; ++ IOMMUFDBackend *iommufd; + } VFIODevice; + + struct VFIODeviceOps { +@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; + extern const VFIOIOMMUOps vfio_legacy_ops; ++extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch new file mode 100644 index 0000000..866a437 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch @@ -0,0 +1,155 @@ +From f98defd6fe081bc44f5bd823d187d7d3b12832ac Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:23 +0100 +Subject: [PATCH 056/101] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [55/67] 
789ecf74ace326b0df5d494fd558d7d0b6294a85 (eauger1/centos-qemu-kvm) + +As previously done for the sPAPR and legacy IOMMU backends, convert +the VFIOIOMMUOps struct to a QOM interface. The set of operations +for this backend can be referenced with a literal typename instead of +a C struct. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit ce5f6d49f5845c3b9955cc377a5223c3f8d7ba1e) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 2 +- + hw/vfio/iommufd.c | 35 ++++++++++++++++++++------- + include/hw/vfio/vfio-common.h | 1 - + include/hw/vfio/vfio-container-base.h | 2 +- + 4 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 2329d0efc8..89ff1c7aed 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1508,7 +1508,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + + #ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { +- ops = &vfio_iommufd_ops; ++ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } + #endif + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 87a561c545..d4c586e842 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + int ret, devfd; + uint32_t ioas_id; + Error *err = NULL; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + if (vbasedev->fd < 0) { + devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + /* try to attach to an existing container in this space */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); +- if (bcontainer->ops != &vfio_iommufd_ops || ++ if (bcontainer->ops != iommufd_vioc || + vbasedev->iommufd != container->be) { + continue; + } +@@
-374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + container->ioas_id = ioas_id; + + bcontainer = &container->bcontainer; +- vfio_container_init(bcontainer, space, &vfio_iommufd_ops); ++ vfio_container_init(bcontainer, space, iommufd_vioc); + QLIST_INSERT_HEAD(&space->containers, bcontainer, next); + + ret = iommufd_cdev_attach_container(vbasedev, container, errp); +@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev) + static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid) + { + VFIODevice *vbasedev_iter; ++ const VFIOIOMMUClass *iommufd_vioc = ++ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + + QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) { +- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) { ++ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) { + continue; + } + if (devid == vbasedev_iter->devid) { +@@ -621,10 +625,23 @@ out_single: + return ret; + } + +-const VFIOIOMMUOps vfio_iommufd_ops = { +- .dma_map = iommufd_cdev_map, +- .dma_unmap = iommufd_cdev_unmap, +- .attach_device = iommufd_cdev_attach, +- .detach_device = iommufd_cdev_detach, +- .pci_hot_reset = iommufd_cdev_pci_hot_reset, ++static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->dma_map = iommufd_cdev_map; ++ vioc->dma_unmap = iommufd_cdev_unmap; ++ vioc->attach_device = iommufd_cdev_attach; ++ vioc->detach_device = iommufd_cdev_detach; ++ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset; + }; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_IOMMUFD, ++ .parent = TYPE_VFIO_IOMMU, ++ .class_init = vfio_iommu_iommufd_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 14c497b6b0..9b7ef7d02b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -210,7 +210,6 @@ typedef 
QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList; + extern VFIOGroupList vfio_group_list; + extern VFIODeviceList vfio_device_list; +-extern const VFIOIOMMUOps vfio_iommufd_ops; + extern const MemoryListener vfio_memory_listener; + extern int vfio_kvm_device_fd; + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 9e21d7811f..b2813b0c11 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -17,7 +17,6 @@ + + typedef struct VFIODevice VFIODevice; + typedef struct VFIOIOMMUClass VFIOIOMMUClass; +-#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */ + + typedef struct { + unsigned long *bitmap; +@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" + #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" ++#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch new file mode 100644 index 0000000..f77032b --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch @@ -0,0 +1,71 @@ +From 5a49c5bb690d55fc88b6fb12f059ae932de0a716 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:04 +0800 +Subject: [PATCH 025/101] vfio/iommufd: Relax assert check for iommufd backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [24/67] 2c9e41e9ca0b67ebf807d1643a98866a0cb75768 
(eauger1/centos-qemu-kvm) + +Currently iommufd doesn't support dirty page sync yet, +but it will not block us doing live migration if VFIO +migration is force enabled. + +So in this case we allow set_dirty_page_tracking to be NULL. +Note we don't need same change for query_dirty_bitmap because +when dirty page sync isn't supported, query_dirty_bitmap will +never be called. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 36e84d0c17102fa1c887d8c650a13ec08fca0ec0) +Signed-off-by: Eric Auger +--- + hw/vfio/container-base.c | 4 ++++ + hw/vfio/container.c | 4 ---- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 71f7274973..eee2dcfe76 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer, + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { ++ if (!bcontainer->dirty_pages_supported) { ++ return 0; ++ } ++ + g_assert(bcontainer->ops->set_dirty_page_tracking); + return bcontainer->ops->set_dirty_page_tracking(bcontainer, start); + } +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 6bacf38222..ed2d721b2b 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -216,10 +216,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + .argsz = sizeof(dirty), + }; + +- if (!bcontainer->dirty_pages_supported) { +- return 0; +- } +- + if (start) { + dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; + } else { +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch new file mode 100644 index 0000000..97d30c9 --- /dev/null +++ 
b/SOURCES/kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch @@ -0,0 +1,55 @@ +From 5549bf1b2e07213c23e280a43ab2ab67d5b7304a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:25 +0100 +Subject: [PATCH 058/101] vfio/iommufd: Remove CONFIG_IOMMUFD usage +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [57/67] 3a6a45d379241d9412e0b8bcfeb9be0b4add59a5 (eauger1/centos-qemu-kvm) + +Availability of the IOMMUFD backend can now be fully determined at +runtime and the ifdef check was a build time protection (for PPC not +supporting it mostly). + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit c1139fa4feba8c320e4bd0a4e34af55caa5ffbb9) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 89ff1c7aed..0d4d8b8416 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -19,7 +19,6 @@ + */ + + #include "qemu/osdep.h" +-#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #ifdef CONFIG_KVM + #include +@@ -1506,11 +1505,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev, + const VFIOIOMMUClass *ops = + VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY)); + +-#ifdef CONFIG_IOMMUFD + if (vbasedev->iommufd) { + ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD)); + } +-#endif + + assert(ops); + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch new file mode 100644 index 0000000..7401d52 --- /dev/null +++ b/SOURCES/kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch @@ -0,0 +1,56 @@ 
+From 6b36dc2a305af856af03aad2e315eea96a349153 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Thu, 21 Dec 2023 09:09:57 +0100 +Subject: [PATCH 061/101] vfio/iommufd: Remove the use of stat() to check file + existence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [60/67] 485770e45c1a6399780939bfb8b01b615d9213c6 (eauger1/centos-qemu-kvm) + +Using stat() before opening a file or a directory can lead to a +time-of-check to time-of-use (TOCTOU) filesystem race, which is +reported by coverity as a Security best practices violations. The +sequence could be replaced by open and fdopendir but it doesn't add +much in this case. Simply use opendir to avoid the race. + +Fixes: CID 1531551 +Signed-off-by: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +(cherry picked from commit 6ba254801f6bc7f3ef68a6414f1b107237c7eb26) +Signed-off-by: Eric Auger +--- + hw/vfio/iommufd.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index d4c586e842..9bfddc1360 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -121,17 +121,11 @@ static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp) + DIR *dir = NULL; + struct dirent *dent; + gchar *contents; +- struct stat st; + gsize length; + int major, minor; + dev_t vfio_devt; + + path = g_strdup_printf("%s/vfio-dev", sysfs_path); +- if (stat(path, &st) < 0) { +- error_setg_errno(errp, errno, "no such host device"); +- goto out_free_path; +- } +- + dir = opendir(path); + if (!dir) { + error_setg_errno(errp, errno, "couldn't open directory %s", path); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch b/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch deleted file mode 
100644 index b8e72e6..0000000 --- a/SOURCES/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +++ /dev/null @@ -1,438 +0,0 @@ -From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 15/37] vfio/migration: Add VFIO migration pre-copy support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit eda7362af959 -Author: Avihai Horon -Date: Wed Jun 21 14:12:00 2023 +0300 - - vfio/migration: Add VFIO migration pre-copy support - - Pre-copy support allows the VFIO device data to be transferred while the - VM is running. This helps to accommodate VFIO devices that have a large - amount of data that needs to be transferred, and it can reduce migration - downtime. - - Pre-copy support is optional in VFIO migration protocol v2. - Implement pre-copy of VFIO migration protocol v2 and use it for devices - that support it. Full description of it can be found in the following - Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol - with PRE_COPY"). 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 35 +++++--- - hw/vfio/common.c | 6 +- - hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- - hw/vfio/trace-events | 4 +- - include/hw/vfio/vfio-common.h | 2 + - 5 files changed, 190 insertions(+), 22 deletions(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index 1b68ccf115..e896b2a673 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the - destination host. This document details how saving and restoring of VFIO - devices is done in QEMU. - --Migration of VFIO devices currently consists of a single stop-and-copy phase. --During the stop-and-copy phase the guest is stopped and the entire VFIO device --data is transferred to the destination. -- --The pre-copy phase of migration is currently not supported for VFIO devices. --Support for VFIO pre-copy will be added later on. -+Migration of VFIO devices consists of two phases: the optional pre-copy phase, -+and the stop-and-copy phase. The pre-copy phase is iterative and allows to -+accommodate VFIO devices that have a large amount of data that needs to be -+transferred. The iterative pre-copy phase of migration allows for the guest to -+continue whilst the VFIO device state is transferred to the destination, this -+helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy -+support by reporting the VFIO_MIGRATION_PRE_COPY flag in the -+VFIO_DEVICE_FEATURE_MIGRATION ioctl. - - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. 
However, P2P support is planned -@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``load_setup`` function that sets the VFIO device on the destination in - _RESUMING state. - -+* A ``state_pending_estimate`` function that reports an estimate of the -+ remaining pre-copy data that the vendor driver has yet to save for the VFIO -+ device. -+ - * A ``state_pending_exact`` function that reads pending_bytes from the vendor - driver, which indicates the amount of data that the vendor driver has yet to - save for the VFIO device. - -+* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is -+ active only when the VFIO device is in pre-copy states. -+ -+* A ``save_live_iterate`` function that reads the VFIO device's data from the -+ vendor driver during iterative pre-copy phase. -+ - * A ``save_state`` function to save the device config space if it is present. - - * A ``save_live_complete_precopy`` function that sets the VFIO device in -@@ -111,8 +123,10 @@ Flow of state changes during Live migration - =========================================== - - Below is the flow of state change during live migration. --The values in the brackets represent the VM state, the migration state, and -+The values in the parentheses represent the VM state, the migration state, and - the VFIO device state, respectively. -+The text in the square brackets represents the flow if the VFIO device supports -+pre-copy. 
- - Live migration save path - ------------------------ -@@ -124,11 +138,12 @@ Live migration save path - | - migrate_init spawns migration_thread - Migration thread then calls each device's .save_setup() -- (RUNNING, _SETUP, _RUNNING) -+ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) - | -- (RUNNING, _ACTIVE, _RUNNING) -- If device is active, get pending_bytes by .state_pending_exact() -+ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) -+ If device is active, get pending_bytes by .state_pending_{estimate,exact}() - If total pending_bytes >= threshold_size, call .save_live_iterate() -+ [Data of VFIO device for pre-copy phase is copied] - Iterate till total pending bytes converge and are less than threshold - | - On migration completion, vCPU stops and calls .save_live_complete_precopy for -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 78358ede27..b73086e17a 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - } - - if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && -- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { -+ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { - return false; - } - } -@@ -537,7 +538,8 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) - return false; - } - -- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { -+ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || -+ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { - continue; - } else { - return false; -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 8d33414379..d8f6a22ae1 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) - return "STOP_COPY"; - case VFIO_DEVICE_STATE_RESUMING: - return "RESUMING"; -+ case VFIO_DEVICE_STATE_PRE_COPY: -+ return 
"PRE_COPY"; - default: - return "UNKNOWN STATE"; - } -@@ -241,6 +243,25 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - -+static int vfio_query_precopy_size(VFIOMigration *migration) -+{ -+ struct vfio_precopy_info precopy = { -+ .argsz = sizeof(precopy), -+ }; -+ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { -+ return -errno; -+ } -+ -+ migration->precopy_init_size = precopy.initial_bytes; -+ migration->precopy_dirty_size = precopy.dirty_bytes; -+ -+ return 0; -+} -+ - /* Returns the size of saved data on success and -errno on error */ - static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { -@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - data_size = read(migration->data_fd, migration->data_buffer, - migration->data_buffer_size); - if (data_size < 0) { -+ /* -+ * Pre-copy emptied all the device state for now. For more information, -+ * please refer to the Linux kernel VFIO uAPI. -+ */ -+ if (errno == ENOMSG) { -+ return 0; -+ } -+ - return -errno; - } - if (data_size == 0) { -@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return qemu_file_get_error(f) ?: data_size; - } - -+static void vfio_update_estimated_pending_data(VFIOMigration *migration, -+ uint64_t data_size) -+{ -+ if (!data_size) { -+ /* -+ * Pre-copy emptied all the device state for now, update estimated sizes -+ * accordingly. 
-+ */ -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; -+ -+ return; -+ } -+ -+ if (migration->precopy_init_size) { -+ uint64_t init_size = MIN(migration->precopy_init_size, data_size); -+ -+ migration->precopy_init_size -= init_size; -+ data_size -= init_size; -+ } -+ -+ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, -+ data_size); -+} -+ -+static bool vfio_precopy_supported(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; -+} -+ - /* ---------------------------------------------------------------------- */ - - static int vfio_save_setup(QEMUFile *f, void *opaque) -@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) - return -ENOMEM; - } - -+ if (vfio_precopy_supported(vbasedev)) { -+ int ret; -+ -+ switch (migration->device_state) { -+ case VFIO_DEVICE_STATE_RUNNING: -+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, -+ VFIO_DEVICE_STATE_RUNNING); -+ if (ret) { -+ return ret; -+ } -+ -+ vfio_query_precopy_size(migration); -+ -+ break; -+ case VFIO_DEVICE_STATE_STOP: -+ /* vfio_save_complete_precopy() will go to STOP_COPY */ -+ break; -+ default: -+ return -EINVAL; -+ } -+ } -+ - trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) - - g_free(migration->data_buffer); - migration->data_buffer = NULL; -+ migration->precopy_init_size = 0; -+ migration->precopy_dirty_size = 0; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } - -+static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, -+ uint64_t *can_postcopy) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { -+ return; -+ } -+ -+ *must_precopy += -+ 
migration->precopy_init_size + migration->precopy_dirty_size; -+ -+ trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, -+ *can_postcopy, -+ migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ - /* - * Migration size of VFIO devices can be as little as a few KBs or as big as - * many GBs. This value should be big enough to cover the worst case. - */ - #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) - --/* -- * Only exact function is implemented and not estimate function. The reason is -- * that during pre-copy phase of migration the estimate function is called -- * repeatedly while pending RAM size is over the threshold, thus migration -- * can't converge and querying the VFIO device pending data size is useless. -- */ - static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - uint64_t *can_postcopy) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; - - /* -@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - vfio_query_stop_copy_size(vbasedev, &stop_copy_size); - *must_precopy += stop_copy_size; - -+ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { -+ vfio_query_precopy_size(migration); -+ -+ *must_precopy += -+ migration->precopy_init_size + migration->precopy_dirty_size; -+ } -+ - trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, -- stop_copy_size); -+ stop_copy_size, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+} -+ -+static bool vfio_is_active_iterate(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ -+ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; -+} -+ -+static int vfio_save_iterate(QEMUFile *f, void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; -+ ssize_t data_size; -+ -+ data_size = 
vfio_save_block(f, migration); -+ if (data_size < 0) { -+ return data_size; -+ } -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ -+ vfio_update_estimated_pending_data(migration, data_size); -+ -+ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, -+ migration->precopy_dirty_size); -+ -+ /* -+ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. -+ * Return 1 so following handlers will not be potentially blocked. -+ */ -+ return 1; - } - - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) -@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - ssize_t data_size; - int ret; - -- /* We reach here with device state STOP only */ -+ /* We reach here with device state STOP or STOP_COPY only */ - ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, - VFIO_DEVICE_STATE_STOP); - if (ret) { -@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -+ .state_pending_estimate = vfio_state_pending_estimate, - .state_pending_exact = vfio_state_pending_exact, -+ .is_active_iterate = vfio_is_active_iterate, -+ .save_live_iterate = vfio_save_iterate, - .save_live_complete_precopy = vfio_save_complete_precopy, - .save_state = vfio_save_state, - .load_setup = vfio_load_setup, -@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { - static void vfio_vmstate_change(void *opaque, bool running, RunState state) - { - VFIODevice *vbasedev = opaque; -+ VFIOMigration *migration = vbasedev->migration; - enum vfio_device_mig_state new_state; - int ret; - - if (running) { - new_state = VFIO_DEVICE_STATE_RUNNING; - } else { -- new_state = VFIO_DEVICE_STATE_STOP; -+ new_state = -+ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && -+ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? 
-+ VFIO_DEVICE_STATE_STOP_COPY : -+ VFIO_DEVICE_STATE_STOP; - } - - /* -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events -index 646e42fd27..4150b59e58 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" - vfio_save_cleanup(const char *name) " (%s)" - vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" - vfio_save_device_config_state(const char *name) " (%s)" -+vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 --vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 -+vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 -+vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 - vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 5f29dab839..1db901c194 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -67,6 +67,8 @@ typedef struct VFIOMigration { - void *data_buffer; - size_t data_buffer_size; - uint64_t mig_flags; -+ uint64_t precopy_init_size; -+ uint64_t precopy_dirty_size; - } 
VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch new file mode 100644 index 0000000..6556a19 --- /dev/null +++ b/SOURCES/kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch @@ -0,0 +1,115 @@ +From 0c0435e7210b99a6bf7b8f8205f7af8277b7525b Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 12:48:18 +0200 +Subject: [PATCH 063/101] vfio/migration: Add helper function to set state or + reset device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [62/67] 1a63eea289561a05a6a8527c2a9da0289a7836d9 (eauger1/centos-qemu-kvm) + +There are several places where failure in setting the device state leads +to a device reset, which is done by setting ERROR as the recover state. + +Add a helper function that sets the device state and resets the device +in case of failure. This will make the code cleaner and remove duplicate +comments. + +Signed-off-by: Avihai Horon +Reviewed-by: Cédric Le Goater +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit c817e5a377a334241eed149e35760aca58bdeb34) +Signed-off-by: Eric Auger +--- + hw/vfio/migration.c | 41 +++++++++++++++++------------------------ + 1 file changed, 17 insertions(+), 24 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 28d422b39f..70e6b1a709 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -163,6 +163,19 @@ reset_device: + return ret; + } + ++/* ++ * Some device state transitions require resetting the device if they fail. ++ * This function sets the device in new_state and resets the device if that ++ * fails. 
Reset is done by using ERROR as the recover state. ++ */ ++static int ++vfio_migration_set_state_or_reset(VFIODevice *vbasedev, ++ enum vfio_device_mig_state new_state) ++{ ++ return vfio_migration_set_state(vbasedev, new_state, ++ VFIO_DEVICE_STATE_ERROR); ++} ++ + static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, + uint64_t data_size) + { +@@ -422,12 +435,7 @@ static void vfio_save_cleanup(void *opaque) + * after migration has completed, so it won't increase downtime. + */ + if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) { +- /* +- * If setting the device in STOP state fails, the device should be +- * reset. To do so, use ERROR state as a recover state. +- */ +- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP, +- VFIO_DEVICE_STATE_ERROR); ++ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP); + } + + g_free(migration->data_buffer); +@@ -699,12 +707,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running, + VFIO_DEVICE_STATE_PRE_COPY_P2P : + VFIO_DEVICE_STATE_RUNNING_P2P; + +- /* +- * If setting the device in new_state fails, the device should be reset. +- * To do so, use ERROR state as a recover state. +- */ +- ret = vfio_migration_set_state(vbasedev, new_state, +- VFIO_DEVICE_STATE_ERROR); ++ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); + if (ret) { + /* + * Migration should be aborted in this case, but vm_state_notify() +@@ -736,12 +739,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state) + VFIO_DEVICE_STATE_STOP; + } + +- /* +- * If setting the device in new_state fails, the device should be reset. +- * To do so, use ERROR state as a recover state. 
+- */ +- ret = vfio_migration_set_state(vbasedev, new_state, +- VFIO_DEVICE_STATE_ERROR); ++ ret = vfio_migration_set_state_or_reset(vbasedev, new_state); + if (ret) { + /* + * Migration should be aborted in this case, but vm_state_notify() +@@ -770,12 +768,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: +- /* +- * If setting the device in RUNNING state fails, the device should +- * be reset. To do so, use ERROR state as a recover state. +- */ +- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING, +- VFIO_DEVICE_STATE_ERROR); ++ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING); + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch deleted file mode 100644 index d87680d..0000000 --- a/SOURCES/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +++ /dev/null @@ -1,192 +0,0 @@ -From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack - capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 745c42912a04 -Author: Avihai Horon -Date: Wed Jun 21 14:12:01 2023 +0300 - - vfio/migration: Add support for switchover ack capability - - Loading of a VFIO device's data can take a substantial amount of time as - the device may need to allocate resources, prepare 
internal data - structures, etc. This can increase migration downtime, especially for - VFIO devices with a lot of resources. - - To solve this, VFIO migration uAPI defines "initial bytes" as part of - its precopy data stream. Initial bytes can be used in various ways to - improve VFIO migration performance. For example, it can be used to - transfer device metadata to pre-allocate resources in the destination. - However, for this to work we need to make sure that all initial bytes - are sent and loaded in the destination before the source VM is stopped. - - Use migration switchover ack capability to make sure a VFIO device's - initial bytes are sent and loaded in the destination before the source - stops the VM and attempts to complete the migration. - This can significantly reduce migration downtime for some devices. - - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - docs/devel/vfio-migration.rst | 10 +++++++++ - hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- - include/hw/vfio/vfio-common.h | 1 + - 3 files changed, 49 insertions(+), 1 deletion(-) - -diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst -index e896b2a673..b433cb5bb2 100644 ---- a/docs/devel/vfio-migration.rst -+++ b/docs/devel/vfio-migration.rst -@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy - support by reporting the VFIO_MIGRATION_PRE_COPY flag in the - VFIO_DEVICE_FEATURE_MIGRATION ioctl. - -+When pre-copy is supported, it's possible to further reduce downtime by -+enabling "switchover-ack" migration capability. -+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream -+and recommends that the initial bytes are sent and loaded in the destination -+before stopping the source VM. 
Enabling this migration capability will -+guarantee that and thus, can potentially reduce downtime even further. -+ - Note that currently VFIO migration is supported only for a single device. This - is due to VFIO migration's lack of P2P support. However, P2P support is planned - to be added later on. -@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: - * A ``save_live_iterate`` function that reads the VFIO device's data from the - vendor driver during iterative pre-copy phase. - -+* A ``switchover_ack_needed`` function that checks if the VFIO device uses -+ "switchover-ack" migration capability when this capability is enabled. -+ - * A ``save_state`` function to save the device config space if it is present. - - * A ``save_live_complete_precopy`` function that sets the VFIO device in -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index d8f6a22ae1..acbf0bb7ab 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -18,6 +18,8 @@ - #include "sysemu/runstate.h" - #include "hw/vfio/vfio-common.h" - #include "migration/migration.h" -+#include "migration/options.h" -+#include "migration/savevm.h" - #include "migration/vmstate.h" - #include "migration/qemu-file.h" - #include "migration/register.h" -@@ -45,6 +47,7 @@ - #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) - #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) - #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) -+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) - - /* - * This is an arbitrary size based on migration of mlx5 devices, where typically -@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) - migration->data_buffer = NULL; - migration->precopy_init_size = 0; - migration->precopy_dirty_size = 0; -+ migration->initial_data_sent = false; - vfio_migration_cleanup(vbasedev); - trace_vfio_save_cleanup(vbasedev->name); - } -@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, 
void *opaque) - if (data_size < 0) { - return data_size; - } -- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - - vfio_update_estimated_pending_data(migration, data_size); - -+ if (migrate_switchover_ack() && !migration->precopy_init_size && -+ !migration->initial_data_sent) { -+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); -+ migration->initial_data_sent = true; -+ } else { -+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); -+ } -+ - trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, - migration->precopy_dirty_size); - -@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - } - break; - } -+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: -+ { -+ if (!vfio_precopy_supported(vbasedev) || -+ !migrate_switchover_ack()) { -+ error_report("%s: Received INIT_DATA_SENT but switchover ack " -+ "is not used", vbasedev->name); -+ return -EINVAL; -+ } -+ -+ ret = qemu_loadvm_approve_switchover(); -+ if (ret) { -+ error_report( -+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return ret; -+ } - default: - error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); - return -EINVAL; -@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) - return ret; - } - -+static bool vfio_switchover_ack_needed(void *opaque) -+{ -+ VFIODevice *vbasedev = opaque; -+ -+ return vfio_precopy_supported(vbasedev); -+} -+ - static const SaveVMHandlers savevm_vfio_handlers = { - .save_setup = vfio_save_setup, - .save_cleanup = vfio_save_cleanup, -@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { - .load_setup = vfio_load_setup, - .load_cleanup = vfio_load_cleanup, - .load_state = vfio_load_state, -+ .switchover_ack_needed = vfio_switchover_ack_needed, - }; - - /* ---------------------------------------------------------------------- */ -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 
1db901c194..3dc5f2104c 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -69,6 +69,7 @@ typedef struct VFIOMigration { - uint64_t mig_flags; - uint64_t precopy_init_size; - uint64_t precopy_dirty_size; -+ bool initial_data_sent; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch deleted file mode 100644 index dde2e24..0000000 --- a/SOURCES/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to - per device -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 3c26c80a0a26 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:07 2023 +0800 - - vfio/migration: Change vIOMMU blocker from global to per device - - Contrary to multiple device blocker which needs to consider already-attached - devices to unblock/block dynamically, the vIOMMU migration blocker is a device - specific config. Meaning it only needs to know whether the device is bypassing - or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and - does not need the state of currently present devices. For this reason, the - vIOMMU global migration blocker can be consolidated into the per-device - migration blocker, allowing us to remove some unnecessary code. 
- - This change also makes vfio_mig_active() more accurate as it doesn't check for - global blocker. - - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 51 ++--------------------------------- - hw/vfio/migration.c | 7 ++--- - hw/vfio/pci.c | 1 - - include/hw/vfio/vfio-common.h | 3 +-- - 4 files changed, 7 insertions(+), 55 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 136d8243d6..e815f6ba30 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -362,7 +362,6 @@ bool vfio_mig_active(void) - } - - static Error *multiple_devices_migration_blocker; --static Error *giommu_migration_blocker; - - static unsigned int vfio_migratable_device_num(void) - { -@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) - multiple_devices_migration_blocker = NULL; - } - --static bool vfio_viommu_preset(void) -+bool vfio_viommu_preset(VFIODevice *vbasedev) - { -- VFIOAddressSpace *space; -- -- QLIST_FOREACH(space, &vfio_address_spaces, list) { -- if (space->as != &address_space_memory) { -- return true; -- } -- } -- -- return false; --} -- --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) --{ -- int ret; -- -- if (giommu_migration_blocker || -- !vfio_viommu_preset()) { -- return 0; -- } -- -- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -- error_setg(errp, -- "Migration is currently not supported with vIOMMU enabled"); -- return -EINVAL; -- } -- -- error_setg(&giommu_migration_blocker, -- "Migration is currently not supported with vIOMMU enabled"); -- ret = migrate_add_blocker(giommu_migration_blocker, errp); -- if (ret < 0) { -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; -- } -- -- return ret; --} -- --void vfio_migration_finalize(void) --{ -- if (!giommu_migration_blocker || -- vfio_viommu_preset()) { -- return; -- } -- -- 
migrate_del_blocker(giommu_migration_blocker); -- error_free(giommu_migration_blocker); -- giommu_migration_blocker = NULL; -+ return vbasedev->group->container->space->as != &address_space_memory; - } - - static void vfio_set_migration_error(int err) -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 1db7d52ab2..e6e5e85f75 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - return ret; - } - -- ret = vfio_block_giommu_migration(vbasedev, errp); -- if (ret) { -- return ret; -+ if (vfio_viommu_preset(vbasedev)) { -+ error_setg(&err, "%s: Migration is currently not supported " -+ "with vIOMMU enabled", vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - trace_vfio_migration_realize(vbasedev->name); -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 2d059832a4..922c81872c 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) - */ - vfio_put_device(vdev); - vfio_put_group(group); -- vfio_migration_finalize(); - } - - static void vfio_exitfn(PCIDevice *pdev) -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 93429b9abb..45167c8a8a 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; - bool vfio_mig_active(void); - int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); -+bool vfio_viommu_preset(VFIODevice *vbasedev); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - -@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); --void 
vfio_migration_finalize(void); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch deleted file mode 100644 index 9deaf1a..0000000 --- a/SOURCES/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +++ /dev/null @@ -1,145 +0,0 @@ -From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 27/37] vfio/migration: Free resources when - vfio_migration_realize fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2b43b2995b02 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:08 2023 +0800 - - vfio/migration: Free resources when vfio_migration_realize fails - - When vfio_realize() succeeds, hot unplug will call vfio_exitfn() - to free resources allocated in vfio_realize(); when vfio_realize() - fails, vfio_exitfn() is never called and we need to free resources - in vfio_realize(). 
- - In the case that vfio_migration_realize() fails, - e.g: with -only-migratable & enable-migration=off, we see below: - - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - 0000:81:11.1: Migration disabled - Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device - - If we hotplug again we should see same log as above, but we see: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off - Error: vfio 0000:81:11.1: device is already attached - - That's because some references to VFIO device isn't released. - For resources allocated in vfio_migration_realize(), free them by - jumping to out_deinit path with calling a new function - vfio_migration_deinit(). For resources allocated in vfio_realize(), - free them by jumping to de-register path in vfio_realize(). - - Signed-off-by: Zhenzhong Duan - Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- - hw/vfio/pci.c | 1 + - 2 files changed, 24 insertions(+), 10 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e6e5e85f75..e3954570c8 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static void vfio_migration_deinit(VFIODevice *vbasedev) -+{ -+ VFIOMigration *migration = vbasedev->migration; -+ -+ remove_migration_state_change_notifier(&migration->migration_state); -+ qemu_del_vm_change_state_handler(migration->vm_state); -+ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -+ vfio_migration_free(vbasedev); -+ vfio_unblock_multiple_devices_migration(); -+} -+ - static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) - { - int ret; 
-@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - error_setg(&err, - "%s: VFIO device doesn't support device dirty tracking", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - warn_report("%s: VFIO device doesn't support device dirty tracking", -@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - - ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { -- return ret; -+ goto out_deinit; - } - - if (vfio_viommu_preset(vbasedev)) { - error_setg(&err, "%s: Migration is currently not supported " - "with vIOMMU enabled", vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ goto add_blocker; - } - - trace_vfio_migration_realize(vbasedev->name); - return 0; -+ -+add_blocker: -+ ret = vfio_block_migration(vbasedev, err, errp); -+out_deinit: -+ if (ret) { -+ vfio_migration_deinit(vbasedev); -+ } -+ return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) - { - if (vbasedev->migration) { -- VFIOMigration *migration = vbasedev->migration; -- -- remove_migration_state_change_notifier(&migration->migration_state); -- qemu_del_vm_change_state_handler(migration->vm_state); -- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); -- vfio_migration_free(vbasedev); -- vfio_unblock_multiple_devices_migration(); -+ vfio_migration_deinit(vbasedev); - } - - if (vbasedev->migration_blocker) { -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 922c81872c..037b7d4176 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { - error_report("%s: Migration disabled", vbasedev->name); -+ goto out_deregister; - } - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch 
deleted file mode 100644 index 3258541..0000000 --- a/SOURCES/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +++ /dev/null @@ -1,283 +0,0 @@ -From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 8bbcb64a71d8 -Author: Avihai Horon -Date: Wed Jun 28 10:31:12 2023 +0300 - - vfio/migration: Make VFIO migration non-experimental - - The major parts of VFIO migration are supported today in QEMU. This - includes basic VFIO migration, device dirty page tracking and precopy - support. - - Thus, at this point in time, it seems appropriate to make VFIO migration - non-experimental: remove the x prefix from enable_migration property, - change it to ON_OFF_AUTO and let the default value be AUTO. - - In addition, make the following adjustments: - 1. When enable_migration is ON and migration is not supported, fail VFIO - device realization. - 2. When enable_migration is AUTO (i.e., not explicitly enabled), require - device dirty tracking support. This is because device dirty tracking - is currently the only method to do dirty page tracking, which is - essential for migrating in a reasonable downtime. Setting - enable_migration to ON will not require device dirty tracking. - 3. Make migration error and blocker messages more elaborate. - 4. Remove error prints in vfio_migration_query_flags(). - 5. Rename trace_vfio_migration_probe() to - trace_vfio_migration_realize(). 
- - Signed-off-by: Avihai Horon - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 16 ++++++- - hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ - hw/vfio/pci.c | 4 +- - hw/vfio/trace-events | 2 +- - include/hw/vfio/vfio-common.h | 6 +-- - 5 files changed, 73 insertions(+), 34 deletions(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 3b4ac53f15..136d8243d6 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) - return device_num; - } - --int vfio_block_multiple_devices_migration(Error **errp) -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, "Migration is currently not supported with multiple " -+ "VFIO devices"); -+ return -EINVAL; -+ } -+ - error_setg(&multiple_devices_migration_blocker, - "Migration is currently not supported with multiple " - "VFIO devices"); -@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) - return false; - } - --int vfio_block_giommu_migration(Error **errp) -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) - { - int ret; - -@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) - return 0; - } - -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_setg(errp, -+ "Migration is currently not supported with vIOMMU enabled"); -+ return -EINVAL; -+ } -+ - error_setg(&giommu_migration_blocker, - "Migration is currently not supported with vIOMMU enabled"); - ret = migrate_add_blocker(giommu_migration_blocker, errp); -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 7cf143926c..1db7d52ab2 100644 ---- a/hw/vfio/migration.c -+++ 
b/hw/vfio/migration.c -@@ -724,14 +724,6 @@ static int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) - feature->argsz = sizeof(buf); - feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; - if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { -- if (errno == ENOTTY) { -- error_report("%s: VFIO migration is not supported in kernel", -- vbasedev->name); -- } else { -- error_report("%s: Failed to query VFIO migration support, err: %s", -- vbasedev->name, strerror(errno)); -- } -- - return -errno; - } - -@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) - return 0; - } - -+static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) -+{ -+ int ret; -+ -+ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { -+ error_propagate(errp, err); -+ return -EINVAL; -+ } -+ -+ vbasedev->migration_blocker = error_copy(err); -+ error_free(err); -+ -+ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -+ if (ret < 0) { -+ error_free(vbasedev->migration_blocker); -+ vbasedev->migration_blocker = NULL; -+ } -+ -+ return ret; -+} -+ - /* ---------------------------------------------------------------------- */ - - int64_t vfio_mig_bytes_transferred(void) -@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) - - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { -- int ret = -ENOTSUP; -+ Error *err = NULL; -+ int ret; - -- if (!vbasedev->enable_migration) { -- goto add_blocker; -+ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { -+ error_setg(&err, "%s: Migration is disabled for VFIO device", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); - if (ret) { -- goto add_blocker; -+ if (ret == -ENOTTY) { -+ error_setg(&err, "%s: VFIO migration is not supported in kernel", -+ vbasedev->name); -+ } else { -+ error_setg(&err, -+ "%s: Migration couldn't be initialized for VFIO device, " -+ "err: %d 
(%s)", -+ vbasedev->name, ret, strerror(-ret)); -+ } -+ -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ if (!vbasedev->dirty_pages_supported) { -+ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { -+ error_setg(&err, -+ "%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); -+ return vfio_block_migration(vbasedev, err, errp); -+ } -+ -+ warn_report("%s: VFIO device doesn't support device dirty tracking", -+ vbasedev->name); - } - -- ret = vfio_block_multiple_devices_migration(errp); -+ ret = vfio_block_multiple_devices_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- ret = vfio_block_giommu_migration(errp); -+ ret = vfio_block_giommu_migration(vbasedev, errp); - if (ret) { - return ret; - } - -- trace_vfio_migration_probe(vbasedev->name); -+ trace_vfio_migration_realize(vbasedev->name); - return 0; -- --add_blocker: -- error_setg(&vbasedev->migration_blocker, -- "VFIO device doesn't support migration"); -- -- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); -- if (ret < 0) { -- error_free(vbasedev->migration_blocker); -- vbasedev->migration_blocker = NULL; -- } -- return ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 15e7554954..6634945a70 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { - VFIO_FEATURE_ENABLE_REQ_BIT, true), - DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, - VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), -- DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, -- vbasedev.enable_migration, false), -+ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, -+ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), - DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), - DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, - vbasedev.ram_block_discard_allowed, false), -diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events 
-index 4150b59e58..0391bd583b 100644 ---- a/hw/vfio/trace-events -+++ b/hw/vfio/trace-events -@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" - vfio_load_device_config_state(const char *name) " (%s)" - vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 - vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" --vfio_migration_probe(const char *name) " (%s)" -+vfio_migration_realize(const char *name) " (%s)" - vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" - vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" - vfio_save_block(const char *name, int data_size) " (%s) data_size %d" -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 1d19c6f251..93429b9abb 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -139,7 +139,7 @@ typedef struct VFIODevice { - bool needs_reset; - bool no_mmap; - bool ram_block_discard_allowed; -- bool enable_migration; -+ OnOffAuto enable_migration; - VFIODeviceOps *ops; - unsigned int num_irqs; - unsigned int num_regions; -@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; - extern VFIOGroupList vfio_group_list; - - bool vfio_mig_active(void); --int vfio_block_multiple_devices_migration(Error **errp); -+int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); - void vfio_unblock_multiple_devices_migration(void); --int vfio_block_giommu_migration(Error **errp); -+int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); - int64_t vfio_mig_bytes_transferred(void); - void vfio_reset_bytes_transferred(void); - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch deleted file mode 100644 index 3b61c5d..0000000 --- 
a/SOURCES/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +++ /dev/null @@ -1,102 +0,0 @@ -From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return - saved data size -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit cf53efbbda2e -Author: Avihai Horon -Date: Wed Jun 21 14:11:58 2023 +0300 - - vfio/migration: Refactor vfio_save_block() to return saved data size - - Refactor vfio_save_block() to return the size of saved data on success - and -errno on error. - - This will be used in next patch to implement VFIO migration pre-copy - support. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Juan Quintela - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 17 +++++++++-------- - 1 file changed, 9 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 6b58dddb88..235978fd68 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, - return 0; - } - --/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ --static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) -+/* Returns the size of saved data on success and -errno on error */ -+static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) - { - ssize_t data_size; - -@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - return -errno; - } - if (data_size == 0) { -- return 1; -+ return 0; - } - - qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); -@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) - - trace_vfio_save_block(migration->vbasedev->name, data_size); - -- return qemu_file_get_error(f); -+ return qemu_file_get_error(f) ?: data_size; - } - - /* ---------------------------------------------------------------------- */ -@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, - static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - { - VFIODevice *vbasedev = opaque; -+ ssize_t data_size; - int ret; - - /* We reach here with device state STOP only */ -@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) - } - - do { -- ret = vfio_save_block(f, vbasedev->migration); -- if (ret < 0) { -- return ret; -+ data_size = vfio_save_block(f, vbasedev->migration); -+ if (data_size < 0) { -+ return data_size; - } -- } 
while (!ret); -+ } while (data_size); - - qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); - ret = qemu_file_get_error(f); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch deleted file mode 100644 index ad3c6ca..0000000 --- a/SOURCES/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0520d63c7701 -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:09 2023 +0800 - - vfio/migration: Remove print of "Migration disabled" - - Property enable_migration supports [on/off/auto]. - In ON mode, error pointer is passed to errp and logged. - In OFF mode, we doesn't need to log "Migration disabled" as it's intentional. - In AUTO mode, we should only ever see errors or warnings if the device - supports migration and an error or incompatibility occurs while further - probing or configuring it. Lack of support for migration shoundn't - generate an error or warning. 
- - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 037b7d4176..a60b868c38 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - if (!pdev->failover_pair_id) { - ret = vfio_migration_realize(vbasedev, errp); - if (ret) { -- error_report("%s: Migration disabled", vbasedev->name); - goto out_deregister; - } - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch deleted file mode 100644 index 2666460..0000000 --- a/SOURCES/kvm-vfio-migration-Reset-bytes_transferred-properly.patch +++ /dev/null @@ -1,165 +0,0 @@ -From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 808642a2f640 -Author: Avihai Horon -Date: Wed Jun 28 10:31:11 2023 +0300 - - vfio/migration: Reset bytes_transferred properly - - Currently, VFIO bytes_transferred is not reset properly: - 1. bytes_transferred is not reset after a VM snapshot (so a migration - following a snapshot will report incorrect value). - 2. 
bytes_transferred is a single counter for all VFIO devices, however - upon migration failure it is reset multiple times, by each VFIO - device. - - Fix it by introducing a new function vfio_reset_bytes_transferred() and - calling it during migration and snapshot start. - - Remove existing bytes_transferred reset in VFIO migration state - notifier, which is not needed anymore. - - Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Reviewed-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Conflicts: - - migration/migration.c - migration/savevm.c - context changes due to commit aff3f6606d14 ("migration: Rename - ram_counters to mig_stats") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 6 +++++- - include/hw/vfio/vfio-common.h | 1 + - migration/migration.c | 1 + - migration/migration.h | 1 + - migration/savevm.c | 1 + - migration/target.c | 17 +++++++++++++++-- - 6 files changed, 24 insertions(+), 3 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index acbf0bb7ab..7cf143926c 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) - case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_CANCELLED: - case MIGRATION_STATUS_FAILED: -- bytes_transferred = 0; - /* - * If setting the device in RUNNING state fails, the device should - * be reset. To do so, use ERROR state as a recover state. 
-@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) - return bytes_transferred; - } - -+void vfio_reset_bytes_transferred(void) -+{ -+ bytes_transferred = 0; -+} -+ - int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - int ret = -ENOTSUP; -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 6d1b8487c3..1d19c6f251 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); - void vfio_unblock_multiple_devices_migration(void); - int vfio_block_giommu_migration(Error **errp); - int64_t vfio_mig_bytes_transferred(void); -+void vfio_reset_bytes_transferred(void); - - #ifdef CONFIG_LINUX - int vfio_get_region_info(VFIODevice *vbasedev, int index, -diff --git a/migration/migration.c b/migration/migration.c -index 9bf1caee6c..47ad6c43cb 100644 ---- a/migration/migration.c -+++ b/migration/migration.c -@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, - */ - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - - return true; - } -diff --git a/migration/migration.h b/migration/migration.h -index e9679f8029..7ccf460aa2 100644 ---- a/migration/migration.h -+++ b/migration/migration.h -@@ -495,6 +495,7 @@ bool migration_rate_limit(void); - void migration_cancel(const Error *error); - - void populate_vfio_info(MigrationInfo *info); -+void reset_vfio_bytes_transferred(void); - void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); - - #endif -diff --git a/migration/savevm.c b/migration/savevm.c -index aff70e6263..83088fc3f8 100644 ---- a/migration/savevm.c -+++ b/migration/savevm.c -@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) - migrate_init(ms); - memset(&ram_counters, 0, sizeof(ram_counters)); - memset(&compression_counters, 0, 
sizeof(compression_counters)); -+ reset_vfio_bytes_transferred(); - ms->to_dst_file = f; - - qemu_mutex_unlock_iothread(); -diff --git a/migration/target.c b/migration/target.c -index 00ca007f97..f39c9a8d88 100644 ---- a/migration/target.c -+++ b/migration/target.c -@@ -14,12 +14,25 @@ - #include "hw/vfio/vfio-common.h" - #endif - -+#ifdef CONFIG_VFIO - void populate_vfio_info(MigrationInfo *info) - { --#ifdef CONFIG_VFIO - if (vfio_mig_active()) { - info->vfio = g_malloc0(sizeof(*info->vfio)); - info->vfio->transferred = vfio_mig_bytes_transferred(); - } --#endif - } -+ -+void reset_vfio_bytes_transferred(void) -+{ -+ vfio_reset_bytes_transferred(); -+} -+#else -+void populate_vfio_info(MigrationInfo *info) -+{ -+} -+ -+void reset_vfio_bytes_transferred(void) -+{ -+} -+#endif --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch deleted file mode 100644 index efd42a9..0000000 --- a/SOURCES/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 29/37] vfio/migration: Return bool type for - vfio_migration_realize() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit d4a2af747d5a -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:10 2023 +0800 - - vfio/migration: Return bool type for vfio_migration_realize() - - Make vfio_migration_realize() adhere to the convention of other realize() - callbacks(like 
qdev_realize) by returning bool instead of int. - - Suggested-by: Cédric Le Goater - Suggested-by: Joao Martins - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 15 ++++++++++----- - hw/vfio/pci.c | 3 +-- - include/hw/vfio/vfio-common.h | 2 +- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index e3954570c8..2674f4bc47 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) - bytes_transferred = 0; - } - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) -+/* -+ * Return true when either migration initialized or blocker registered. -+ * Currently only return false when adding blocker fails which will -+ * de-register vfio device. -+ */ -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - { - Error *err = NULL; - int ret; -@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { - error_setg(&err, "%s: Migration is disabled for VFIO device", - vbasedev->name); -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - ret = vfio_migration_init(vbasedev); -@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - vbasedev->name, ret, strerror(-ret)); - } - -- return vfio_block_migration(vbasedev, err, errp); -+ return !vfio_block_migration(vbasedev, err, errp); - } - - if (!vbasedev->dirty_pages_supported) { -@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) - } - - trace_vfio_migration_realize(vbasedev->name); -- return 0; -+ return true; - - add_blocker: - ret = vfio_block_migration(vbasedev, err, errp); -@@ -904,7 +909,7 @@ out_deinit: - if (ret) { - 
vfio_migration_deinit(vbasedev); - } -- return ret; -+ return !ret; - } - - void vfio_migration_exit(VFIODevice *vbasedev) -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a60b868c38..ba40ca8784 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - } - - if (!pdev->failover_pair_id) { -- ret = vfio_migration_realize(vbasedev, errp); -- if (ret) { -+ if (!vfio_migration_realize(vbasedev, errp)) { - goto out_deregister; - } - } -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index 45167c8a8a..da43d27352 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, - int vfio_spapr_remove_window(VFIOContainer *container, - hwaddr offset_within_address_space); - --int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); -+bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); - void vfio_migration_exit(VFIODevice *vbasedev); - - #endif /* HW_VFIO_VFIO_COMMON_H */ --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch b/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch deleted file mode 100644 index 6211db7..0000000 --- a/SOURCES/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP - state -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) - 
-Bugzilla: https://bugzilla.redhat.com/2192818 - -commit ff180c6bd7a8 -Author: Avihai Horon -Date: Mon Apr 3 16:00:00 2023 +0300 - - vfio/migration: Skip log_sync during migration SETUP state - - Currently, VFIO log_sync can be issued while migration is in SETUP - state. However, doing this log_sync is at best redundant and at worst - can fail. - - Redundant -- all RAM is marked dirty in migration SETUP state and is - transferred only after migration is set to ACTIVE state, so doing - log_sync during migration SETUP is pointless. - - Can fail -- there is a time window, between setting migration state to - SETUP and starting dirty tracking by RAM save_live_setup handler, during - which dirty tracking is still not started. Any VFIO log_sync call that - is issued during this time window will fail. For example, this error can - be triggered by migrating a VM when a GUI is active, which constantly - calls log_sync. - - Fix it by skipping VFIO log_sync while migration is in SETUP state. - - Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") - Signed-off-by: Avihai Horon - Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/common.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/common.c b/hw/vfio/common.c -index 4d01ea3515..78358ede27 100644 ---- a/hw/vfio/common.c -+++ b/hw/vfio/common.c -@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) - VFIODevice *vbasedev; - MigrationState *ms = migrate_get_current(); - -- if (!migration_is_setup_or_active(ms->state)) { -+ if (ms->state != MIGRATION_STATUS_ACTIVE && -+ ms->state != MIGRATION_STATUS_DEVICE) { - return false; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch deleted file mode 
100644 index 2db8511..0000000 --- a/SOURCES/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in - VFIOMigration -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 6cd1fe11598a -Author: Avihai Horon -Date: Wed Jun 21 14:11:59 2023 +0300 - - vfio/migration: Store VFIO migration flags in VFIOMigration - - VFIO migration flags are queried once in vfio_migration_init(). Store - them in VFIOMigration so they can be used later to check the device's - migration capabilities without re-querying them. - - This will be used in the next patch to check if the device supports - precopy migration. 
- - Signed-off-by: Avihai Horon - Reviewed-by: Cédric Le Goater - Tested-by: YangHang Liu - Acked-by: Alex Williamson - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/migration.c | 1 + - include/hw/vfio/vfio-common.h | 1 + - 2 files changed, 2 insertions(+) - -diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c -index 235978fd68..8d33414379 100644 ---- a/hw/vfio/migration.c -+++ b/hw/vfio/migration.c -@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) - migration->vbasedev = vbasedev; - migration->device_state = VFIO_DEVICE_STATE_RUNNING; - migration->data_fd = -1; -+ migration->mig_flags = mig_flags; - - vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); - -diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h -index eed244f25f..5f29dab839 100644 ---- a/include/hw/vfio/vfio-common.h -+++ b/include/hw/vfio/vfio-common.h -@@ -66,6 +66,7 @@ typedef struct VFIOMigration { - int data_fd; - void *data_buffer; - size_t data_buffer_size; -+ uint64_t mig_flags; - } VFIOMigration; - - typedef struct VFIOAddressSpace { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch new file mode 100644 index 0000000..f79de18 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch @@ -0,0 +1,81 @@ +From 7788fdc2375e01ead0c8a705c3b3d7467dd93d67 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 21 Nov 2023 16:44:09 +0800 +Subject: [PATCH 030/101] vfio/pci: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [29/67] 363c62607a11093ea0062489e11a708117d8ffb9 
(eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-pci device: + +If the user wants to use the legacy backend, it shall not +link the vfio-pci device with any iommufd object: + + -device vfio-pci,host=0000:02:00.0 + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-pci device options: + + -object iommufd,id=iommufd0 + -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Eric Auger +Signed-off-by: Yi Liu +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit ee42b261b0a2e465ae003ddcaf1caf117c201f74) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 83b2561908..39e6a6678e 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -19,6 +19,7 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include + #include + +@@ -42,6 +43,7 @@ + #include "qapi/error.h" + #include "migration/blocker.h" + #include "migration/qemu-file.h" ++#include "sysemu/iommufd.h" + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + +@@ -3415,6 +3417,10 @@ static Property vfio_pci_dev_properties[] = { + * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), + * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), + */ ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch 
b/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch deleted file mode 100644 index b5d9d37..0000000 --- a/SOURCES/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +++ /dev/null @@ -1,67 +0,0 @@ -From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI - retry path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [17/28] 2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit c17408892319 -Author: Shameer Kolothum -Date: Tue Jun 13 15:09:43 2023 +0100 - - vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path - - When vfio_enable_vectors() returns with less than requested nr_vectors - we retry with what kernel reported back. 
But the retry path doesn't - call vfio_prepare_kvm_msi_virq_batch() and this results in, - - qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 - qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed - - Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") - Reviewed-by: Longpeng - Signed-off-by: Shameer Kolothum - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 7c5e2b5996..15e7554954 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - - vfio_disable_interrupts(vdev); - -+ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); -+retry: - /* - * Setting vector notifiers needs to enable route for each vector. - * Deferring to commit the KVM routes once rather than per vector -@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) - */ - vfio_prepare_kvm_msi_virq_batch(vdev); - -- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); --retry: - vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); - - for (i = 0; i < vdev->nr_vectors; i++) { --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch new file mode 100644 index 0000000..837e490 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch @@ -0,0 +1,69 @@ +From 43236995e8ad336d366b625fb8362046be53fc34 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 29 Jan 2024 09:46:34 +0100 +Subject: [PATCH] vfio/pci: Clear MSI-X IRQ index always +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 218: 
vfio/pci: Clear MSI-X IRQ index always +RH-Jira: RHEL-21293 +RH-Acked-by: Eric Auger +RH-Acked-by: Alex Williamson +RH-Commit: [1/1] b4b587b13c11e350d3e5fcc11ba66a006b25a763 (clegoate/qemu-kvm-c9s) + +JIRA: https://issues.redhat.com/browse/RHEL-21293 + +commit d2b668fca5652760b435ce812a743bba03d2f316 +Author: Cédric Le Goater +Date: Thu Jan 25 14:27:36 2024 +0100 + + vfio/pci: Clear MSI-X IRQ index always + + When doing device assignment of a physical device, MSI-X can be + enabled with no vectors enabled and this sets the IRQ index to + VFIO_PCI_MSIX_IRQ_INDEX. However, when MSI-X is disabled, the IRQ + index is left untouched if no vectors are in use. Then, when INTx + is enabled, the IRQ index value is considered incompatible (set to + MSI-X) and VFIO_DEVICE_SET_IRQS fails. QEMU complains with : + + qemu-system-x86_64: vfio 0000:08:00.0: Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument + + To avoid that, unconditionally clear the IRQ index when MSI-X is + disabled. + + Buglink: https://issues.redhat.com/browse/RHEL-21293 + Fixes: 5ebffa4e87e7 ("vfio/pci: use an invalid fd to enable MSI-X") + Cc: Jing Liu + Cc: Alex Williamson + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index adb7c09367..29bb8067eb 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -829,9 +829,11 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev) + } + } + +- if (vdev->nr_vectors) { +- vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); +- } ++ /* ++ * Always clear MSI-X IRQ index. A PF device could have enabled ++ * MSI-X with no vectors. See vfio_msix_enable(). 
++ */ ++ vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX); + + vfio_msi_disable_common(vdev); + vfio_intx_enable(vdev, &err); +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch deleted file mode 100644 index 0aca4ef..0000000 --- a/SOURCES/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit adee0da0368f -Author: Zhenzhong Duan -Date: Mon Jul 3 15:15:06 2023 +0800 - - vfio/pci: Disable INTx in vfio_realize error path - - When vfio realize fails, INTx isn't disabled if it has been enabled. - This may confuse host side with unhandled interrupt report. 
- - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Joao Martins - Reviewed-by: Cédric Le Goater - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 87bd440504..2d059832a4 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - return; - - out_deregister: -+ if (vdev->interrupt == VFIO_INT_INTx) { -+ vfio_intx_disable(vdev); -+ } - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch new file mode 100644 index 0000000..af6593c --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch @@ -0,0 +1,139 @@ +From fe5ecedd452754eeb238b23eb0544ed3c5086157 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:06 +0800 +Subject: [PATCH 027/101] vfio/pci: Extract out a helper + vfio_pci_get_pci_hot_reset_info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [26/67] 730b7f1496f4f21310fa13c79cb87f8d5e2ad2a8 (eauger1/centos-qemu-kvm) + +This helper will be used by both legacy and iommufd backends. + +No functional changes intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 4d36ec23a75eb387492f4d68ff1b8eeee5d68142) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++---------------- + hw/vfio/pci.h | 3 +++ + 2 files changed, 40 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index ec98080f28..b482e5479f 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2448,22 +2448,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + return (strcmp(tmp, name) == 0); + } + +-static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p) + { +- VFIOGroup *group; + struct vfio_pci_hot_reset_info *info; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; ++ int ret, count; + +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; ++ assert(info_p && !*info_p); + + info = g_malloc0(sizeof(*info)); + info->argsz = sizeof(*info); +@@ -2471,24 +2462,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret && errno != ENOSPC) { + ret = -errno; ++ g_free(info); + if (!vdev->has_pm_reset) { + error_report("vfio: Cannot reset device %s, " + "no available reset mechanism.", vdev->vbasedev.name); + } +- goto out_single; ++ return ret; + } + + count = info->count; +- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices))); +- info->argsz = sizeof(*info) + (count * sizeof(*devices)); +- devices = &info->devices[0]; ++ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0]))); ++ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0])); + + ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info); + if (ret) { + ret = -errno; ++ g_free(info); + error_report("vfio: hot reset info failed: %m"); ++ return ret; ++ } ++ ++ *info_p = info; ++ return 0; ++} ++ ++static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) ++{ ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? 
"one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { + goto out_single; + } ++ devices = &info->devices[0]; + + trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); + +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index eb74d9de2d..3568a6135d 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, ++ struct vfio_pci_hot_reset_info **info_p); ++ + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); + + int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch deleted file mode 100644 index d05d114..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 357bd7932a13 -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:38 2023 +0800 - - vfio/pci: Fix a segfault in vfio_realize - - The kvm irqchip notifier is only registered if the device supports - INTx, however it's unconditionally removed in vfio realize error - path. 
If the assigned device does not support INTx, this will cause - QEMU to crash when vfio realize fails. Change it to conditionally - remove the notifier only if the notify hook is setup. - - Before fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Connection closed by foreign host. - - After fix: - (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 - Error: vfio 0000:81:11.1: xres and yres properties require display=on - (qemu) - - Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6634945a70..d08e6c1a20 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - - out_deregister: - pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); -- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ if (vdev->irqchip_change_notifier.notify) { -+ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch deleted file mode 100644 index 1fa725f..0000000 --- a/SOURCES/kvm-vfio-pci-Fix-a-use-after-free-issue.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration 
support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b83b40b61484 -Author: Zhenzhong Duan -Date: Wed May 17 10:46:51 2023 +0800 - - vfio/pci: Fix a use-after-free issue - - vbasedev->name is freed wrongly which leads to garbage VFIO trace log. - Fix it by allocating a dup of vbasedev->name and then free the dup. - - Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") - Suggested-by: Alex Williamson - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Matthew Rosato - Acked-by: Alex Williamson - Reviewed-by: Philippe Mathieu-Daudé - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 6cd3a98c39..7c5e2b5996 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - qemu_uuid_unparse(&vdev->vf_token, uuid); - name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); - } else { -- name = vbasedev->name; -+ name = g_strdup(vbasedev->name); - } - - ret = vfio_get_device(group, name, vbasedev, errp); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch deleted file mode 100644 index 3978b96..0000000 --- a/SOURCES/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:57 +0200 -Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 0cc889c8826c -Author: Zhenzhong Duan -Date: Thu Jun 29 16:40:39 2023 +0800 - - vfio/pci: Free leaked timer in vfio_realize error path - - When vfio_realize fails, the mmap_timer used for INTx optimization - isn't freed. As this timer isn't activated yet, the potential impact - is just a piece of leaked memory. - - Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") - Signed-off-by: Zhenzhong Duan - Reviewed-by: Cédric Le Goater - Reviewed-by: Joao Martins - Signed-off-by: Cédric Le Goater - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index d08e6c1a20..87bd440504 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -3248,6 +3248,9 @@ out_deregister: - if (vdev->irqchip_change_notifier.notify) { - kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); - } -+ if (vdev->intx.mmap_timer) { -+ timer_free(vdev->intx.mmap_timer); -+ } - out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch new file mode 100644 index 0000000..2a2db5f --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch @@ -0,0 +1,466 @@ +From acc3e5306e184567006bc45e7f36f2473e75d08a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:07 +0800 +Subject: [PATCH 028/101] vfio/pci: Introduce a vfio pci hot reset interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger 
+RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [27/67] 192088dbf2cf88663acd2416f69b7eeb175b2525 (eauger1/centos-qemu-kvm) + +Legacy vfio pci and iommufd cdev have different process to hot reset +vfio device, expand current code to abstract out pci_hot_reset callback +for legacy vfio, this same interface will also be used by iommufd +cdev vfio device. + +Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it +into container.c. + +vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so +they could be called in legacy and iommufd pci_hot_reset callback. + +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit c328e7e8ad1c969dbcbe90ee76afcd3cfec5e945) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 170 ++++++++++++++++++++++++++ + hw/vfio/pci.c | 168 +------------------------ + hw/vfio/pci.h | 3 + + include/hw/vfio/vfio-container-base.h | 3 + + 4 files changed, 182 insertions(+), 162 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index ed2d721b2b..1dbf9b9a17 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -33,6 +33,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "pci.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) + vfio_put_group(group); + } + ++static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ VFIOGroup *group; ++ struct vfio_pci_hot_reset_info *info = NULL; ++ struct vfio_pci_dependent_device *devices; ++ struct vfio_pci_hot_reset *reset; ++ int32_t *fds; ++ int ret, i, count; ++ bool 
multi = false; ++ ++ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); ++ ++ if (!single) { ++ vfio_pci_pre_reset(vdev); ++ } ++ vdev->vbasedev.needs_reset = false; ++ ++ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); ++ ++ if (ret) { ++ goto out_single; ++ } ++ devices = &info->devices[0]; ++ ++ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); ++ ++ /* Verify that we have all the groups required */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ trace_vfio_pci_hot_reset_dep_devices(host.domain, ++ host.bus, host.slot, host.function, devices[i].group_id); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ if (!vdev->has_pm_reset) { ++ error_report("vfio: Cannot reset device %s, " ++ "depends on group %d which is not owned.", ++ vdev->vbasedev.name, devices[i].group_id); ++ } ++ ret = -EPERM; ++ goto out; ++ } ++ ++ /* Prep dependent devices for reset and clear our marker. 
*/ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ if (single) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ vfio_pci_pre_reset(tmp); ++ tmp->vbasedev.needs_reset = false; ++ multi = true; ++ break; ++ } ++ } ++ } ++ ++ if (!single && !multi) { ++ ret = -EINVAL; ++ goto out_single; ++ } ++ ++ /* Determine how many group fds need to be passed */ ++ count = 0; ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ count++; ++ break; ++ } ++ } ++ } ++ ++ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); ++ reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); ++ fds = &reset->group_fds[0]; ++ ++ /* Fill in group fds */ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ for (i = 0; i < info->count; i++) { ++ if (group->groupid == devices[i].group_id) { ++ fds[reset->count++] = group->fd; ++ break; ++ } ++ } ++ } ++ ++ /* Bus reset! */ ++ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); ++ g_free(reset); ++ if (ret) { ++ ret = -errno; ++ } ++ ++ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, ++ ret ? 
strerror(errno) : "Success"); ++ ++out: ++ /* Re-enable INTx on affected devices */ ++ for (i = 0; i < info->count; i++) { ++ PCIHostDeviceAddress host; ++ VFIOPCIDevice *tmp; ++ VFIODevice *vbasedev_iter; ++ ++ host.domain = devices[i].segment; ++ host.bus = devices[i].bus; ++ host.slot = PCI_SLOT(devices[i].devfn); ++ host.function = PCI_FUNC(devices[i].devfn); ++ ++ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { ++ continue; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ if (group->groupid == devices[i].group_id) { ++ break; ++ } ++ } ++ ++ if (!group) { ++ break; ++ } ++ ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ if (!vbasedev_iter->dev->realized || ++ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { ++ continue; ++ } ++ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); ++ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { ++ vfio_pci_post_reset(tmp); ++ break; ++ } ++ } ++ } ++out_single: ++ if (!single) { ++ vfio_pci_post_reset(vdev); ++ } ++ g_free(info); ++ ++ return ret; ++} ++ + const VFIOIOMMUOps vfio_legacy_ops = { + .dma_map = vfio_legacy_dma_map, + .dma_unmap = vfio_legacy_dma_unmap, +@@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = { + .detach_device = vfio_legacy_detach_device, + .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking, + .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap, ++ .pci_hot_reset = vfio_legacy_pci_hot_reset, + }; +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index b482e5479f..83b2561908 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2377,7 +2377,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) + return 0; + } + +-static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; + uint16_t cmd; +@@ -2414,7 +2414,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); + } + +-static void 
vfio_pci_post_reset(VFIOPCIDevice *vdev) ++void vfio_pci_post_reset(VFIOPCIDevice *vdev) + { + Error *err = NULL; + int nr; +@@ -2438,7 +2438,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) + vfio_quirk_reset(vdev); + } + +-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) + { + char tmp[13]; + +@@ -2488,166 +2488,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + + static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) + { +- VFIOGroup *group; +- struct vfio_pci_hot_reset_info *info = NULL; +- struct vfio_pci_dependent_device *devices; +- struct vfio_pci_hot_reset *reset; +- int32_t *fds; +- int ret, i, count; +- bool multi = false; +- +- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi"); +- +- if (!single) { +- vfio_pci_pre_reset(vdev); +- } +- vdev->vbasedev.needs_reset = false; +- +- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info); +- +- if (ret) { +- goto out_single; +- } +- devices = &info->devices[0]; +- +- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name); +- +- /* Verify that we have all the groups required */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- trace_vfio_pci_hot_reset_dep_devices(host.domain, +- host.bus, host.slot, host.function, devices[i].group_id); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- if (!vdev->has_pm_reset) { +- error_report("vfio: Cannot reset device %s, " +- "depends on group %d which is not owned.", +- vdev->vbasedev.name, devices[i].group_id); +- 
} +- ret = -EPERM; +- goto out; +- } +- +- /* Prep dependent devices for reset and clear our marker. */ +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- if (single) { +- ret = -EINVAL; +- goto out_single; +- } +- vfio_pci_pre_reset(tmp); +- tmp->vbasedev.needs_reset = false; +- multi = true; +- break; +- } +- } +- } +- +- if (!single && !multi) { +- ret = -EINVAL; +- goto out_single; +- } +- +- /* Determine how many group fds need to be passed */ +- count = 0; +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- count++; +- break; +- } +- } +- } +- +- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds))); +- reset->argsz = sizeof(*reset) + (count * sizeof(*fds)); +- fds = &reset->group_fds[0]; +- +- /* Fill in group fds */ +- QLIST_FOREACH(group, &vfio_group_list, next) { +- for (i = 0; i < info->count; i++) { +- if (group->groupid == devices[i].group_id) { +- fds[reset->count++] = group->fd; +- break; +- } +- } +- } +- +- /* Bus reset! */ +- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset); +- g_free(reset); +- +- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name, +- ret ? 
strerror(errno) : "Success"); +- +-out: +- /* Re-enable INTx on affected devices */ +- for (i = 0; i < info->count; i++) { +- PCIHostDeviceAddress host; +- VFIOPCIDevice *tmp; +- VFIODevice *vbasedev_iter; +- +- host.domain = devices[i].segment; +- host.bus = devices[i].bus; +- host.slot = PCI_SLOT(devices[i].devfn); +- host.function = PCI_FUNC(devices[i].devfn); +- +- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { +- continue; +- } +- +- QLIST_FOREACH(group, &vfio_group_list, next) { +- if (group->groupid == devices[i].group_id) { +- break; +- } +- } +- +- if (!group) { +- break; +- } +- +- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { +- if (!vbasedev_iter->dev->realized || +- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { +- continue; +- } +- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); +- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { +- vfio_pci_post_reset(tmp); +- break; +- } +- } +- } +-out_single: +- if (!single) { +- vfio_pci_post_reset(vdev); +- } +- g_free(info); ++ VFIODevice *vbasedev = &vdev->vbasedev; ++ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops; + +- return ret; ++ return ops->pci_hot_reset(vbasedev, single); + } + + /* +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 3568a6135d..b7de39c010 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr); + + extern const PropertyInfo qdev_prop_nv_gpudirect_clique; + ++void vfio_pci_pre_reset(VFIOPCIDevice *vdev); ++void vfio_pci_post_reset(VFIOPCIDevice *vdev); ++bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name); + int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, + struct vfio_pci_hot_reset_info **info_p); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 4b6f017c6f..45bb19c767 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -106,6 +106,9 @@ struct 
VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* PCI specific */ ++ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single); ++ + /* SPAPR specific */ + int (*add_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch new file mode 100644 index 0000000..4a973b5 --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch @@ -0,0 +1,237 @@ +From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:10 +0800 +Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a + file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm) + +Conflicts: contextual conflict in hw/vfio/pci.c due to +RHEL-only f73562144e492 vfio: cap number of devices that can be assigned + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. + +Together with the earlier support of pre-opening /dev/iommu device, +now we have full support of passing a vfio device to unprivileged +qemu by management tool. This mode is no more considered for the +legacy backend. So let's remove the "TODO" comment. 
+ +Add helper functions vfio_device_set_fd() and vfio_device_get_name() +to set fd and get device name, they will also be used by other vfio +devices. + +There is no easy way to check if a device is mdev with FD passing, +so fail the x-balloon-allowed check unconditionally in this case. + +There is also no easy way to get BDF as name with FD passing, so +we fake a name by VFIO_FD[fd]. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b) +Signed-off-by: Eric Auger +--- + hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++ + hw/vfio/iommufd.c | 12 ++++++---- + hw/vfio/pci.c | 28 +++++++++++++---------- + include/hw/vfio/vfio-common.h | 4 ++++ + 4 files changed, 71 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c +index 168847e7c5..3592c3d54e 100644 +--- a/hw/vfio/helpers.c ++++ b/hw/vfio/helpers.c +@@ -27,6 +27,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "qemu/error-report.h" ++#include "monitor/monitor.h" + + /* + * Common VFIO interrupt disable +@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type) + + return ret; + } ++ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp) ++{ ++ struct stat st; ++ ++ if (vbasedev->fd < 0) { ++ if (stat(vbasedev->sysfsdev, &st) < 0) { ++ error_setg_errno(errp, errno, "no such host device"); ++ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ return -errno; ++ } ++ /* User may specify a name, e.g: VFIO platform device */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); ++ } ++ } else { ++ if (!vbasedev->iommufd) { ++ error_setg(errp, "Use FD passing only with iommufd backend"); ++ return -EINVAL; ++ } ++ /* ++ * Give a name with fd so any function printing out vbasedev->name ++ * will not break. 
++ */ ++ if (!vbasedev->name) { ++ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd); ++ } ++ } ++ ++ return 0; ++} ++ ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp) ++{ ++ int fd = monitor_fd_param(monitor_cur(), str, errp); ++ ++ if (fd < 0) { ++ error_prepend(errp, "Could not parse remote object fd %s:", str); ++ return; ++ } ++ vbasedev->fd = fd; ++} +diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c +index 6e53e013ef..5accd26484 100644 +--- a/hw/vfio/iommufd.c ++++ b/hw/vfio/iommufd.c +@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev, + uint32_t ioas_id; + Error *err = NULL; + +- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); +- if (devfd < 0) { +- return devfd; ++ if (vbasedev->fd < 0) { ++ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp); ++ if (devfd < 0) { ++ return devfd; ++ } ++ vbasedev->fd = devfd; ++ } else { ++ devfd = vbasedev->fd; + } +- vbasedev->fd = devfd; + + ret = iommufd_cdev_connect_and_bind(vbasedev, errp); + if (ret) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 39e6a6678e..3412a63bb1 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + VFIOGroup *group; + char *tmp, *subsys; + Error *err = NULL; +- struct stat st; + int ret, i = 0; + bool is_mdev; + char uuid[UUID_STR_LEN]; +@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + return; + } + +- if (!vbasedev->sysfsdev) { ++ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { + error_setg(errp, "No provided host device"); + error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F " ++#ifdef CONFIG_IOMMUFD ++ "or -device vfio-pci,fd=DEVICE_FD " ++#endif + "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n"); + return; + } +@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error 
**errp) + vdev->host.slot, vdev->host.function); + } + +- if (stat(vbasedev->sysfsdev, &st) < 0) { +- error_setg_errno(errp, errno, "no such host device"); +- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev); ++ if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- +- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); + vbasedev->ops = &vfio_pci_ops; + vbasedev->type = VFIO_DEVICE_TYPE_PCI; + vbasedev->dev = DEVICE(vdev); +@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; ++ vdev->vbasedev.fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = { + qdev_prop_nv_gpudirect_clique, uint8_t), + DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo, + OFF_AUTOPCIBAR_OFF), +- /* +- * TODO - support passed fds... is this necessary? +- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name), +- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name), +- */ + #ifdef CONFIG_IOMMUFD + DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd, + TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), +@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++#ifdef CONFIG_IOMMUFD ++static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + + dc->reset = vfio_pci_reset; + device_class_set_props(dc, vfio_pci_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); ++#endif + dc->desc = "VFIO-based PCI device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->realize 
= vfio_realize; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3dac5c167e..697bf24a35 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer, + hwaddr size); + int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova, + uint64_t size, ram_addr_t ram_addr); ++ ++/* Returns 0 on success, or a negative errno. */ ++int vfio_device_get_name(VFIODevice *vbasedev, Error **errp); ++void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp); + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch new file mode 100644 index 0000000..d426ede --- /dev/null +++ b/SOURCES/kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch @@ -0,0 +1,70 @@ +From 942bd7251d166f558e0e6acf7ba853e940e2fb52 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:21 +0800 +Subject: [PATCH 042/101] vfio/pci: Move VFIODevice initializations in + vfio_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [41/67] 67392d7a92a6ec2155697a355c88d295338a0785 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations is in vfio_realize, +move all of them in vfio_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit dd2fcb1716be9b89c726b3446f38446bb99d6b3a) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 3412a63bb1..3f5900cc46 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2995,9 +2995,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (vfio_device_get_name(vbasedev, errp) < 0) { + return; + } +- vbasedev->ops = &vfio_pci_ops; +- vbasedev->type = VFIO_DEVICE_TYPE_PCI; +- vbasedev->dev = DEVICE(vdev); + + /* + * Mediated devices *might* operate compatibly with discarding of RAM, but +@@ -3346,6 +3343,7 @@ static void vfio_instance_init(Object *obj) + { + PCIDevice *pci_dev = PCI_DEVICE(obj); + VFIOPCIDevice *vdev = VFIO_PCI(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + + device_add_bootindex_property(obj, &vdev->bootindex, + "bootindex", NULL, +@@ -3354,7 +3352,11 @@ static void vfio_instance_init(Object *obj) + vdev->host.bus = ~0U; + vdev->host.slot = ~0U; + vdev->host.function = ~0U; +- vdev->vbasedev.fd = -1; ++ ++ vbasedev->type = VFIO_DEVICE_TYPE_PCI; ++ vbasedev->ops = &vfio_pci_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + + vdev->nv_gpudirect_clique = 0xFF; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch deleted file mode 100644 index d937140..0000000 --- a/SOURCES/kvm-vfio-pci-Static-Resizable-BAR-capability.patch +++ /dev/null @@ -1,141 +0,0 @@ -From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater -RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit b5048a4cbfa0 -Author: Alex Williamson -Date: Thu May 4 14:42:48 2023 -0600 - - vfio/pci: Static Resizable BAR capability - - The PCI Resizable BAR (ReBAR) capability is currently hidden from the - VM because the protocol for interacting with the capability does not - support a mechanism for the device to reject an advertised supported - BAR size. However, when assigned to a VM, the act of resizing the - BAR requires adjustment of host resources for the device, which - absolutely can fail. Linux does not currently allow us to reserve - resources for the device independent of the current usage. - - The only writable field within the ReBAR capability is the BAR Size - register. The PCIe spec indicates that when written, the device - should immediately begin to operate with the provided BAR size. The - spec however also notes that software must only write values - corresponding to supported sizes as indicated in the capability and - control registers. Writing unsupported sizes produces undefined - results. Therefore, if the hypervisor were to virtualize the - capability and control registers such that the current size is the - only indicated available size, then a write of anything other than - the current size falls into the category of undefined behavior, - where we can essentially expose the modified ReBAR capability as - read-only. - - This may seem pointless, but users have reported that virtualizing - the capability in this way not only allows guest software to expose - related features as available (even if only cosmetic), but in some - scenarios can resolve guest driver issues. 
Additionally, no - regressions in behavior have been reported for this change. - - A caveat here is that the PCIe spec requires for compatibility that - devices report support for a size in the range of 1MB to 512GB, - therefore if the current BAR size falls outside that range we revert - to hiding the capability. - - Reviewed-by: Cédric Le Goater - Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com - Signed-off-by: Alex Williamson - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- - 1 file changed, 53 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index 579b92a6ed..6cd3a98c39 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) - return 0; - } - -+static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) -+{ -+ uint32_t ctrl; -+ int i, nbar; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); -+ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; -+ -+ for (i = 0; i < nbar; i++) { -+ uint32_t cap; -+ int size; -+ -+ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); -+ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; -+ -+ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ -+ cap = size <= 27 ? 1U << (size + 4) : 0; -+ -+ /* -+ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one -+ * size in the range 1MB to 512GB. We intend to mask all sizes except -+ * the one currently enabled in the size field, therefore if it's -+ * outside the range, hide the whole capability as this virtualization -+ * trick won't work. If >512GB resizable BARs start to appear, we -+ * might need an opt-in or reservation scheme in the kernel. 
-+ */ -+ if (!(cap & PCI_REBAR_CAP_SIZES)) { -+ return -EINVAL; -+ } -+ -+ /* Hide all sizes reported in the ctrl reg per above requirement. */ -+ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | -+ PCI_REBAR_CTRL_NBAR_MASK | -+ PCI_REBAR_CTRL_BAR_IDX); -+ -+ /* -+ * The BAR size field is RW, however we've mangled the capability -+ * register such that we only report a single size, ie. the current -+ * BAR size. A write of an unsupported value is undefined, therefore -+ * the register field is essentially RO. -+ */ -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); -+ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); -+ } -+ -+ return 0; -+} -+ - static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - { - PCIDevice *pdev = &vdev->pdev; -@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) - case 0: /* kernel masked capability */ - case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ - case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ -- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ - trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); - break; -+ case PCI_EXT_CAP_ID_REBAR: -+ if (!vfio_setup_rebar_ecap(vdev, next)) { -+ pcie_add_capability(pdev, cap_id, cap_ver, next, size); -+ } -+ break; - default: - pcie_add_capability(pdev, cap_id, cap_ver, next, size); - } --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch b/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch deleted file mode 100644 index 7b40e5e..0000000 --- a/SOURCES/kvm-vfio-pci-add-support-for-VF-token.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= -Date: Wed, 12 Jul 2023 17:46:56 +0200 -Subject: [PATCH 03/37] vfio/pci: add support for VF token -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Cédric Le Goater 
-RH-MergeRequest: 179: vfio: live migration support -RH-Bugzilla: 2192818 -RH-Acked-by: Eric Auger -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/28] ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) - -Bugzilla: https://bugzilla.redhat.com/2192818 - -commit 2dca1b37a760 -Author: Minwoo Im -Date: Mon Mar 20 16:35:22 2023 +0900 - - vfio/pci: add support for VF token - - VF token was introduced [1] to kernel vfio-pci along with SR-IOV - support [2]. This patch adds support VF token among PF and VF(s). To - passthu PCIe VF to a VM, kernel >= v5.7 needs this. - - It can be configured with UUID like: - - -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... - - [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ - [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ - - Cc: Alex Williamson - Signed-off-by: Minwoo Im - Reviewed-by: Klaus Jensen - Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 - Signed-off-by: Alex Williamson - -Conflicts: - - hw/vfio/pci.c - context changes in vfio_realize () due to redhat commit 267071d16b23 - ("vfio: cap number of devices that can be assigned") - -Signed-off-by: Cédric Le Goater ---- - hw/vfio/pci.c | 13 ++++++++++++- - hw/vfio/pci.h | 1 + - 2 files changed, 13 insertions(+), 1 deletion(-) - -diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c -index a779053be3..579b92a6ed 100644 ---- a/hw/vfio/pci.c -+++ b/hw/vfio/pci.c -@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - int groupid; - int ret, i = 0; - bool is_mdev; -+ char uuid[UUID_FMT_LEN]; -+ char *name; - - if (device_limit && device_limit != vdev->assigned_device_limit) { - error_setg(errp, "Assigned device limit has been redefined. 
" -@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) - goto error; - } - -- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); -+ if (!qemu_uuid_is_null(&vdev->vf_token)) { -+ qemu_uuid_unparse(&vdev->vf_token, uuid); -+ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); -+ } else { -+ name = vbasedev->name; -+ } -+ -+ ret = vfio_get_device(group, name, vbasedev, errp); -+ g_free(name); - if (ret) { - vfio_put_group(group); - goto error; -@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) - - static Property vfio_pci_dev_properties[] = { - DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), -+ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), - DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), - DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, - vbasedev.pre_copy_dirty_page_tracking, -diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h -index 45235d38ba..10530743ad 100644 ---- a/hw/vfio/pci.h -+++ b/hw/vfio/pci.h -@@ -137,6 +137,7 @@ struct VFIOPCIDevice { - VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ - void *igd_opregion; - PCIHostDeviceAddress host; -+ QemuUUID vf_token; - EventNotifier err_notifier; - EventNotifier req_notifier; - int (*resetfn)(struct VFIOPCIDevice *); --- -2.39.3 - diff --git a/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch new file mode 100644 index 0000000..06c2f0f --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch @@ -0,0 +1,77 @@ +From ede579d6d5fe5be9235d6a218efdb237192aee0e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:11 +0800 +Subject: [PATCH 032/101] vfio/platform: Allow the selection of a given iommu + backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: 
IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [31/67] aba1dc16cada602edd7be1a28b0f57991131e6f7 (eauger1/centos-qemu-kvm) + +Now we support two types of iommu backends, let's add the capability +to select one of them. This depends on whether an iommufd object has +been linked with the vfio-platform device: + +If the user wants to use the legacy backend, it shall not +link the vfio-platform device with any iommufd object: + + -device vfio-platform,host=XXX + +This is called the legacy mode/backend. + +If the user wants to use the iommufd backend (/dev/iommu) it +shall pass an iommufd object id in the vfio-platform device options: + + -object iommufd,id=iommufd0 + -device vfio-platform,host=XXX,iommufd=iommufd0 + +Suggested-by: Alex Williamson +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Reviewed-by: Eric Auger +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit a6c50e1c3f8d0eb77edaea392e61508bb3c516f8) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 8e3d4ac458..98ae4bc655 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -15,11 +15,13 @@ + */ + + #include "qemu/osdep.h" ++#include CONFIG_DEVICES /* CONFIG_IOMMUFD */ + #include "qapi/error.h" + #include + #include + + #include "hw/vfio/vfio-platform.h" ++#include "sysemu/iommufd.h" + #include "migration/vmstate.h" + #include "qemu/error-report.h" + #include "qemu/lockable.h" +@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice, + mmap_timeout, 1100), + DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true), ++#ifdef CONFIG_IOMMUFD ++ DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd, ++ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *), ++#endif + 
DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch new file mode 100644 index 0000000..f931524 --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch @@ -0,0 +1,108 @@ +From 22664f4115d9b297ef4276e48f8ba0bc195ec99e Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:12 +0800 +Subject: [PATCH 033/101] vfio/platform: Make vfio cdev pre-openable by passing + a file handle +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [32/67] 069867dce64b826e92dc2051405a4ded5261981f (eauger1/centos-qemu-kvm) + +This gives management tools like libvirt a chance to open the vfio +cdev with privilege and pass FD to qemu. This way qemu never needs +to have privilege to open a VFIO or iommu cdev node. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit 3016e60f8f715d2058a48e4956be994482c5e218) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 32 ++++++++++++++++++++++++-------- + 1 file changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index 98ae4bc655..a97d9c6234 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = { + */ + static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + { +- struct stat st; + int ret; + +- /* @sysfsdev takes precedence over @host */ +- if (vbasedev->sysfsdev) { ++ /* @fd takes precedence over @sysfsdev which takes precedence over @host */ ++ if (vbasedev->fd < 0 && vbasedev->sysfsdev) { + g_free(vbasedev->name); + vbasedev->name = g_path_get_basename(vbasedev->sysfsdev); +- } else { ++ } else if (vbasedev->fd < 0) { + if (!vbasedev->name || strchr(vbasedev->name, '/')) { + error_setg(errp, "wrong host device name"); + return -EINVAL; +@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp) + vbasedev->name); + } + +- if (stat(vbasedev->sysfsdev, &st) < 0) { +- error_setg_errno(errp, errno, +- "failed to get the sysfs host device file status"); +- return -errno; ++ ret = vfio_device_get_name(vbasedev, errp); ++ if (ret) { ++ return ret; + } + + ret = vfio_attach_device(vbasedev->name, vbasedev, +@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void vfio_platform_instance_init(Object *obj) ++{ ++ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ ++ vdev->vbasedev.fd = -1; ++} ++ ++#ifdef CONFIG_IOMMUFD ++static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp) ++{ ++ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp); ++} ++#endif ++ + static void 
vfio_platform_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data) + + dc->realize = vfio_platform_realize; + device_class_set_props(dc, vfio_platform_dev_properties); ++#ifdef CONFIG_IOMMUFD ++ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd); ++#endif + dc->vmsd = &vfio_platform_vmstate; + dc->desc = "VFIO-based platform device assignment"; + sbc->connect_irq_notifier = vfio_start_irqfd_injection; +@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = { + .name = TYPE_VFIO_PLATFORM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(VFIOPlatformDevice), ++ .instance_init = vfio_platform_instance_init, + .class_init = vfio_platform_class_init, + .class_size = sizeof(VFIOPlatformDeviceClass), + }; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch new file mode 100644 index 0000000..56283a6 --- /dev/null +++ b/SOURCES/kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch @@ -0,0 +1,64 @@ +From 2417020283532030f424fe07dfeb7477e6489640 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Tue, 21 Nov 2023 16:44:22 +0800 +Subject: [PATCH 043/101] vfio/platform: Move VFIODevice initializations in + vfio_platform_instance_init +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [42/67] 53a459b6246d7d7bdc7a62ac92f02f1e775a54a6 (eauger1/centos-qemu-kvm) + +Some of the VFIODevice initializations are in vfio_platform_realize, +move all of them into vfio_platform_instance_init. + +No functional change intended. 
+ +Suggested-by: Cédric Le Goater +Signed-off-by: Zhenzhong Duan +Reviewed-by: Philippe Mathieu-Daudé +Tested-by: Nicolin Chen +Signed-off-by: Cédric Le Goater +(cherry picked from commit a0cf44c8d618578843a65ea7f6d3db8ce52185bc) +Signed-off-by: Eric Auger +--- + hw/vfio/platform.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c +index a97d9c6234..506eb8193f 100644 +--- a/hw/vfio/platform.c ++++ b/hw/vfio/platform.c +@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp) + VFIODevice *vbasedev = &vdev->vbasedev; + int i, ret; + +- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; +- vbasedev->dev = dev; +- vbasedev->ops = &vfio_platform_ops; +- + qemu_mutex_init(&vdev->intp_mutex); + + trace_vfio_platform_realize(vbasedev->sysfsdev ? +@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = { + static void vfio_platform_instance_init(Object *obj) + { + VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj); ++ VFIODevice *vbasedev = &vdev->vbasedev; + +- vdev->vbasedev.fd = -1; ++ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM; ++ vbasedev->ops = &vfio_platform_ops; ++ vbasedev->dev = DEVICE(vdev); ++ vbasedev->fd = -1; + } + + #ifdef CONFIG_IOMMUFD +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch new file mode 100644 index 0000000..fb7e707 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch @@ -0,0 +1,129 @@ +From e75ec2aca351daabe597ca6322c1589885f30d7a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:16 +0100 +Subject: [PATCH 049/101] vfio/spapr: Extend VFIOIOMMUOps with a release + handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend 
backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [48/67] 1c4d22a6f69324805d050767fcf178d8566f2030 (eauger1/centos-qemu-kvm) + +This allows to abstract a bit more the sPAPR IOMMU support in the +legacy IOMMU backend. + +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 001a013ea3f125d2ec0e709b5765754149d8d968) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 10 +++----- + hw/vfio/spapr.c | 35 +++++++++++++++------------ + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 24 insertions(+), 22 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index b22feb8ded..1e77a2929e 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -632,9 +632,8 @@ listener_release_exit: + QLIST_REMOVE(bcontainer, next); + vfio_kvm_device_del_group(group); + memory_listener_unregister(&bcontainer->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || +- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); + } + + enable_discards_exit: +@@ -667,9 +666,8 @@ static void vfio_disconnect_container(VFIOGroup *group) + */ + if (QLIST_EMPTY(&container->group_list)) { + memory_listener_unregister(&bcontainer->listener); +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU || +- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) { +- vfio_spapr_container_deinit(container); ++ if (bcontainer->ops->release) { ++ bcontainer->ops->release(bcontainer); + } + } + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5c6426e697..44617dfc6b 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + } + } + ++static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) ++{ ++ VFIOContainer *container = 
container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); ++ VFIOHostDMAWindow *hostwin, *next; ++ ++ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { ++ memory_listener_unregister(&scontainer->prereg_listener); ++ } ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, ++ next) { ++ QLIST_REMOVE(hostwin, hostwin_next); ++ g_free(hostwin); ++ } ++} ++ + static VFIOIOMMUOps vfio_iommu_spapr_ops; + + static void setup_spapr_ops(VFIOContainerBase *bcontainer) +@@ -447,6 +465,7 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) + vfio_iommu_spapr_ops = *bcontainer->ops; + vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; + vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; ++ vfio_iommu_spapr_ops.release = vfio_spapr_container_release; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +@@ -527,19 +546,3 @@ listener_unregister_exit: + } + return ret; + } +- +-void vfio_spapr_container_deinit(VFIOContainer *container) +-{ +- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, +- container); +- VFIOHostDMAWindow *hostwin, *next; +- +- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- memory_listener_unregister(&scontainer->prereg_listener); +- } +- QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, +- next) { +- QLIST_REMOVE(hostwin, hostwin_next); +- g_free(hostwin); +- } +-} +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 2ae297ccda..5c9594b6c7 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -117,5 +117,6 @@ struct VFIOIOMMUOps { + Error **errp); + void (*del_window)(VFIOContainerBase *bcontainer, + MemoryRegionSection *section); ++ void (*release)(VFIOContainerBase *bcontainer); + }; + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + 
diff --git a/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch new file mode 100644 index 0000000..f835acb --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch @@ -0,0 +1,150 @@ +From 645ed97633935712edcc2c56f252738b38f15e3a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:22 +0100 +Subject: [PATCH 055/101] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [54/67] 2ceac3c07d71790dc3852fbbbd4084a7affb9373 (eauger1/centos-qemu-kvm) + +Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler +and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The +sPAPR QOM interface inherits from the legacy QOM interface because +both have the same basic needs. The sPAPR interface is then +extended with the handlers specific to the sPAPR IOMMU. + +This allows reuse and provides better abstraction of the backends. It +will be useful to avoid compiling the sPAPR IOMMU backend on targets +not supporting it. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit f221f641a2fe69c2ca3857759551470664b0bec8) +Signed-off-by: Eric Auger +--- + hw/vfio/container.c | 18 +++++-------- + hw/vfio/spapr.c | 39 ++++++++++++++++----------- + include/hw/vfio/vfio-container-base.h | 1 + + 3 files changed, 31 insertions(+), 27 deletions(-) + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index c22bdd3216..688cf23bab 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -381,6 +381,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp) + case VFIO_TYPE1_IOMMU: + klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY); + break; ++ case VFIO_SPAPR_TCE_v2_IOMMU: ++ case VFIO_SPAPR_TCE_IOMMU: ++ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR); ++ break; + default: + g_assert_not_reached(); + }; +@@ -623,19 +627,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + goto free_container_exit; + } + +- switch (container->iommu_type) { +- case VFIO_TYPE1v2_IOMMU: +- case VFIO_TYPE1_IOMMU: +- ret = vfio_legacy_setup(bcontainer, errp); +- break; +- case VFIO_SPAPR_TCE_v2_IOMMU: +- case VFIO_SPAPR_TCE_IOMMU: +- ret = vfio_spapr_container_init(container, errp); +- break; +- default: +- g_assert_not_reached(); +- } ++ assert(bcontainer->ops->setup); + ++ ret = bcontainer->ops->setup(bcontainer, errp); + if (ret) { + goto enable_discards_exit; + } +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 44617dfc6b..0d949bb728 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -458,20 +458,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer) + } + } + +-static VFIOIOMMUOps vfio_iommu_spapr_ops; +- +-static void setup_spapr_ops(VFIOContainerBase *bcontainer) +-{ +- vfio_iommu_spapr_ops = *bcontainer->ops; +- vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; +- vfio_iommu_spapr_ops.del_window = 
vfio_spapr_container_del_section_window; +- vfio_iommu_spapr_ops.release = vfio_spapr_container_release; +- bcontainer->ops = &vfio_iommu_spapr_ops; +-} +- +-int vfio_spapr_container_init(VFIOContainer *container, Error **errp) ++static int vfio_spapr_container_setup(VFIOContainerBase *bcontainer, ++ Error **errp) + { +- VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, + container); + struct vfio_iommu_spapr_tce_info info; +@@ -536,8 +527,6 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + 0x1000); + } + +- setup_spapr_ops(bcontainer); +- + return 0; + + listener_unregister_exit: +@@ -546,3 +535,23 @@ listener_unregister_exit: + } + return ret; + } ++ ++static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data) ++{ ++ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass); ++ ++ vioc->add_window = vfio_spapr_container_add_section_window; ++ vioc->del_window = vfio_spapr_container_del_section_window; ++ vioc->release = vfio_spapr_container_release; ++ vioc->setup = vfio_spapr_container_setup; ++}; ++ ++static const TypeInfo types[] = { ++ { ++ .name = TYPE_VFIO_IOMMU_SPAPR, ++ .parent = TYPE_VFIO_IOMMU_LEGACY, ++ .class_init = vfio_iommu_spapr_class_init, ++ }, ++}; ++ ++DEFINE_TYPES(types) +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index ce8b1fba88..9e21d7811f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer); + + #define TYPE_VFIO_IOMMU "vfio-iommu" + #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" ++#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" + + /* + * VFIOContainerBase is not an abstract QOM object because it felt +-- +2.39.3 + diff --git 
a/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch new file mode 100644 index 0000000..f1ca4a2 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch @@ -0,0 +1,91 @@ +From ff0c13c22878eed0f3879c0805bef5b9f9d83e04 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:42 +0800 +Subject: [PATCH 017/101] vfio/spapr: Introduce spapr backend and target + interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [16/67] e35cda157a2a1afeded3305622c861abd07edb51 (eauger1/centos-qemu-kvm) + +Introduce an empty spapr backend which will hold spapr specific +content, currently only prereg_listener and hostwin_list. + +Also introduce two spapr specific callbacks add/del_window into +VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops +and assign it to bcontainer->ops. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 9b7d38bf5a2c1054bfe6de08806954cdc45d8d98) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 14 ++++++++++++++ + include/hw/vfio/vfio-container-base.h | 6 ++++++ + 2 files changed, 20 insertions(+) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 7a50975f25..e1a6b35563 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -24,6 +24,10 @@ + #include "qapi/error.h" + #include "trace.h" + ++typedef struct VFIOSpaprContainer { ++ VFIOContainer container; ++} VFIOSpaprContainer; ++ + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) + { + if (memory_region_is_iommu(section->mr)) { +@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container, + } + } + ++static VFIOIOMMUOps vfio_iommu_spapr_ops; ++ ++static void setup_spapr_ops(VFIOContainerBase *bcontainer) ++{ ++ vfio_iommu_spapr_ops = *bcontainer->ops; ++ bcontainer->ops = &vfio_iommu_spapr_ops; ++} ++ + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; +@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + 0x1000); + } + ++ setup_spapr_ops(bcontainer); ++ + return 0; + + listener_unregister_exit: +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index 9658ffb526..f62a14ac73 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -101,5 +101,11 @@ struct VFIOIOMMUOps { + int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start); + int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, + hwaddr iova, hwaddr size); ++ /* SPAPR specific */ ++ int (*add_window)(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++ void (*del_window)(VFIOContainerBase *bcontainer, 
++ MemoryRegionSection *section); + }; + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch new file mode 100644 index 0000000..93cb6b8 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch @@ -0,0 +1,188 @@ +From 3e9e7b57b15ac328f5d663b4e04df546d49f5fa6 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:45 +0800 +Subject: [PATCH 020/101] vfio/spapr: Move hostwin_list into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [19/67] 87cfeaa32ad32a260a89b2bb1866d59e20c0fe30 (eauger1/centos-qemu-kvm) + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit dbb9d0c9691d145338686d3e0920da047f2ab3da) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 36 +++++++++++++++++++---------------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 20 insertions(+), 17 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 68c3dd6c75..5c6426e697 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -27,6 +27,7 @@ + typedef struct VFIOSpaprContainer { + VFIOContainer container; + MemoryListener prereg_listener; ++ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = { + .region_del = vfio_prereg_listener_region_del, + }; + +-static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, ++static void 
vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova, + hwaddr max_iova, uint64_t iova_pgsizes) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + min_iova, +@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova, + hostwin->min_iova = min_iova; + hostwin->max_iova = max_iova; + hostwin->iova_pgsizes = iova_pgsizes; +- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next); ++ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next); + } + +-static int vfio_host_win_del(VFIOContainer *container, ++static int vfio_host_win_del(VFIOSpaprContainer *scontainer, + hwaddr min_iova, hwaddr max_iova) + { + VFIOHostDMAWindow *hostwin; + +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container, + return -1; + } + +-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container, ++static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container, + hwaddr iova, hwaddr end) + { + VFIOHostDMAWindow *hostwin; +@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + iova = section->offset_within_address_space; + end = iova + 
int128_get64(section->size) - 1; + +- if (!vfio_find_hostwin(container, iova, end)) { ++ if (!vfio_find_hostwin(scontainer, iova, end)) { + error_setg(errp, "Container %p can't map guest IOVA region" + " 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container, + iova, end); +@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + } + + /* For now intersections are not allowed, we may relax this later */ +- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) { ++ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) { + if (ranges_overlap(hostwin->min_iova, + hostwin->max_iova - hostwin->min_iova + 1, + section->offset_within_address_space, +@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, + return ret; + } + +- vfio_host_win_add(container, section->offset_within_address_space, ++ vfio_host_win_add(scontainer, section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1, pgsize); + #ifdef CONFIG_KVM +@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + { + VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; +@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, + + vfio_spapr_remove_window(container, + section->offset_within_address_space); +- if (vfio_host_win_del(container, ++ if (vfio_host_win_del(scontainer, + section->offset_within_address_space, + section->offset_within_address_space + + int128_get64(section->size) - 1) < 0) { +@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; + +- QLIST_INIT(&container->hostwin_list); ++ 
QLIST_INIT(&scontainer->hostwin_list); + + /* + * The host kernel code implementing VFIO_IOMMU_DISABLE is called +@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + } else { + /* The default table uses 4K pages */ + bcontainer->pgsizes = 0x1000; +- vfio_host_win_add(container, info.dma32_window_start, ++ vfio_host_win_add(scontainer, info.dma32_window_start, + info.dma32_window_start + + info.dma32_window_size - 1, + 0x1000); +@@ -525,15 +530,14 @@ listener_unregister_exit: + + void vfio_spapr_container_deinit(VFIOContainer *container) + { ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- VFIOSpaprContainer *scontainer = container_of(container, +- VFIOSpaprContainer, +- container); + memory_listener_unregister(&scontainer->prereg_listener); + } +- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, ++ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next, + next) { + QLIST_REMOVE(hostwin, hostwin_next); + g_free(hostwin); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index ed6148c058..24ecc0e7ee 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -79,7 +79,6 @@ typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ + unsigned iommu_type; +- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + } VFIOContainer; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch new file mode 100644 index 0000000..1db4b55 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch @@ -0,0 +1,120 @@ +From 17e6dad3e43e173147c0ca33f6f1f4f317a77d0b Mon Sep 17 
00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:44 +0800 +Subject: [PATCH 019/101] vfio/spapr: Move prereg_listener into spapr container +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [18/67] dbea1b0b759e91b953271da92bba4ca6853bec82 (eauger1/centos-qemu-kvm) + +No functional changes intended. + +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +Signed-off-by: Cédric Le Goater +(cherry picked from commit 6ad359ec29af7f21dcb206c8edb26905a4925f80) +Signed-off-by: Eric Auger +--- + hw/vfio/spapr.c | 24 ++++++++++++++++-------- + include/hw/vfio/vfio-common.h | 1 - + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index 5be1911aad..68c3dd6c75 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -26,6 +26,7 @@ + + typedef struct VFIOSpaprContainer { + VFIOContainer container; ++ MemoryListener prereg_listener; + } VFIOSpaprContainer; + + static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section) +@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa) + static void vfio_prereg_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + VFIOContainerBase *bcontainer = &container->bcontainer; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; +@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener, + static void vfio_prereg_listener_region_del(MemoryListener *listener, + MemoryRegionSection 
*section) + { +- VFIOContainer *container = container_of(listener, VFIOContainer, +- prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer, ++ prereg_listener); ++ VFIOContainer *container = &scontainer->container; + const hwaddr gpa = section->offset_within_address_space; + hwaddr end; + int ret; +@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer) + int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + { + VFIOContainerBase *bcontainer = &container->bcontainer; ++ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer, ++ container); + struct vfio_iommu_spapr_tce_info info; + bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU; + int ret, fd = container->fd; +@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + return -errno; + } + } else { +- container->prereg_listener = vfio_prereg_listener; ++ scontainer->prereg_listener = vfio_prereg_listener; + +- memory_listener_register(&container->prereg_listener, ++ memory_listener_register(&scontainer->prereg_listener, + &address_space_memory); + if (bcontainer->error) { + ret = -1; +@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp) + + listener_unregister_exit: + if (v2) { +- memory_listener_unregister(&container->prereg_listener); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + return ret; + } +@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container) + VFIOHostDMAWindow *hostwin, *next; + + if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) { +- memory_listener_unregister(&container->prereg_listener); ++ VFIOSpaprContainer *scontainer = container_of(container, ++ VFIOSpaprContainer, ++ container); ++ memory_listener_unregister(&scontainer->prereg_listener); + } + QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next, + next) { +diff --git a/include/hw/vfio/vfio-common.h 
b/include/hw/vfio/vfio-common.h +index 055f679363..ed6148c058 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -78,7 +78,6 @@ struct VFIOGroup; + typedef struct VFIOContainer { + VFIOContainerBase bcontainer; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +- MemoryListener prereg_listener; + unsigned iommu_type; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch new file mode 100644 index 0000000..7762804 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch @@ -0,0 +1,46 @@ +From 5d485eb1442a81b51688124ce30024e96490acbf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 19 Dec 2023 07:58:24 +0100 +Subject: [PATCH 057/101] vfio/spapr: Only compile sPAPR IOMMU support when + needed +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [56/67] 4dc0cbde470f877a8aac2bf6fab6923f2f919285 (eauger1/centos-qemu-kvm) + +sPAPR IOMMU support is only needed for pseries machines. Compile out +support when CONFIG_PSERIES is not set. This saves ~7K of text. 
+ +Reviewed-by: Zhenzhong Duan +Tested-by: Eric Farman +Signed-off-by: Cédric Le Goater +(cherry picked from commit 10164df6ed3d41cbf67105dcd954a663ef4cc3e9) +Signed-off-by: Eric Auger +--- + hw/vfio/meson.build | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build +index e5d98b6adc..bb98493b53 100644 +--- a/hw/vfio/meson.build ++++ b/hw/vfio/meson.build +@@ -4,9 +4,9 @@ vfio_ss.add(files( + 'common.c', + 'container-base.c', + 'container.c', +- 'spapr.c', + 'migration.c', + )) ++vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c')) + vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files( + 'iommufd.c', + )) +-- +2.39.3 + diff --git a/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch new file mode 100644 index 0000000..4d8db61 --- /dev/null +++ b/SOURCES/kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch @@ -0,0 +1,184 @@ +From 3b7f044f15b4a9daf4ad7eda58777aba6dbe3fc0 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 2 Nov 2023 15:12:43 +0800 +Subject: [PATCH 018/101] vfio/spapr: switch to spapr IOMMU BE + add/del_section_window +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 211: IOMMUFD backend backport +RH-Jira: RHEL-19302 RHEL-21057 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Sebastian Ott +RH-Commit: [17/67] a0d9f1f2d4d2592f3d9fc2ee5b2c38236a986e38 (eauger1/centos-qemu-kvm) + +No functional change intended. 
+ +Signed-off-by: Zhenzhong Duan +Reviewed-by: Cédric Le Goater +(cherry picked from commit 233309e8e4c158af6c6b126d5ad021bae40a918a) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 8 ++------ + hw/vfio/container-base.c | 21 +++++++++++++++++++++ + hw/vfio/spapr.c | 19 ++++++++++++++----- + include/hw/vfio/vfio-common.h | 5 ----- + include/hw/vfio/vfio-container-base.h | 5 +++++ + 5 files changed, 42 insertions(+), 16 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 483ba82089..572ae7c934 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + void *vaddr; +@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener, + return; + } + +- if (vfio_container_add_section_window(container, section, &err)) { ++ if (vfio_container_add_section_window(bcontainer, section, &err)) { + goto fail; + } + +@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener, + { + VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, + listener); +- VFIOContainer *container = container_of(bcontainer, VFIOContainer, +- bcontainer); + hwaddr iova, end; + Int128 llend, llsize; + int ret; +@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + + memory_region_unref(section->mr); + +- vfio_container_del_section_window(container, section); ++ vfio_container_del_section_window(bcontainer, section); + } + + typedef struct VFIODirtyRanges { +diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c +index 0177f43741..71f7274973 100644 +--- a/hw/vfio/container-base.c ++++ b/hw/vfio/container-base.c +@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase 
*bcontainer, + return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb); + } + ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) ++{ ++ if (!bcontainer->ops->add_window) { ++ return 0; ++ } ++ ++ return bcontainer->ops->add_window(bcontainer, section, errp); ++} ++ ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) ++{ ++ if (!bcontainer->ops->del_window) { ++ return; ++ } ++ ++ return bcontainer->ops->del_window(bcontainer, section); ++} ++ + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start) + { +diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c +index e1a6b35563..5be1911aad 100644 +--- a/hw/vfio/spapr.c ++++ b/hw/vfio/spapr.c +@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container, + return 0; + } + +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp) ++static int ++vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); + VFIOHostDMAWindow *hostwin; + hwaddr pgsize = 0; + int ret; +@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container, + return 0; + } + +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section) ++static void ++vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section) + { ++ VFIOContainer *container = container_of(bcontainer, VFIOContainer, ++ bcontainer); ++ + if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) { + return; + } +@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops; + static void setup_spapr_ops(VFIOContainerBase *bcontainer) + { + vfio_iommu_spapr_ops = *bcontainer->ops; ++ 
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window; ++ vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window; + bcontainer->ops = &vfio_iommu_spapr_ops; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b9e5a0e64b..055f679363 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -169,11 +169,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as); + void vfio_put_address_space(VFIOAddressSpace *space); + + /* SPAPR specific */ +-int vfio_container_add_section_window(VFIOContainer *container, +- MemoryRegionSection *section, +- Error **errp); +-void vfio_container_del_section_window(VFIOContainer *container, +- MemoryRegionSection *section); + int vfio_spapr_container_init(VFIOContainer *container, Error **errp); + void vfio_spapr_container_deinit(VFIOContainer *container); + +diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h +index f62a14ac73..4b6f017c6f 100644 +--- a/include/hw/vfio/vfio-container-base.h ++++ b/include/hw/vfio/vfio-container-base.h +@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer, + int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + IOMMUTLBEntry *iotlb); ++int vfio_container_add_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section, ++ Error **errp); ++void vfio_container_del_section_window(VFIOContainerBase *bcontainer, ++ MemoryRegionSection *section); + int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer, + bool start); + int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer, +-- +2.39.3 + diff --git a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch b/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch deleted file mode 100644 index 3282c24..0000000 --- a/SOURCES/kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch +++ 
/dev/null @@ -1,138 +0,0 @@ -From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Fri, 2 Jun 2023 18:27:35 +0200 -Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case -RH-Jira: RHEL-330 -RH-Acked-by: MST -RH-Acked-by: Cindy Lu -RH-Acked-by: Eugenio Pérez -RH-Acked-by: Jason Wang -RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos) - -JIRA: https://issues.redhat.com/browse/RHEL-330 - -in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true) -fails, we call vhost_dev_disable_notifiers() that executes -virtio_bus_set_host_notifier(false) on all queues, even on queues that -have failed to be initialized. - -This triggers a core dump in memory_region_del_eventfd(): - - virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24) - vhost VQ 1 notifier binding failed: 24 - .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed. - -Fix the problem by providing to vhost_dev_disable_notifiers() the -number of queues to disable. - -Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers") -Cc: longpeng2@huawei.com -Signed-off-by: Laurent Vivier -Message-Id: <20230602162735.3670785-1-lvivier@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. 
Tsirkin -Reviewed-by: Philippe Mathieu-Daudé -(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd) ---- - hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++--------------------- - 1 file changed, 36 insertions(+), 29 deletions(-) - -diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c -index a266396576..ae0a033e60 100644 ---- a/hw/virtio/vhost.c -+++ b/hw/virtio/vhost.c -@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) - memset(hdev, 0, sizeof(struct vhost_dev)); - } - -+static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, -+ VirtIODevice *vdev, -+ unsigned int nvqs) -+{ -+ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -+ int i, r; -+ -+ /* -+ * Batch all the host notifiers in a single transaction to avoid -+ * quadratic time complexity in address_space_update_ioeventfds(). -+ */ -+ memory_region_transaction_begin(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -+ false); -+ if (r < 0) { -+ error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -+ } -+ assert(r >= 0); -+ } -+ -+ /* -+ * The transaction expects the ioeventfds to be open when it -+ * commits. Do it now, before the cleanup loop. -+ */ -+ memory_region_transaction_commit(); -+ -+ for (i = 0; i < nvqs; ++i) { -+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -+ } -+ virtio_device_release_ioeventfd(vdev); -+} -+ - /* Stop processing guest IO notifications in qemu. - * Start processing them in vhost in kernel. 
- */ -@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - if (r < 0) { - error_report("vhost VQ %d notifier binding failed: %d", i, -r); - memory_region_transaction_commit(); -- vhost_dev_disable_notifiers(hdev, vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, i); - return r; - } - } -@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - */ - void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) - { -- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); -- int i, r; -- -- /* -- * Batch all the host notifiers in a single transaction to avoid -- * quadratic time complexity in address_space_update_ioeventfds(). -- */ -- memory_region_transaction_begin(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i, -- false); -- if (r < 0) { -- error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); -- } -- assert (r >= 0); -- } -- -- /* -- * The transaction expects the ioeventfds to be open when it -- * commits. Do it now, before the cleanup loop. -- */ -- memory_region_transaction_commit(); -- -- for (i = 0; i < hdev->nvqs; ++i) { -- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); -- } -- virtio_device_release_ioeventfd(vdev); -+ vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs); - } - - /* Test and clear event pending status. 
--- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch b/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch deleted file mode 100644 index fd29eb7..0000000 --- a/SOURCES/kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001 -From: Ani Sinha -Date: Mon, 19 Jun 2023 12:22:09 +0530 -Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures - if peer nic is present - -RH-Author: Ani Sinha -RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present -RH-Bugzilla: 2128929 -RH-Acked-by: Igor Mammedov -RH-Acked-by: Miroslav Rezanina -RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm) - -When a peer nic is still attached to the vdpa backend, it is too early to free -up the vhost-net and vdpa structures. If these structures are freed here, then -QEMU crashes when the guest is being shut down. The following call chain -would result in an assertion failure since the pointer returned from -vhost_vdpa_get_vhost_net() would be NULL: - -do_vm_stop() -> vm_state_notify() -> virtio_set_status() -> -virtio_net_vhost_status() -> get_vhost_net(). - -Therefore, we defer freeing up the structures until at guest shutdown -time when qemu_cleanup() calls net_cleanup() which then calls -qemu_del_net_client() which would eventually call vhost_vdpa_cleanup() -again to free up the structures. This time, the loop in net_cleanup() -ensures that vhost_vdpa_cleanup() will be called one last time when -all the peer nics are detached and freed. - -All unit tests pass with this change. 
- -CC: imammedo@redhat.com -CC: jusual@redhat.com -CC: mst@redhat.com -Fixes: CVE-2023-3301 -Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 -Signed-off-by: Ani Sinha -Message-Id: <20230619065209.442185-1-anisinha@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8) ---- - net/vhost-vdpa.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 99904a0da7..8c8900f0f4 100644 ---- a/net/vhost-vdpa.c -+++ b/net/vhost-vdpa.c -@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc) - { - VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - -+ /* -+ * If a peer NIC is attached, do not cleanup anything. -+ * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup() -+ * when the guest is shutting down. -+ */ -+ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) { -+ return; -+ } - qemu_vfree(s->cvq_cmd_out_buffer); - qemu_vfree(s->status); - if (s->vhost_net) { --- -2.39.3 - diff --git a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch deleted file mode 100644 index 3711949..0000000 --- a/SOURCES/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Tue, 4 Jul 2023 09:19:31 +0200 -Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report -RH-Bugzilla: 2141965 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141965 - -With 
TPM CRM device, vhost-vdpa reports an error when it tries -to register a listener for a non aligned memory region: - - qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region - qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region - -This error can be confusing for the user whereas we only need to skip -the region (as it's already done after the error_report()) - -Rather than introducing a special case for TPM CRB memory section -to not display the message in this case, simply replace the -error_report() by a trace function (with more information, like the -memory region name). - -Signed-off-by: Laurent Vivier -Message-Id: <20230704071931.575888-2-lvivier@redhat.com> -Reviewed-by: David Hildenbrand -Acked-by: Jason Wang -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) ---- - hw/virtio/trace-events | 2 ++ - hw/virtio/vhost-vdpa.c | 8 ++++++-- - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 68b752e304..300dec8d3e 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ - vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 - vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 - vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -+vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - 
vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -+vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 - vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 - vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 - vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" -diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index bc6bad23d5..c04f14420d 100644 ---- a/hw/virtio/vhost-vdpa.c -+++ b/hw/virtio/vhost-vdpa.c -@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - -@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, - - if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != - (section->offset_within_region & ~TARGET_PAGE_MASK))) { -- error_report("%s received unaligned region", __func__); -+ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, -+ section->offset_within_address_space & ~TARGET_PAGE_MASK, -+ section->offset_within_region & ~TARGET_PAGE_MASK); - return; - } - --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch new file mode 100644 index 0000000..ef770fd --- /dev/null +++ 
b/SOURCES/kvm-virtio-Re-enable-notifications-after-drain.patch @@ -0,0 +1,139 @@ +From 2a758da4e1433564998def68447008908c96e113 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:57 +0100 +Subject: [PATCH 2/6] virtio: Re-enable notifications after drain + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] e3be798e6259a378fc03f4364ecaeb875b01f64c (hreitz/qemu-kvm-c-9-s) + +During drain, we do not care about virtqueue notifications, which is why +we remove the handlers on it. When removing those handlers, whether vq +notifications are enabled or not depends on whether we were in polling +mode or not; if not, they are enabled (by default); if so, they have +been disabled by the io_poll_start callback. + +Because we do not care about those notifications after removing the +handlers, this is fine. However, we have to explicitly ensure they are +enabled when re-attaching the handlers, so we will resume receiving +notifications. We do this in virtio_queue_aio_attach_host_notifier*(). +If such a function is called while we are in a polling section, +attaching the notifiers will then invoke the io_poll_start callback, +re-disabling notifications. + +Because we will always miss virtqueue updates in the drained section, we +also need to poll the virtqueue once after attaching the notifiers. 
+ +Buglink: https://issues.redhat.com/browse/RHEL-3934 +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-3-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 5bdbaebcce18fe6a627cafad2043ec08f3de5744) +--- + hw/virtio/virtio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ + include/block/aio.h | 7 ++++++- + 2 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3a160f86ed..356d690cc9 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3556,6 +3556,17 @@ static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n) + + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + { ++ /* ++ * virtio_queue_aio_detach_host_notifier() can leave notifications disabled. ++ * Re-enable them. (And if detach has not been used before, notifications ++ * being enabled is still the default state while a notifier is attached; ++ * see virtio_queue_host_notifier_aio_poll_end(), which will always leave ++ * notifications enabled once the polling section is left.) ++ */ ++ if (!virtio_queue_get_notification(vq)) { ++ virtio_queue_set_notification(vq, 1); ++ } ++ + aio_set_event_notifier(ctx, &vq->host_notifier, + virtio_queue_host_notifier_read, + virtio_queue_host_notifier_aio_poll, +@@ -3563,6 +3574,13 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + aio_set_event_notifier_poll(ctx, &vq->host_notifier, + virtio_queue_host_notifier_aio_poll_begin, + virtio_queue_host_notifier_aio_poll_end); ++ ++ /* ++ * We will have ignored notifications about new requests from the guest ++ * while no notifiers were attached, so "kick" the virt queue to process ++ * those requests now. 
++ */ ++ event_notifier_set(&vq->host_notifier); + } + + /* +@@ -3573,14 +3591,38 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + */ + void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) + { ++ /* See virtio_queue_aio_attach_host_notifier() */ ++ if (!virtio_queue_get_notification(vq)) { ++ virtio_queue_set_notification(vq, 1); ++ } ++ + aio_set_event_notifier(ctx, &vq->host_notifier, + virtio_queue_host_notifier_read, + NULL, NULL); ++ ++ /* ++ * See virtio_queue_aio_attach_host_notifier(). ++ * Note that this may be unnecessary for the type of virtqueues this ++ * function is used for. Still, it will not hurt to have a quick look into ++ * whether we can/should process any of the virtqueue elements. ++ */ ++ event_notifier_set(&vq->host_notifier); + } + + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, NULL, NULL, NULL); ++ ++ /* ++ * aio_set_event_notifier_poll() does not guarantee whether io_poll_end() ++ * will run after io_poll_begin(), so by removing the notifier, we do not ++ * know whether virtio_queue_host_notifier_aio_poll_end() has run after a ++ * previous virtio_queue_host_notifier_aio_poll_begin(), i.e. whether ++ * notifications are enabled or disabled. It does not really matter anyway; ++ * we just removed the notifier, so we do not care about notifications until ++ * we potentially re-attach it. The attach_host_notifier functions will ++ * ensure that notifications are enabled again when they are needed. 
++ */ + } + + void virtio_queue_host_notifier_read(EventNotifier *n) +diff --git a/include/block/aio.h b/include/block/aio.h +index af05512a7d..261c77fd9a 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -480,9 +480,14 @@ void aio_set_event_notifier(AioContext *ctx, + AioPollFn *io_poll, + EventNotifierHandler *io_poll_ready); + +-/* Set polling begin/end callbacks for an event notifier that has already been ++/* ++ * Set polling begin/end callbacks for an event notifier that has already been + * registered with aio_set_event_notifier. Do nothing if the event notifier is + * not registered. ++ * ++ * Note that if the io_poll_end() callback (or the entire notifier) is removed ++ * during polling, it will not be called, so an io_poll_begin() is not ++ * necessarily always followed by an io_poll_end(). + */ + void aio_set_event_notifier_poll(AioContext *ctx, + EventNotifier *notifier, +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch new file mode 100644 index 0000000..0565357 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch @@ -0,0 +1,47 @@ +From 376df80fbba5a9bb0ec43cad083cde9de59128d7 Mon Sep 17 00:00:00 2001 +From: Stefan Weil via +Date: Sun, 24 Dec 2023 12:43:14 +0100 +Subject: [PATCH 10/22] virtio-blk: Fix potential nullpointer read access in + virtio_blk_data_plane_destroy + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [6/17] 460005fc7719b2e1dd577dfe75d18537ab2b8d06 (stefanha/centos-stream-qemu-kvm) + +Fixes: CID 1532828 +Fixes: b6948ab01d ("virtio-blk: add iothread-vq-mapping parameter") +Signed-off-by: Stefan Weil +Signed-off-by: Michael Tokarev +(cherry picked from commit d819fc9516a4ec71e37a6c9edfcd285b7f98c2dc) 
+Signed-off-by: Stefan Hajnoczi +--- + hw/block/dataplane/virtio-blk.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 6debd4401e..97a302cf49 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -152,7 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + { + VirtIOBlock *vblk; +- VirtIOBlkConf *conf = s->conf; ++ VirtIOBlkConf *conf; + + if (!s) { + return; +@@ -160,6 +160,7 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); ++ conf = s->conf; + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch new file mode 100644 index 0000000..1a3771e --- /dev/null +++ b/SOURCES/kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch @@ -0,0 +1,75 @@ +From 094941b2c3e66e078d93718933eb07e800a7dd60 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:58 +0100 +Subject: [PATCH 3/6] virtio-blk: Use ioeventfd_attach in start_ioeventfd + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 96d6760d1b7b12df695b6825b15a2a3b8a79a74c (hreitz/qemu-kvm-c-9-s) + +Commit d3f6f294aeadd5f88caf0155e4360808c95b3146 ("virtio-blk: always set +ioeventfd during startup") has made virtio_blk_start_ioeventfd() always +kick the virtqueue (set the ioeventfd), regardless of whether the BB is +drained. 
That is no longer necessary, because attaching the host +notifier will now set the ioeventfd, too; this happens either +immediately right here in virtio_blk_start_ioeventfd(), or later when +the drain ends, in virtio_blk_ioeventfd_attach(). + +With event_notifier_set() removed, the code becomes the same as the one +in virtio_blk_ioeventfd_attach(), so we can reuse that function. + +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-4-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 52bff01f64eec017ffb0d5903a0ee1d67ca7a548) +--- + hw/block/virtio-blk.c | 21 ++++++++++----------- + 1 file changed, 10 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 0b9100b746..7fdeaf2d12 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -37,6 +37,8 @@ + #include "hw/virtio/virtio-blk-common.h" + #include "qemu/coroutine.h" + ++static void virtio_blk_ioeventfd_attach(VirtIOBlock *s); ++ + static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq, + VirtIOBlockReq *req) + { +@@ -1808,17 +1810,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + s->ioeventfd_started = true; + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + +- /* Get this show started by hooking up our callbacks */ +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- +- if (!blk_in_drain(s->conf.conf.blk)) { +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } ++ /* ++ * Get this show started by hooking up our callbacks. If drained now, ++ * virtio_blk_drained_end() will do this later. ++ * Attaching the notifier also kicks the virtqueues, processing any requests ++ * they may already have. 
++ */ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ virtio_blk_ioeventfd_attach(s); + } + return 0; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch new file mode 100644 index 0000000..65a96a0 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch @@ -0,0 +1,464 @@ +From 733fc13f65286c849ad6618be89df450f8bc5f7e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 20 Dec 2023 08:47:55 -0500 +Subject: [PATCH 09/22] virtio-blk: add iothread-vq-mapping parameter + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [5/17] c371fe62376c4eb54da88272a5966cec28404224 (stefanha/centos-stream-qemu-kvm) + +Add the iothread-vq-mapping parameter to assign virtqueues to IOThreads. +Store the vq:AioContext mapping in the new struct +VirtIOBlockDataPlane->vq_aio_context[] field and refactor the code to +use the per-vq AioContext instead of the BlockDriverState's AioContext. + +Reimplement --device virtio-blk-pci,iothread= and non-IOThread mode by +assigning all virtqueues to the IOThread and main loop's AioContext in +vq_aio_context[], respectively. + +The comment in struct VirtIOBlockDataPlane about EventNotifiers is +stale. Remove it. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20231220134755.814917-5-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit b6948ab01df068bef591868c22d1f873d2d05cde) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/dataplane/virtio-blk.c | 155 ++++++++++++++++++++++++-------- + hw/block/dataplane/virtio-blk.h | 3 + + hw/block/virtio-blk.c | 92 ++++++++++++++++--- + include/hw/virtio/virtio-blk.h | 2 + + 4 files changed, 202 insertions(+), 50 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 7bbbd981ad..6debd4401e 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -32,13 +32,11 @@ struct VirtIOBlockDataPlane { + VirtIOBlkConf *conf; + VirtIODevice *vdev; + +- /* Note that these EventNotifiers are assigned by value. This is +- * fine as long as you do not call event_notifier_cleanup on them +- * (because you don't own the file descriptor or handle; you just +- * use it). ++ /* ++ * The AioContext for each virtqueue. The BlockDriverState will use the ++ * first element as its AioContext. 
+ */ +- IOThread *iothread; +- AioContext *ctx; ++ AioContext **vq_aio_context; + }; + + /* Raise an interrupt to signal guest, if necessary */ +@@ -47,6 +45,45 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) + virtio_notify_irqfd(s->vdev, vq); + } + ++/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ ++static void ++apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, ++ AioContext **vq_aio_context, uint16_t num_queues) ++{ ++ IOThreadVirtQueueMappingList *node; ++ size_t num_iothreads = 0; ++ size_t cur_iothread = 0; ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ num_iothreads++; ++ } ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ AioContext *ctx = iothread_get_aio_context(iothread); ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(iothread)); ++ ++ if (node->value->vqs) { ++ uint16List *vq; ++ ++ /* Explicit vq:IOThread assignment */ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ vq_aio_context[vq->value] = ctx; ++ } ++ } else { ++ /* Round-robin vq:IOThread assignment */ ++ for (unsigned i = cur_iothread; i < num_queues; ++ i += num_iothreads) { ++ vq_aio_context[i] = ctx; ++ } ++ } ++ ++ cur_iothread++; ++ } ++} ++ + /* Context: QEMU global mutex held */ + bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + VirtIOBlockDataPlane **dataplane, +@@ -58,7 +95,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + + *dataplane = NULL; + +- if (conf->iothread) { ++ if (conf->iothread || conf->iothread_vq_mapping_list) { + if (!k->set_guest_notifiers || !k->ioeventfd_assign) { + error_setg(errp, + "device is incompatible with iothread " +@@ -86,13 +123,24 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + s = g_new0(VirtIOBlockDataPlane, 1); + s->vdev = vdev; + 
s->conf = conf; ++ s->vq_aio_context = g_new(AioContext *, conf->num_queues); ++ ++ if (conf->iothread_vq_mapping_list) { ++ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, ++ conf->num_queues); ++ } else if (conf->iothread) { ++ AioContext *ctx = iothread_get_aio_context(conf->iothread); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } + +- if (conf->iothread) { +- s->iothread = conf->iothread; +- object_ref(OBJECT(s->iothread)); +- s->ctx = iothread_get_aio_context(s->iothread); ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(conf->iothread)); + } else { +- s->ctx = qemu_get_aio_context(); ++ AioContext *ctx = qemu_get_aio_context(); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } + } + + *dataplane = s; +@@ -104,6 +152,7 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, + void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + { + VirtIOBlock *vblk; ++ VirtIOBlkConf *conf = s->conf; + + if (!s) { + return; +@@ -111,9 +160,21 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) + + vblk = VIRTIO_BLK(s->vdev); + assert(!vblk->dataplane_started); +- if (s->iothread) { +- object_unref(OBJECT(s->iothread)); ++ ++ if (conf->iothread_vq_mapping_list) { ++ IOThreadVirtQueueMappingList *node; ++ ++ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ object_unref(OBJECT(iothread)); ++ } ++ } ++ ++ if (conf->iothread) { ++ object_unref(OBJECT(conf->iothread)); + } ++ ++ g_free(s->vq_aio_context); + g_free(s); + } + +@@ -177,19 +238,13 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + + trace_virtio_blk_data_plane_start(s); + +- r = blk_set_aio_context(s->conf->conf.blk, s->ctx, &local_err); ++ r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], ++ &local_err); + if (r < 0) { + 
error_report_err(local_err); + goto fail_aio_context; + } + +- /* Kick right away to begin processing requests already in vring */ +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- } +- + /* + * These fields must be visible to the IOThread when it processes the + * virtqueue, otherwise it will think dataplane has not started yet. +@@ -206,8 +261,12 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + if (!blk_in_drain(s->conf->conf.blk)) { + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; + +- virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ ++ virtio_queue_aio_attach_host_notifier(vq, ctx); + } + } + return 0; +@@ -236,23 +295,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + * + * Context: BH in IOThread + */ +-static void virtio_blk_data_plane_stop_bh(void *opaque) ++static void virtio_blk_data_plane_stop_vq_bh(void *opaque) + { +- VirtIOBlockDataPlane *s = opaque; +- unsigned i; +- +- for (i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); ++ VirtQueue *vq = opaque; ++ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); + +- virtio_queue_aio_detach_host_notifier(vq, s->ctx); ++ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); + +- /* +- * Test and clear notifier after disabling event, in case poll callback +- * didn't have time to run. +- */ +- virtio_queue_host_notifier_read(host_notifier); +- } ++ /* ++ * Test and clear notifier after disabling event, in case poll callback ++ * didn't have time to run. 
++ */ ++ virtio_queue_host_notifier_read(host_notifier); + } + + /* Context: QEMU global mutex held */ +@@ -279,7 +333,12 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + trace_virtio_blk_data_plane_stop(s); + + if (!blk_in_drain(s->conf->conf.blk)) { +- aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s); ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(s->vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ } + } + + /* +@@ -322,3 +381,23 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) + + s->stopping = false; + } ++ ++void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); ++ ++ for (uint16_t i = 0; i < s->conf->num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ ++void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); ++ ++ for (uint16_t i = 0; i < s->conf->num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} +diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h +index 5e18bb99ae..1a806fe447 100644 +--- a/hw/block/dataplane/virtio-blk.h ++++ b/hw/block/dataplane/virtio-blk.h +@@ -28,4 +28,7 @@ void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); + int virtio_blk_data_plane_start(VirtIODevice *vdev); + void virtio_blk_data_plane_stop(VirtIODevice *vdev); + ++void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); ++void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); ++ + #endif /* HW_DATAPLANE_VIRTIO_BLK_H */ +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ec9ed09a6a..46e73b2c96 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1151,6 +1151,7 @@ static void 
virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + return; + } + } ++ + virtio_blk_handle_vq(s, vq); + } + +@@ -1463,6 +1464,68 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + return 0; + } + ++static bool ++validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list, ++ uint16_t num_queues, Error **errp) ++{ ++ g_autofree unsigned long *vqs = bitmap_new(num_queues); ++ g_autoptr(GHashTable) iothreads = ++ g_hash_table_new(g_str_hash, g_str_equal); ++ ++ for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { ++ const char *name = node->value->iothread; ++ uint16List *vq; ++ ++ if (!iothread_by_id(name)) { ++ error_setg(errp, "IOThread \"%s\" object does not exist", name); ++ return false; ++ } ++ ++ if (!g_hash_table_add(iothreads, (gpointer)name)) { ++ error_setg(errp, ++ "duplicate IOThread name \"%s\" in iothread-vq-mapping", ++ name); ++ return false; ++ } ++ ++ if (node != list) { ++ if (!!node->value->vqs != !!list->value->vqs) { ++ error_setg(errp, "either all items in iothread-vq-mapping " ++ "must have vqs or none of them must have it"); ++ return false; ++ } ++ } ++ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ if (vq->value >= num_queues) { ++ error_setg(errp, "vq index %u for IOThread \"%s\" must be " ++ "less than num_queues %u in iothread-vq-mapping", ++ vq->value, name, num_queues); ++ return false; ++ } ++ ++ if (test_and_set_bit(vq->value, vqs)) { ++ error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " ++ "because it is already assigned", vq->value, name); ++ return false; ++ } ++ } ++ } ++ ++ if (list->value->vqs) { ++ for (uint16_t i = 0; i < num_queues; i++) { ++ if (!test_bit(i, vqs)) { ++ error_setg(errp, ++ "missing vq %u IOThread assignment in iothread-vq-mapping", ++ i); ++ return false; ++ } ++ } ++ } ++ ++ return true; ++} ++ + static void virtio_resize_cb(void *opaque) + { + VirtIODevice *vdev = opaque; +@@ -1487,34 +1550,24 @@ static void 
virtio_blk_resize(void *opaque) + static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; +- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); +- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); + + if (!s->dataplane || !s->dataplane_started) { + return; + } + +- for (uint16_t i = 0; i < s->conf.num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_detach_host_notifier(vq, ctx); +- } ++ virtio_blk_data_plane_detach(s->dataplane); + } + + /* Resume virtqueue ioeventfd processing after drain */ + static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; +- VirtIODevice *vdev = VIRTIO_DEVICE(opaque); +- AioContext *ctx = blk_get_aio_context(s->conf.conf.blk); + + if (!s->dataplane || !s->dataplane_started) { + return; + } + +- for (uint16_t i = 0; i < s->conf.num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } ++ virtio_blk_data_plane_attach(s->dataplane); + } + + static const BlockDevOps virtio_block_ops = { +@@ -1600,6 +1653,19 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + return; + } + ++ if (conf->iothread_vq_mapping_list) { ++ if (conf->iothread) { ++ error_setg(errp, "iothread and iothread-vq-mapping properties " ++ "cannot be set at the same time"); ++ return; ++ } ++ ++ if (!validate_iothread_vq_mapping_list(conf->iothread_vq_mapping_list, ++ conf->num_queues, errp)) { ++ return; ++ } ++ } ++ + s->config_size = virtio_get_config_size(&virtio_blk_cfg_size_params, + s->host_features); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); +@@ -1702,6 +1768,8 @@ static Property virtio_blk_properties[] = { + DEFINE_PROP_BOOL("seg-max-adjust", VirtIOBlock, conf.seg_max_adjust, true), + DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD, + IOThread *), ++ DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOBlock, ++ conf.iothread_vq_mapping_list), + 
DEFINE_PROP_BIT64("discard", VirtIOBlock, host_features, + VIRTIO_BLK_F_DISCARD, true), + DEFINE_PROP_BOOL("report-discard-granularity", VirtIOBlock, +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index 9881009c22..5e4091e4da 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -21,6 +21,7 @@ + #include "sysemu/block-backend.h" + #include "sysemu/block-ram-registrar.h" + #include "qom/object.h" ++#include "qapi/qapi-types-virtio.h" + + #define TYPE_VIRTIO_BLK "virtio-blk-device" + OBJECT_DECLARE_SIMPLE_TYPE(VirtIOBlock, VIRTIO_BLK) +@@ -37,6 +38,7 @@ struct VirtIOBlkConf + { + BlockConf conf; + IOThread *iothread; ++ IOThreadVirtQueueMappingList *iothread_vq_mapping_list; + char *serial; + uint32_t request_merging; + uint16_t num_queues; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch new file mode 100644 index 0000000..31e83a2 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-add-lock-to-protect-s-rq.patch @@ -0,0 +1,177 @@ +From d54e88103aa76f3bf755b3f4308d8ab60367c6ef Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:00:59 -0400 +Subject: [PATCH 074/101] virtio-blk: add lock to protect s->rq + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [5/26] 17dcd5ba18c03e5633a014d8d62d34d8dd7b43bf (kmwolf/centos-qemu-kvm) + +s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock +to protect s->rq and eliminate reliance on the AioContext lock. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. 
Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 67 +++++++++++++++++++++++----------- + include/hw/virtio/virtio-blk.h | 3 +- + 2 files changed, 47 insertions(+), 23 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index a1f8e15522..ee38e089bc 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + /* Break the link as the next request is going to be parsed from the + * ring again. Otherwise we may end up doing a double completion! */ + req->mr_next = NULL; +- req->next = s->rq; +- s->rq = req; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req->next = s->rq; ++ s->rq = req; ++ } + } else if (action == BLOCK_ERROR_ACTION_REPORT) { + virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); + if (acct_failed) { +@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque) + { + VirtIOBlock *s = opaque; + +- VirtIOBlockReq *req = s->rq; ++ VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- s->rq = NULL; ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req = s->rq; ++ s->rq = NULL; ++ } + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (req) { +@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev) + AioContext *ctx; + VirtIOBlockReq *req; + ++ /* Dataplane has stopped... */ ++ assert(!s->dataplane_started); ++ ++ /* ...but requests may still be in flight. */ + ctx = blk_get_aio_context(s->blk); + aio_context_acquire(ctx); + blk_drain(s->blk); ++ aio_context_release(ctx); + + /* We drop queued requests after blk_drain() because blk_drain() itself can + * produce them. 
*/ +- while (s->rq) { +- req = s->rq; +- s->rq = req->next; +- virtqueue_detach_element(req->vq, &req->elem, 0); +- virtio_blk_free_request(req); +- } ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ while (s->rq) { ++ req = s->rq; ++ s->rq = req->next; + +- aio_context_release(ctx); ++ /* No other threads can access req->vq here */ ++ virtqueue_detach_element(req->vq, &req->elem, 0); ++ ++ virtio_blk_free_request(req); ++ } ++ } + +- assert(!s->dataplane_started); + blk_set_enable_write_cache(s->blk, s->original_wce); + } + +@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); +- VirtIOBlockReq *req = s->rq; + +- while (req) { +- qemu_put_sbyte(f, 1); ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ VirtIOBlockReq *req = s->rq; + +- if (s->conf.num_queues > 1) { +- qemu_put_be32(f, virtio_get_queue_index(req->vq)); +- } ++ while (req) { ++ qemu_put_sbyte(f, 1); + +- qemu_put_virtqueue_element(vdev, f, &req->elem); +- req = req->next; ++ if (s->conf.num_queues > 1) { ++ qemu_put_be32(f, virtio_get_queue_index(req->vq)); ++ } ++ ++ qemu_put_virtqueue_element(vdev, f, &req->elem); ++ req = req->next; ++ } + } ++ + qemu_put_sbyte(f, 0); + } + +@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f, + + req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq)); + virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req); +- req->next = s->rq; +- s->rq = req; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ req->next = s->rq; ++ s->rq = req; ++ } + } + + return 0; +@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + s->host_features); + virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size); + ++ qemu_mutex_init(&s->rq_lock); ++ + s->blk = conf->conf.blk; + s->rq = NULL; + s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1; +@@ 
-1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + virtio_del_queue(vdev, i); + } + qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); ++ qemu_mutex_destroy(&s->rq_lock); + blk_ram_registrar_destroy(&s->blk_ram_registrar); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index dafec432ce..9881009c22 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -54,7 +54,8 @@ struct VirtIOBlockReq; + struct VirtIOBlock { + VirtIODevice parent_obj; + BlockBackend *blk; +- void *rq; ++ QemuMutex rq_lock; ++ void *rq; /* protected by rq_lock */ + VirtIOBlkConf conf; + unsigned short sector_mask; + bool original_wce; +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch new file mode 100644 index 0000000..a7b518d --- /dev/null +++ b/SOURCES/kvm-virtio-blk-always-set-ioeventfd-during-startup.patch @@ -0,0 +1,63 @@ +From 22730552442003e81c8c508c3e7ebacf647e4e75 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:48 -0500 +Subject: [PATCH 19/22] virtio-blk: always set ioeventfd during startup + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [15/17] 5f7142aeaa54fda41bd5c4fd3222fd8e3e18f370 (stefanha/centos-stream-qemu-kvm) + +When starting ioeventfd it is common practice to set the event notifier +so that the ioeventfd handler is triggered to run immediately. There may +be no requests waiting to be processed, but the idea is that if a +request snuck in then we guarantee that it will be detected. 
+ +One scenario where self-triggering the ioeventfd is necessary is when +virtio_blk_handle_output() is called from a vCPU thread before the +VIRTIO Device Status transitions to DRIVER_OK. In that case we need to +self-trigger the ioeventfd so that the kick handled by the vCPU thread +causes the vq AioContext thread to take over handling the request(s). + +Fixes: b6948ab01df0 ("virtio-blk: add iothread-vq-mapping parameter") +Reported-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-7-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit d3f6f294aeadd5f88caf0155e4360808c95b3146) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 81de06c9f6..0b9100b746 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1809,14 +1809,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + + /* Get this show started by hooking up our callbacks */ +- if (!blk_in_drain(s->conf.conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; + +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); + ++ if (!blk_in_drain(s->conf.conf.blk)) { + virtio_queue_aio_attach_host_notifier(vq, ctx); + } + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch new file mode 
100644 index 0000000..8d93bf6 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch @@ -0,0 +1,72 @@ +From f62b56c68d50a149a07e15797bf3605e63b2c501 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 22 Jan 2024 12:26:25 -0500 +Subject: [PATCH 4/6] virtio-blk: avoid using ioeventfd state in irqfd + conditional + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 224: virtio-blk: avoid using ioeventfd state in irqfd conditional +RH-Jira: RHEL-15394 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/1] 8f24084669db52457e55e2523b9f56f5560dd6ce (stefanha/centos-stream-qemu-kvm) + +Requests that complete in an IOThread use irqfd to notify the guest +while requests that complete in the main loop thread use the traditional +qdev irq code path. The reason for this conditional is that the irq code +path requires the BQL: + + if (s->ioeventfd_started && !s->ioeventfd_disabled) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); + } + +There is a corner case where the conditional invokes the irq code path +instead of the irqfd code path: + + static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) + { + ... + /* + * Set ->ioeventfd_started to false before draining so that host notifiers + * are not detached/attached anymore. + */ + s->ioeventfd_started = false; + + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf.conf.blk); + +During blk_drain() the conditional produces the wrong result because +ioeventfd_started is false. + +Use qemu_in_iothread() instead of checking the ioeventfd state. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-15394 +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240122172625.415386-1-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit bfa36802d1704fc413c590ebdcc4e5ae0eacf439) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 7fdeaf2d12..2ae2f6a823 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->inhdr_undo); + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); +- if (s->ioeventfd_started && !s->ioeventfd_disabled) { ++ if (qemu_in_iothread()) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch new file mode 100644 index 0000000..be3c7db --- /dev/null +++ b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch @@ -0,0 +1,167 @@ +From a2069ff76637365cacf5b96f9427b98a6ca2c9ba Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:01:00 -0400 +Subject: [PATCH 075/101] virtio-blk: don't lock AioContext in the completion + code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [6/26] 3426f62c2156f6967bb4ffbce75a4ff46d3312a3 (kmwolf/centos-qemu-kvm) + +Nothing in the completion code path relies on the AioContext lock +anymore. Virtqueues are only accessed from one thread at any moment and +the s->rq global state is protected by its own lock now. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 34 ++++------------------------------ + 1 file changed, 4 insertions(+), 30 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ee38e089bc..f5315df042 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + VirtIOBlock *s = next->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (next) { + VirtIOBlockReq *req = next; + next = req->mr_next; +@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); + } +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_flush_complete(void *opaque, int ret) +@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret) + VirtIOBlockReq *req = opaque; + VirtIOBlock *s = req->dev; + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +- if (ret) { +- if (virtio_blk_handle_rw_error(req, -ret, 0, true)) { +- goto out; +- } ++ if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) { ++ return; + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +- +-out: +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) +@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) & + ~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES; + +- 
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +- if (ret) { +- if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { +- goto out; +- } ++ if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) { ++ return; + } + + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); +@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + block_acct_done(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); +- +-out: +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + #ifdef __linux__ +@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status) + virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(ioctl_req); + } + +@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) + { + ZoneCmdData *data = opaque; + VirtIOBlockReq *req = data->req; +- VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + struct iovec *in_iov = data->in_iov; + unsigned in_num = data->in_num; +@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret) + } + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(data->zone_report_data.zones); + g_free(data); + } +@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret) + err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; + } + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static 
int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op) +@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) + { + ZoneCmdData *data = opaque; + VirtIOBlockReq *req = data->req; +- VirtIOBlock *s = req->dev; + VirtIODevice *vdev = VIRTIO_DEVICE(req->dev); + int64_t append_sector, n; + uint8_t err_status = VIRTIO_BLK_S_OK; +@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret) + trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret); + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + g_free(data); + } + +@@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req, + return 0; + + out: +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + virtio_blk_req_complete(req, err_status); + virtio_blk_free_request(req); +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + return err_status; + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch new file mode 100644 index 0000000..c31fcca --- /dev/null +++ b/SOURCES/kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch @@ -0,0 +1,67 @@ +From 2816f6ce20c496e21947f215112be34a5cb93606 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 14 Sep 2023 10:01:01 -0400 +Subject: [PATCH 076/101] virtio-blk: don't lock AioContext in the submission + code path + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [7/26] e0de2744cb319569ea008334e45ee5fc2ba9b6d7 (kmwolf/centos-qemu-kvm) + +There is no need to acquire the AioContext lock around blk_aio_*() or +blk_get_geometry() anymore. 
I/O plugging (defer_call()) also does not +require the AioContext lock anymore. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20230914140101.1065008-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Reviewed-by: Eric Blake +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index f5315df042..e110f9718b 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + MultiReqBuffer mrb = {}; + bool suppress_notifications = virtio_queue_get_notification(vq); + +- aio_context_acquire(blk_get_aio_context(s->blk)); + defer_call_begin(); + + do { +@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq) + } + + defer_call_end(); +- aio_context_release(blk_get_aio_context(s->blk)); + } + + static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) + s->rq = NULL; + } + +- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque) + + /* Paired with inc in virtio_blk_dma_restart_cb() */ + blk_dec_in_flight(s->conf.conf.blk); +- +- aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + + static void virtio_blk_dma_restart_cb(void *opaque, bool running, +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch new file mode 100644 index 0000000..3fb8211 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch @@ -0,0 +1,1009 @@ +From d9be1e1f199ee3171455636f32f3ba59b57e9351 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: 
Fri, 19 Jan 2024 08:57:43 -0500 +Subject: [PATCH 14/22] virtio-blk: move dataplane code into virtio-blk.c + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [10/17] ad854c6c7e808da272bd07229e8c915c1ee6f296 (stefanha/centos-stream-qemu-kvm) + +The dataplane code used to be significantly different from the +non-dataplane code and therefore had a separate source file. + +Over time the difference has gotten smaller because the I/O code paths +were unified. Nowadays the distinction between the VirtIOBlock and +VirtIOBlockDataPlane structs is more of an inconvenience that hinders +code simplification. + +Move hw/block/dataplane/virtio-blk.c into hw/block/virtio-blk.c, merging +VirtIOBlockDataPlane's fields into VirtIOBlock. + +hw/block/virtio-blk.c used VirtIOBlock->dataplane to check if +virtio_blk_data_plane_create() was successful. This is not necessary +because ->dataplane_started and ->dataplane_disabled can be used +instead. This patch makes those changes in order to drop +VirtIOBlock->dataplane. + +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3bcc17f06526754fd675dcf601414442044fa0b6) +Signed-off-by: Stefan Hajnoczi + +Conflicts: + hw/block/dataplane/virtio-blk.c + Downstream is missing commit 0b2675c473f6 ("Rename "QEMU global mutex" + to "BQL" in comments and docs") so the source file still contains old + "QEMU global mutex held" comments instead of the new "BQL held" + phrasing. The code moved into hw/block/virtio-blk.c by this patch uses + the new "BQL held" phrasing so as to minimize conflicts in future + backports. Either way, this is not a code change and therefore poses no risk + of introducing bugs.
+--- + hw/block/dataplane/meson.build | 1 - + hw/block/dataplane/trace-events | 5 - + hw/block/dataplane/trace.h | 1 - + hw/block/dataplane/virtio-blk.c | 404 -------------------------------- + hw/block/dataplane/virtio-blk.h | 34 --- + hw/block/virtio-blk.c | 362 ++++++++++++++++++++++++++-- + include/hw/virtio/virtio-blk.h | 12 +- + meson.build | 1 - + 8 files changed, 357 insertions(+), 463 deletions(-) + delete mode 100644 hw/block/dataplane/trace-events + delete mode 100644 hw/block/dataplane/trace.h + delete mode 100644 hw/block/dataplane/virtio-blk.c + delete mode 100644 hw/block/dataplane/virtio-blk.h + +diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build +index 025b3b061b..11a5eba2f4 100644 +--- a/hw/block/dataplane/meson.build ++++ b/hw/block/dataplane/meson.build +@@ -1,2 +1 @@ +-system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c')) + specific_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c')) +diff --git a/hw/block/dataplane/trace-events b/hw/block/dataplane/trace-events +deleted file mode 100644 +index 38fc3e7507..0000000000 +--- a/hw/block/dataplane/trace-events ++++ /dev/null +@@ -1,5 +0,0 @@ +-# See docs/devel/tracing.rst for syntax documentation. +- +-# virtio-blk.c +-virtio_blk_data_plane_start(void *s) "dataplane %p" +-virtio_blk_data_plane_stop(void *s) "dataplane %p" +diff --git a/hw/block/dataplane/trace.h b/hw/block/dataplane/trace.h +deleted file mode 100644 +index 240cc59834..0000000000 +--- a/hw/block/dataplane/trace.h ++++ /dev/null +@@ -1 +0,0 @@ +-#include "trace/trace-hw_block_dataplane.h" +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +deleted file mode 100644 +index 97a302cf49..0000000000 +--- a/hw/block/dataplane/virtio-blk.c ++++ /dev/null +@@ -1,404 +0,0 @@ +-/* +- * Dedicated thread for virtio-blk I/O processing +- * +- * Copyright 2012 IBM, Corp. +- * Copyright 2012 Red Hat, Inc. 
and/or its affiliates +- * +- * Authors: +- * Stefan Hajnoczi +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. +- * +- */ +- +-#include "qemu/osdep.h" +-#include "qapi/error.h" +-#include "trace.h" +-#include "qemu/iov.h" +-#include "qemu/main-loop.h" +-#include "qemu/thread.h" +-#include "qemu/error-report.h" +-#include "hw/virtio/virtio-blk.h" +-#include "virtio-blk.h" +-#include "block/aio.h" +-#include "hw/virtio/virtio-bus.h" +-#include "qom/object_interfaces.h" +- +-struct VirtIOBlockDataPlane { +- bool starting; +- bool stopping; +- +- VirtIOBlkConf *conf; +- VirtIODevice *vdev; +- +- /* +- * The AioContext for each virtqueue. The BlockDriverState will use the +- * first element as its AioContext. +- */ +- AioContext **vq_aio_context; +-}; +- +-/* Raise an interrupt to signal guest, if necessary */ +-void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq) +-{ +- virtio_notify_irqfd(s->vdev, vq); +-} +- +-/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ +-static void +-apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, +- AioContext **vq_aio_context, uint16_t num_queues) +-{ +- IOThreadVirtQueueMappingList *node; +- size_t num_iothreads = 0; +- size_t cur_iothread = 0; +- +- for (node = iothread_vq_mapping_list; node; node = node->next) { +- num_iothreads++; +- } +- +- for (node = iothread_vq_mapping_list; node; node = node->next) { +- IOThread *iothread = iothread_by_id(node->value->iothread); +- AioContext *ctx = iothread_get_aio_context(iothread); +- +- /* Released in virtio_blk_data_plane_destroy() */ +- object_ref(OBJECT(iothread)); +- +- if (node->value->vqs) { +- uint16List *vq; +- +- /* Explicit vq:IOThread assignment */ +- for (vq = node->value->vqs; vq; vq = vq->next) { +- vq_aio_context[vq->value] = ctx; +- } +- } else { +- /* Round-robin vq:IOThread assignment */ +- for (unsigned i = 
cur_iothread; i < num_queues; +- i += num_iothreads) { +- vq_aio_context[i] = ctx; +- } +- } +- +- cur_iothread++; +- } +-} +- +-/* Context: QEMU global mutex held */ +-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +- VirtIOBlockDataPlane **dataplane, +- Error **errp) +-{ +- VirtIOBlockDataPlane *s; +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- +- *dataplane = NULL; +- +- if (conf->iothread || conf->iothread_vq_mapping_list) { +- if (!k->set_guest_notifiers || !k->ioeventfd_assign) { +- error_setg(errp, +- "device is incompatible with iothread " +- "(transport does not support notifiers)"); +- return false; +- } +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- error_setg(errp, "ioeventfd is required for iothread"); +- return false; +- } +- +- /* If dataplane is (re-)enabled while the guest is running there could +- * be block jobs that can conflict. +- */ +- if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { +- error_prepend(errp, "cannot start virtio-blk dataplane: "); +- return false; +- } +- } +- /* Don't try if transport does not support notifiers. 
*/ +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- return false; +- } +- +- s = g_new0(VirtIOBlockDataPlane, 1); +- s->vdev = vdev; +- s->conf = conf; +- s->vq_aio_context = g_new(AioContext *, conf->num_queues); +- +- if (conf->iothread_vq_mapping_list) { +- apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, +- conf->num_queues); +- } else if (conf->iothread) { +- AioContext *ctx = iothread_get_aio_context(conf->iothread); +- for (unsigned i = 0; i < conf->num_queues; i++) { +- s->vq_aio_context[i] = ctx; +- } +- +- /* Released in virtio_blk_data_plane_destroy() */ +- object_ref(OBJECT(conf->iothread)); +- } else { +- AioContext *ctx = qemu_get_aio_context(); +- for (unsigned i = 0; i < conf->num_queues; i++) { +- s->vq_aio_context[i] = ctx; +- } +- } +- +- *dataplane = s; +- +- return true; +-} +- +-/* Context: QEMU global mutex held */ +-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s) +-{ +- VirtIOBlock *vblk; +- VirtIOBlkConf *conf; +- +- if (!s) { +- return; +- } +- +- vblk = VIRTIO_BLK(s->vdev); +- assert(!vblk->dataplane_started); +- conf = s->conf; +- +- if (conf->iothread_vq_mapping_list) { +- IOThreadVirtQueueMappingList *node; +- +- for (node = conf->iothread_vq_mapping_list; node; node = node->next) { +- IOThread *iothread = iothread_by_id(node->value->iothread); +- object_unref(OBJECT(iothread)); +- } +- } +- +- if (conf->iothread) { +- object_unref(OBJECT(conf->iothread)); +- } +- +- g_free(s->vq_aio_context); +- g_free(s); +-} +- +-/* Context: QEMU global mutex held */ +-int virtio_blk_data_plane_start(VirtIODevice *vdev) +-{ +- VirtIOBlock *vblk = VIRTIO_BLK(vdev); +- VirtIOBlockDataPlane *s = vblk->dataplane; +- BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vblk))); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- unsigned i; +- unsigned nvqs = s->conf->num_queues; +- Error *local_err = NULL; +- int r; +- +- if (vblk->dataplane_started || s->starting) { +- return 0; +- } +- +- s->starting = true; +- +- /* Set 
up guest notifier (irq) */ +- r = k->set_guest_notifiers(qbus->parent, nvqs, true); +- if (r != 0) { +- error_report("virtio-blk failed to set guest notifier (%d), " +- "ensure -accel kvm is set.", r); +- goto fail_guest_notifiers; +- } +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- /* Set up virtqueue notify */ +- for (i = 0; i < nvqs; i++) { +- r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); +- if (r != 0) { +- int j = i; +- +- fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); +- while (i--) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- while (j--) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); +- } +- goto fail_host_notifiers; +- } +- } +- +- memory_region_transaction_commit(); +- +- trace_virtio_blk_data_plane_start(s); +- +- r = blk_set_aio_context(s->conf->conf.blk, s->vq_aio_context[0], +- &local_err); +- if (r < 0) { +- error_report_err(local_err); +- goto fail_aio_context; +- } +- +- /* +- * These fields must be visible to the IOThread when it processes the +- * virtqueue, otherwise it will think dataplane has not started yet. +- * +- * Make sure ->dataplane_started is false when blk_set_aio_context() is +- * called above so that draining does not cause the host notifier to be +- * detached/attached prematurely. 
+- */ +- s->starting = false; +- vblk->dataplane_started = true; +- smp_wmb(); /* paired with aio_notify_accept() on the read side */ +- +- /* Get this show started by hooking up our callbacks */ +- if (!blk_in_drain(s->conf->conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- /* Kick right away to begin processing requests already in vring */ +- event_notifier_set(virtio_queue_get_host_notifier(vq)); +- +- virtio_queue_aio_attach_host_notifier(vq, ctx); +- } +- } +- return 0; +- +- fail_aio_context: +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } +- fail_host_notifiers: +- k->set_guest_notifiers(qbus->parent, nvqs, false); +- fail_guest_notifiers: +- vblk->dataplane_disabled = true; +- s->starting = false; +- return -ENOSYS; +-} +- +-/* Stop notifications for new requests from guest. +- * +- * Context: BH in IOThread +- */ +-static void virtio_blk_data_plane_stop_vq_bh(void *opaque) +-{ +- VirtQueue *vq = opaque; +- EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); +- +- virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); +- +- /* +- * Test and clear notifier after disabling event, in case poll callback +- * didn't have time to run. 
+- */ +- virtio_queue_host_notifier_read(host_notifier); +-} +- +-/* Context: QEMU global mutex held */ +-void virtio_blk_data_plane_stop(VirtIODevice *vdev) +-{ +- VirtIOBlock *vblk = VIRTIO_BLK(vdev); +- VirtIOBlockDataPlane *s = vblk->dataplane; +- BusState *qbus = qdev_get_parent_bus(DEVICE(vblk)); +- VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); +- unsigned i; +- unsigned nvqs = s->conf->num_queues; +- +- if (!vblk->dataplane_started || s->stopping) { +- return; +- } +- +- /* Better luck next time. */ +- if (vblk->dataplane_disabled) { +- vblk->dataplane_disabled = false; +- vblk->dataplane_started = false; +- return; +- } +- s->stopping = true; +- trace_virtio_blk_data_plane_stop(s); +- +- if (!blk_in_drain(s->conf->conf.blk)) { +- for (i = 0; i < nvqs; i++) { +- VirtQueue *vq = virtio_get_queue(s->vdev, i); +- AioContext *ctx = s->vq_aio_context[i]; +- +- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); +- } +- } +- +- /* +- * Batch all the host notifiers in a single transaction to avoid +- * quadratic time complexity in address_space_update_ioeventfds(). +- */ +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- /* +- * The transaction expects the ioeventfds to be open when it +- * commits. Do it now, before the cleanup loop. +- */ +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } +- +- /* +- * Set ->dataplane_started to false before draining so that host notifiers +- * are not detached/attached anymore. +- */ +- vblk->dataplane_started = false; +- +- /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ +- blk_drain(s->conf->conf.blk); +- +- /* +- * Try to switch bs back to the QEMU main loop. 
If other users keep the +- * BlockBackend in the iothread, that's ok +- */ +- blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context(), NULL); +- +- /* Clean up guest notifier (irq) */ +- k->set_guest_notifiers(qbus->parent, nvqs, false); +- +- s->stopping = false; +-} +- +-void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s) +-{ +- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); +- +- for (uint16_t i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); +- } +-} +- +-void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s) +-{ +- VirtIODevice *vdev = VIRTIO_DEVICE(s->vdev); +- +- for (uint16_t i = 0; i < s->conf->num_queues; i++) { +- VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); +- } +-} +diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h +deleted file mode 100644 +index 1a806fe447..0000000000 +--- a/hw/block/dataplane/virtio-blk.h ++++ /dev/null +@@ -1,34 +0,0 @@ +-/* +- * Dedicated thread for virtio-blk I/O processing +- * +- * Copyright 2012 IBM, Corp. +- * Copyright 2012 Red Hat, Inc. and/or its affiliates +- * +- * Authors: +- * Stefan Hajnoczi +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#ifndef HW_DATAPLANE_VIRTIO_BLK_H +-#define HW_DATAPLANE_VIRTIO_BLK_H +- +-#include "hw/virtio/virtio.h" +- +-typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane; +- +-bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf, +- VirtIOBlockDataPlane **dataplane, +- Error **errp); +-void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s); +-void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq); +- +-int virtio_blk_data_plane_start(VirtIODevice *vdev); +-void virtio_blk_data_plane_stop(VirtIODevice *vdev); +- +-void virtio_blk_data_plane_detach(VirtIOBlockDataPlane *s); +-void virtio_blk_data_plane_attach(VirtIOBlockDataPlane *s); +- +-#endif /* HW_DATAPLANE_VIRTIO_BLK_H */ +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 46e73b2c96..cb623069f8 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -27,7 +27,6 @@ + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" + #include "hw/virtio/virtio-blk.h" +-#include "dataplane/virtio-blk.h" + #include "scsi/constants.h" + #ifdef __linux__ + # include +@@ -66,7 +65,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); + if (s->dataplane_started && !s->dataplane_disabled) { +- virtio_blk_data_plane_notify(s->dataplane, req->vq); ++ virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); + } +@@ -1142,7 +1141,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBlock *s = (VirtIOBlock *)vdev; + +- if (s->dataplane && !s->dataplane_started) { ++ if (!s->dataplane_disabled && !s->dataplane_started) { + /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start + * dataplane here instead of waiting for .set_status(). 
+ */ +@@ -1546,16 +1545,34 @@ static void virtio_blk_resize(void *opaque) + aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); + } + ++static void virtio_blk_data_plane_detach(VirtIOBlock *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ ++ for (uint16_t i = 0; i < s->conf.num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ ++static void virtio_blk_data_plane_attach(VirtIOBlock *s) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ ++ for (uint16_t i = 0; i < s->conf.num_queues; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ virtio_queue_aio_attach_host_notifier(vq, s->vq_aio_context[i]); ++ } ++} ++ + /* Suspend virtqueue ioeventfd processing during drain */ + static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (!s->dataplane || !s->dataplane_started) { +- return; ++ if (s->dataplane_started) { ++ virtio_blk_data_plane_detach(s); + } +- +- virtio_blk_data_plane_detach(s->dataplane); + } + + /* Resume virtqueue ioeventfd processing after drain */ +@@ -1563,11 +1580,9 @@ static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (!s->dataplane || !s->dataplane_started) { +- return; ++ if (s->dataplane_started) { ++ virtio_blk_data_plane_attach(s); + } +- +- virtio_blk_data_plane_attach(s->dataplane); + } + + static const BlockDevOps virtio_block_ops = { +@@ -1576,6 +1591,326 @@ static const BlockDevOps virtio_block_ops = { + .drained_end = virtio_blk_drained_end, + }; + ++/* Generate vq:AioContext mappings from a validated iothread-vq-mapping list */ ++static void ++apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, ++ AioContext **vq_aio_context, uint16_t num_queues) ++{ ++ IOThreadVirtQueueMappingList *node; ++ size_t num_iothreads = 0; ++ size_t cur_iothread = 0; ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ num_iothreads++; 
++ } ++ ++ for (node = iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ AioContext *ctx = iothread_get_aio_context(iothread); ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(iothread)); ++ ++ if (node->value->vqs) { ++ uint16List *vq; ++ ++ /* Explicit vq:IOThread assignment */ ++ for (vq = node->value->vqs; vq; vq = vq->next) { ++ vq_aio_context[vq->value] = ctx; ++ } ++ } else { ++ /* Round-robin vq:IOThread assignment */ ++ for (unsigned i = cur_iothread; i < num_queues; ++ i += num_iothreads) { ++ vq_aio_context[i] = ctx; ++ } ++ } ++ ++ cur_iothread++; ++ } ++} ++ ++/* Context: BQL held */ ++static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(s); ++ VirtIOBlkConf *conf = &s->conf; ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ ++ if (conf->iothread || conf->iothread_vq_mapping_list) { ++ if (!k->set_guest_notifiers || !k->ioeventfd_assign) { ++ error_setg(errp, ++ "device is incompatible with iothread " ++ "(transport does not support notifiers)"); ++ return false; ++ } ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ error_setg(errp, "ioeventfd is required for iothread"); ++ return false; ++ } ++ ++ /* ++ * If dataplane is (re-)enabled while the guest is running there could ++ * be block jobs that can conflict. ++ */ ++ if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { ++ error_prepend(errp, "cannot start virtio-blk dataplane: "); ++ return false; ++ } ++ } ++ /* Don't try if transport does not support notifiers. 
*/ ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ s->dataplane_disabled = true; ++ return false; ++ } ++ ++ s->vq_aio_context = g_new(AioContext *, conf->num_queues); ++ ++ if (conf->iothread_vq_mapping_list) { ++ apply_vq_mapping(conf->iothread_vq_mapping_list, s->vq_aio_context, ++ conf->num_queues); ++ } else if (conf->iothread) { ++ AioContext *ctx = iothread_get_aio_context(conf->iothread); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } ++ ++ /* Released in virtio_blk_data_plane_destroy() */ ++ object_ref(OBJECT(conf->iothread)); ++ } else { ++ AioContext *ctx = qemu_get_aio_context(); ++ for (unsigned i = 0; i < conf->num_queues; i++) { ++ s->vq_aio_context[i] = ctx; ++ } ++ } ++ ++ return true; ++} ++ ++/* Context: BQL held */ ++static void virtio_blk_data_plane_destroy(VirtIOBlock *s) ++{ ++ VirtIOBlkConf *conf = &s->conf; ++ ++ assert(!s->dataplane_started); ++ ++ if (conf->iothread_vq_mapping_list) { ++ IOThreadVirtQueueMappingList *node; ++ ++ for (node = conf->iothread_vq_mapping_list; node; node = node->next) { ++ IOThread *iothread = iothread_by_id(node->value->iothread); ++ object_unref(OBJECT(iothread)); ++ } ++ } ++ ++ if (conf->iothread) { ++ object_unref(OBJECT(conf->iothread)); ++ } ++ ++ g_free(s->vq_aio_context); ++ s->vq_aio_context = NULL; ++} ++ ++/* Context: BQL held */ ++static int virtio_blk_data_plane_start(VirtIODevice *vdev) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(vdev); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ unsigned i; ++ unsigned nvqs = s->conf.num_queues; ++ Error *local_err = NULL; ++ int r; ++ ++ if (s->dataplane_started || s->dataplane_starting) { ++ return 0; ++ } ++ ++ s->dataplane_starting = true; ++ ++ /* Set up guest notifier (irq) */ ++ r = k->set_guest_notifiers(qbus->parent, nvqs, true); ++ if (r != 0) { ++ error_report("virtio-blk failed to set guest notifier (%d), " ++ "ensure -accel kvm is set.", r); ++ 
goto fail_guest_notifiers; ++ } ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ /* Set up virtqueue notify */ ++ for (i = 0; i < nvqs; i++) { ++ r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, true); ++ if (r != 0) { ++ int j = i; ++ ++ fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); ++ while (i--) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ while (j--) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), j); ++ } ++ goto fail_host_notifiers; ++ } ++ } ++ ++ memory_region_transaction_commit(); ++ ++ r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], ++ &local_err); ++ if (r < 0) { ++ error_report_err(local_err); ++ goto fail_aio_context; ++ } ++ ++ /* ++ * These fields must be visible to the IOThread when it processes the ++ * virtqueue, otherwise it will think dataplane has not started yet. ++ * ++ * Make sure ->dataplane_started is false when blk_set_aio_context() is ++ * called above so that draining does not cause the host notifier to be ++ * detached/attached prematurely. 
++ */ ++ s->dataplane_starting = false; ++ s->dataplane_started = true; ++ smp_wmb(); /* paired with aio_notify_accept() on the read side */ ++ ++ /* Get this show started by hooking up our callbacks */ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ /* Kick right away to begin processing requests already in vring */ ++ event_notifier_set(virtio_queue_get_host_notifier(vq)); ++ ++ virtio_queue_aio_attach_host_notifier(vq, ctx); ++ } ++ } ++ return 0; ++ ++ fail_aio_context: ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); ++ } ++ fail_host_notifiers: ++ k->set_guest_notifiers(qbus->parent, nvqs, false); ++ fail_guest_notifiers: ++ s->dataplane_disabled = true; ++ s->dataplane_starting = false; ++ return -ENOSYS; ++} ++ ++/* Stop notifications for new requests from guest. ++ * ++ * Context: BH in IOThread ++ */ ++static void virtio_blk_data_plane_stop_vq_bh(void *opaque) ++{ ++ VirtQueue *vq = opaque; ++ EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); ++ ++ virtio_queue_aio_detach_host_notifier(vq, qemu_get_current_aio_context()); ++ ++ /* ++ * Test and clear notifier after disabling event, in case poll callback ++ * didn't have time to run. ++ */ ++ virtio_queue_host_notifier_read(host_notifier); ++} ++ ++/* Context: BQL held */ ++static void virtio_blk_data_plane_stop(VirtIODevice *vdev) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(vdev); ++ BusState *qbus = qdev_get_parent_bus(DEVICE(s)); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ unsigned i; ++ unsigned nvqs = s->conf.num_queues; ++ ++ if (!s->dataplane_started || s->dataplane_stopping) { ++ return; ++ } ++ ++ /* Better luck next time. 
*/ ++ if (s->dataplane_disabled) { ++ s->dataplane_disabled = false; ++ s->dataplane_started = false; ++ return; ++ } ++ s->dataplane_stopping = true; ++ ++ if (!blk_in_drain(s->conf.conf.blk)) { ++ for (i = 0; i < nvqs; i++) { ++ VirtQueue *vq = virtio_get_queue(vdev, i); ++ AioContext *ctx = s->vq_aio_context[i]; ++ ++ aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ } ++ } ++ ++ /* ++ * Batch all the host notifiers in a single transaction to avoid ++ * quadratic time complexity in address_space_update_ioeventfds(). ++ */ ++ memory_region_transaction_begin(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); ++ } ++ ++ /* ++ * The transaction expects the ioeventfds to be open when it ++ * commits. Do it now, before the cleanup loop. ++ */ ++ memory_region_transaction_commit(); ++ ++ for (i = 0; i < nvqs; i++) { ++ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); ++ } ++ ++ /* ++ * Set ->dataplane_started to false before draining so that host notifiers ++ * are not detached/attached anymore. ++ */ ++ s->dataplane_started = false; ++ ++ /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ ++ blk_drain(s->conf.conf.blk); ++ ++ /* ++ * Try to switch bs back to the QEMU main loop. 
If other users keep the ++ * BlockBackend in the iothread, that's ok ++ */ ++ blk_set_aio_context(s->conf.conf.blk, qemu_get_aio_context(), NULL); ++ ++ /* Clean up guest notifier (irq) */ ++ k->set_guest_notifiers(qbus->parent, nvqs, false); ++ ++ s->dataplane_stopping = false; ++} ++ + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -1680,7 +2015,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); +- virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); ++ virtio_blk_data_plane_create(s, &err); + if (err != NULL) { + error_propagate(errp, err); + for (i = 0; i < conf->num_queues; i++) { +@@ -1717,8 +2052,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + + blk_drain(s->blk); + del_boot_device_lchs(dev, "/disk@0,0"); +- virtio_blk_data_plane_destroy(s->dataplane); +- s->dataplane = NULL; ++ virtio_blk_data_plane_destroy(s); + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index 5e4091e4da..fecffdc303 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -50,8 +50,6 @@ struct VirtIOBlkConf + bool x_enable_wce_if_config_wce; + }; + +-struct VirtIOBlockDataPlane; +- + struct VirtIOBlockReq; + struct VirtIOBlock { + VirtIODevice parent_obj; +@@ -64,7 +62,15 @@ struct VirtIOBlock { + VMChangeStateEntry *change; + bool dataplane_disabled; + bool dataplane_started; +- struct VirtIOBlockDataPlane *dataplane; ++ bool dataplane_starting; ++ bool dataplane_stopping; ++ ++ /* ++ * The AioContext for each virtqueue. The BlockDriverState will use the ++ * first element as its AioContext. 
++ */ ++ AioContext **vq_aio_context; ++ + uint64_t host_features; + size_t config_size; + BlockRAMRegistrar blk_ram_registrar; +diff --git a/meson.build b/meson.build +index 6c77d9687d..47c65d0f53 100644 +--- a/meson.build ++++ b/meson.build +@@ -3298,7 +3298,6 @@ if have_system + 'hw/arm', + 'hw/audio', + 'hw/block', +- 'hw/block/dataplane', + 'hw/char', + 'hw/display', + 'hw/dma', +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch new file mode 100644 index 0000000..5f45b9d --- /dev/null +++ b/SOURCES/kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch @@ -0,0 +1,117 @@ +From 71257c2f320f1511de1e275779cf4b90effc1f02 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:44 -0500 +Subject: [PATCH 15/22] virtio-blk: rename dataplane create/destroy functions + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [11/17] 60e7016d5f3e4e9e89945578279b12f812f85ddf (stefanha/centos-stream-qemu-kvm) + +virtio_blk_data_plane_create() and virtio_blk_data_plane_destroy() are +actually about s->vq_aio_context[] rather than managing +dataplane-specific state. + +As a prerequisite to using s->vq_aio_context[] in all code paths (even +when dataplane is not used), rename these functions to reflect that they +just manage s->vq_aio_context and call them regardless of whether or not +dataplane is in use. + +Note that virtio-blk supports running with -device +virtio-blk-pci,ioeventfd=off where the vCPU thread enters the device +emulation code. In this mode ioeventfd is not used for virtqueue +processing. However, we still want to initialize s->vq_aio_context[] to +qemu_aio_context in that case since I/O completion callbacks will be +invoked in the main loop thread.
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 57bc2658935778d1ae0edbcd4402763da8c7bae2) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cb623069f8..4d6f9377c6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1608,7 +1608,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + IOThread *iothread = iothread_by_id(node->value->iothread); + AioContext *ctx = iothread_get_aio_context(iothread); + +- /* Released in virtio_blk_data_plane_destroy() */ ++ /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(iothread)); + + if (node->value->vqs) { +@@ -1631,7 +1631,7 @@ apply_vq_mapping(IOThreadVirtQueueMappingList *iothread_vq_mapping_list, + } + + /* Context: BQL held */ +-static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) ++static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + VirtIOBlkConf *conf = &s->conf; +@@ -1659,11 +1659,6 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + return false; + } + } +- /* Don't try if transport does not support notifiers. 
*/ +- if (!virtio_device_ioeventfd_enabled(vdev)) { +- s->dataplane_disabled = true; +- return false; +- } + + s->vq_aio_context = g_new(AioContext *, conf->num_queues); + +@@ -1676,7 +1671,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + s->vq_aio_context[i] = ctx; + } + +- /* Released in virtio_blk_data_plane_destroy() */ ++ /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(conf->iothread)); + } else { + AioContext *ctx = qemu_get_aio_context(); +@@ -1689,7 +1684,7 @@ static bool virtio_blk_data_plane_create(VirtIOBlock *s, Error **errp) + } + + /* Context: BQL held */ +-static void virtio_blk_data_plane_destroy(VirtIOBlock *s) ++static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + { + VirtIOBlkConf *conf = &s->conf; + +@@ -2015,7 +2010,13 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); +- virtio_blk_data_plane_create(s, &err); ++ ++ /* Don't start dataplane if transport does not support notifiers. 
*/ ++ if (!virtio_device_ioeventfd_enabled(vdev)) { ++ s->dataplane_disabled = true; ++ } ++ ++ virtio_blk_vq_aio_context_init(s, &err); + if (err != NULL) { + error_propagate(errp, err); + for (i = 0; i < conf->num_queues; i++) { +@@ -2052,7 +2053,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + + blk_drain(s->blk); + del_boot_device_lchs(dev, "/disk@0,0"); +- virtio_blk_data_plane_destroy(s); ++ virtio_blk_vq_aio_context_cleanup(s); + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch new file mode 100644 index 0000000..a0c0b67 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch @@ -0,0 +1,307 @@ +From ba80cdcd5604b9b9efc4682ade9828ab74ebf5e6 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:45 -0500 +Subject: [PATCH 16/22] virtio-blk: rename dataplane to ioeventfd + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [12/17] 4230005e0d1b4629fe4540f1f63cd705e58618da (stefanha/centos-stream-qemu-kvm) + +The dataplane code is really about using ioeventfd. It's used both for +IOThreads (what we think of as dataplane) and for the core virtio-pci +code's ioeventfd feature (which is enabled by default and used when no +IOThread has been specified). Rename the code to reflect this. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-4-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 3cdaf3dd4a4ca94ebabe7eab23b432f1a6c547cc) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 78 +++++++++++++++++----------------- + include/hw/virtio/virtio-blk.h | 8 ++-- + 2 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 4d6f9377c6..08c566946a 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status) + iov_discard_undo(&req->inhdr_undo); + iov_discard_undo(&req->outhdr_undo); + virtqueue_push(req->vq, &req->elem, req->in_len); +- if (s->dataplane_started && !s->dataplane_disabled) { ++ if (s->ioeventfd_started && !s->ioeventfd_disabled) { + virtio_notify_irqfd(vdev, req->vq); + } else { + virtio_notify(vdev, req->vq); +@@ -1141,12 +1141,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOBlock *s = (VirtIOBlock *)vdev; + +- if (!s->dataplane_disabled && !s->dataplane_started) { ++ if (!s->ioeventfd_disabled && !s->ioeventfd_started) { + /* Some guests kick before setting VIRTIO_CONFIG_S_DRIVER_OK so start +- * dataplane here instead of waiting for .set_status(). ++ * ioeventfd here instead of waiting for .set_status(). + */ + virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_disabled) { ++ if (!s->ioeventfd_disabled) { + return; + } + } +@@ -1213,7 +1213,7 @@ static void virtio_blk_reset(VirtIODevice *vdev) + VirtIOBlockReq *req; + + /* Dataplane has stopped... */ +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + + /* ...but requests may still be in flight. 
*/ + blk_drain(s->blk); +@@ -1380,7 +1380,7 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status) + VirtIOBlock *s = VIRTIO_BLK(vdev); + + if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) { +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + } + + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) { +@@ -1545,7 +1545,7 @@ static void virtio_blk_resize(void *opaque) + aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); + } + +-static void virtio_blk_data_plane_detach(VirtIOBlock *s) ++static void virtio_blk_ioeventfd_detach(VirtIOBlock *s) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +@@ -1555,7 +1555,7 @@ static void virtio_blk_data_plane_detach(VirtIOBlock *s) + } + } + +-static void virtio_blk_data_plane_attach(VirtIOBlock *s) ++static void virtio_blk_ioeventfd_attach(VirtIOBlock *s) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + +@@ -1570,8 +1570,8 @@ static void virtio_blk_drained_begin(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (s->dataplane_started) { +- virtio_blk_data_plane_detach(s); ++ if (s->ioeventfd_started) { ++ virtio_blk_ioeventfd_detach(s); + } + } + +@@ -1580,8 +1580,8 @@ static void virtio_blk_drained_end(void *opaque) + { + VirtIOBlock *s = opaque; + +- if (s->dataplane_started) { +- virtio_blk_data_plane_attach(s); ++ if (s->ioeventfd_started) { ++ virtio_blk_ioeventfd_attach(s); + } + } + +@@ -1651,11 +1651,11 @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp) + } + + /* +- * If dataplane is (re-)enabled while the guest is running there could ++ * If ioeventfd is (re-)enabled while the guest is running there could + * be block jobs that can conflict. 
+ */ + if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { +- error_prepend(errp, "cannot start virtio-blk dataplane: "); ++ error_prepend(errp, "cannot start virtio-blk ioeventfd: "); + return false; + } + } +@@ -1688,7 +1688,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + { + VirtIOBlkConf *conf = &s->conf; + +- assert(!s->dataplane_started); ++ assert(!s->ioeventfd_started); + + if (conf->iothread_vq_mapping_list) { + IOThreadVirtQueueMappingList *node; +@@ -1708,7 +1708,7 @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s) + } + + /* Context: BQL held */ +-static int virtio_blk_data_plane_start(VirtIODevice *vdev) ++static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); +@@ -1718,11 +1718,11 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + Error *local_err = NULL; + int r; + +- if (s->dataplane_started || s->dataplane_starting) { ++ if (s->ioeventfd_started || s->ioeventfd_starting) { + return 0; + } + +- s->dataplane_starting = true; ++ s->ioeventfd_starting = true; + + /* Set up guest notifier (irq) */ + r = k->set_guest_notifiers(qbus->parent, nvqs, true); +@@ -1773,14 +1773,14 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + + /* + * These fields must be visible to the IOThread when it processes the +- * virtqueue, otherwise it will think dataplane has not started yet. ++ * virtqueue, otherwise it will think ioeventfd has not started yet. + * +- * Make sure ->dataplane_started is false when blk_set_aio_context() is ++ * Make sure ->ioeventfd_started is false when blk_set_aio_context() is + * called above so that draining does not cause the host notifier to be + * detached/attached prematurely. 
+ */ +- s->dataplane_starting = false; +- s->dataplane_started = true; ++ s->ioeventfd_starting = false; ++ s->ioeventfd_started = true; + smp_wmb(); /* paired with aio_notify_accept() on the read side */ + + /* Get this show started by hooking up our callbacks */ +@@ -1812,8 +1812,8 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: +- s->dataplane_disabled = true; +- s->dataplane_starting = false; ++ s->ioeventfd_disabled = true; ++ s->ioeventfd_starting = false; + return -ENOSYS; + } + +@@ -1821,7 +1821,7 @@ static int virtio_blk_data_plane_start(VirtIODevice *vdev) + * + * Context: BH in IOThread + */ +-static void virtio_blk_data_plane_stop_vq_bh(void *opaque) ++static void virtio_blk_ioeventfd_stop_vq_bh(void *opaque) + { + VirtQueue *vq = opaque; + EventNotifier *host_notifier = virtio_queue_get_host_notifier(vq); +@@ -1836,7 +1836,7 @@ static void virtio_blk_data_plane_stop_vq_bh(void *opaque) + } + + /* Context: BQL held */ +-static void virtio_blk_data_plane_stop(VirtIODevice *vdev) ++static void virtio_blk_stop_ioeventfd(VirtIODevice *vdev) + { + VirtIOBlock *s = VIRTIO_BLK(vdev); + BusState *qbus = qdev_get_parent_bus(DEVICE(s)); +@@ -1844,24 +1844,24 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + unsigned i; + unsigned nvqs = s->conf.num_queues; + +- if (!s->dataplane_started || s->dataplane_stopping) { ++ if (!s->ioeventfd_started || s->ioeventfd_stopping) { + return; + } + + /* Better luck next time. 
*/ +- if (s->dataplane_disabled) { +- s->dataplane_disabled = false; +- s->dataplane_started = false; ++ if (s->ioeventfd_disabled) { ++ s->ioeventfd_disabled = false; ++ s->ioeventfd_started = false; + return; + } +- s->dataplane_stopping = true; ++ s->ioeventfd_stopping = true; + + if (!blk_in_drain(s->conf.conf.blk)) { + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); + AioContext *ctx = s->vq_aio_context[i]; + +- aio_wait_bh_oneshot(ctx, virtio_blk_data_plane_stop_vq_bh, vq); ++ aio_wait_bh_oneshot(ctx, virtio_blk_ioeventfd_stop_vq_bh, vq); + } + } + +@@ -1886,10 +1886,10 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + } + + /* +- * Set ->dataplane_started to false before draining so that host notifiers ++ * Set ->ioeventfd_started to false before draining so that host notifiers + * are not detached/attached anymore. + */ +- s->dataplane_started = false; ++ s->ioeventfd_started = false; + + /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */ + blk_drain(s->conf.conf.blk); +@@ -1903,7 +1903,7 @@ static void virtio_blk_data_plane_stop(VirtIODevice *vdev) + /* Clean up guest notifier (irq) */ + k->set_guest_notifiers(qbus->parent, nvqs, false); + +- s->dataplane_stopping = false; ++ s->ioeventfd_stopping = false; + } + + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +@@ -2011,9 +2011,9 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + } + qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + +- /* Don't start dataplane if transport does not support notifiers. */ ++ /* Don't start ioeventfd if transport does not support notifiers. 
*/ + if (!virtio_device_ioeventfd_enabled(vdev)) { +- s->dataplane_disabled = true; ++ s->ioeventfd_disabled = true; + } + + virtio_blk_vq_aio_context_init(s, &err); +@@ -2137,8 +2137,8 @@ static void virtio_blk_class_init(ObjectClass *klass, void *data) + vdc->reset = virtio_blk_reset; + vdc->save = virtio_blk_save_device; + vdc->load = virtio_blk_load_device; +- vdc->start_ioeventfd = virtio_blk_data_plane_start; +- vdc->stop_ioeventfd = virtio_blk_data_plane_stop; ++ vdc->start_ioeventfd = virtio_blk_start_ioeventfd; ++ vdc->stop_ioeventfd = virtio_blk_stop_ioeventfd; + } + + static const TypeInfo virtio_blk_info = { +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index fecffdc303..833a9a344f 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -60,10 +60,10 @@ struct VirtIOBlock { + unsigned short sector_mask; + bool original_wce; + VMChangeStateEntry *change; +- bool dataplane_disabled; +- bool dataplane_started; +- bool dataplane_starting; +- bool dataplane_stopping; ++ bool ioeventfd_disabled; ++ bool ioeventfd_started; ++ bool ioeventfd_starting; ++ bool ioeventfd_stopping; + + /* + * The AioContext for each virtqueue. 
The BlockDriverState will use the +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch new file mode 100644 index 0000000..611b881 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch @@ -0,0 +1,106 @@ +From 9311035821b3fea3f78c7f06ddb8a3861584f907 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:46 -0500 +Subject: [PATCH 17/22] virtio-blk: restart s->rq reqs in vq AioContexts + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [13/17] cf5ad0352a78458ffc7588f967963f62b267fd64 (stefanha/centos-stream-qemu-kvm) + +A virtio-blk device with the iothread-vq-mapping parameter has +per-virtqueue AioContexts. It is not thread-safe to process s->rq +requests in the BlockBackend AioContext since that may be different from +the virtqueue's AioContext to which this request belongs. The code +currently races and could crash. + +Adapt virtio_blk_dma_restart_cb() to first split s->rq into per-vq lists +and then schedule a BH in each vq's AioContext as necessary. This way +requests are safely processed in their vq's AioContext.
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-5-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 71ee0cdd14cc01a8b51aa4e9577dd0a1bb2f8e19) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 44 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 33 insertions(+), 11 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 08c566946a..f48ce5cbb8 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1156,16 +1156,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + + static void virtio_blk_dma_restart_bh(void *opaque) + { +- VirtIOBlock *s = opaque; ++ VirtIOBlockReq *req = opaque; ++ VirtIOBlock *s = req->dev; /* we're called with at least one request */ + +- VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- WITH_QEMU_LOCK_GUARD(&s->rq_lock) { +- req = s->rq; +- s->rq = NULL; +- } +- + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1195,16 +1190,43 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool running, + RunState state) + { + VirtIOBlock *s = opaque; ++ uint16_t num_queues = s->conf.num_queues; + + if (!running) { + return; + } + +- /* Paired with dec in virtio_blk_dma_restart_bh() */ +- blk_inc_in_flight(s->conf.conf.blk); ++ /* Split the device-wide s->rq request list into per-vq request lists */ ++ g_autofree VirtIOBlockReq **vq_rq = g_new0(VirtIOBlockReq *, num_queues); ++ VirtIOBlockReq *rq; ++ ++ WITH_QEMU_LOCK_GUARD(&s->rq_lock) { ++ rq = s->rq; ++ s->rq = NULL; ++ } ++ ++ while (rq) { ++ VirtIOBlockReq *next = rq->next; ++ uint16_t idx = virtio_get_queue_index(rq->vq); ++ ++ rq->next = vq_rq[idx]; ++ vq_rq[idx] = rq; ++ rq = next; ++ } + +- aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.conf.blk), +- virtio_blk_dma_restart_bh, s); ++ /* Schedule a BH to submit the requests in each vq's AioContext */ ++ for (uint16_t i = 
0; i < num_queues; i++) { ++ if (!vq_rq[i]) { ++ continue; ++ } ++ ++ /* Paired with dec in virtio_blk_dma_restart_bh() */ ++ blk_inc_in_flight(s->conf.conf.blk); ++ ++ aio_bh_schedule_oneshot(s->vq_aio_context[i], ++ virtio_blk_dma_restart_bh, ++ vq_rq[i]); ++ } + } + + static void virtio_blk_reset(VirtIODevice *vdev) +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch new file mode 100644 index 0000000..303c007 --- /dev/null +++ b/SOURCES/kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch @@ -0,0 +1,72 @@ +From 282cebc22987958d11efc76e4f6ddb9601e709d9 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Fri, 19 Jan 2024 08:57:47 -0500 +Subject: [PATCH 18/22] virtio-blk: tolerate failure to set BlockBackend + AioContext + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 219: virtio-blk: add iothread-vq-mapping parameter +RH-Jira: RHEL-17369 RHEL-20764 RHEL-7356 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Czenczek +RH-Commit: [14/17] edb113ce9fea0c1a88ae7b5d61c35c1981e6993f (stefanha/centos-stream-qemu-kvm) + +We no longer rely on setting the AioContext since the block layer +IO_CODE APIs can be called from any thread. Now it's just a hint to help +block jobs and other operations co-locate themselves in a thread with +the guest I/O requests. Keep going if setting the AioContext fails. 
+ +Suggested-by: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Message-ID: <20240119135748.270944-6-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit ea0736d7f84ead109a6b701427991828f97724c3) +Signed-off-by: Stefan Hajnoczi +--- + hw/block/virtio-blk.c | 19 +++++-------------- + 1 file changed, 5 insertions(+), 14 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index f48ce5cbb8..81de06c9f6 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1786,11 +1786,14 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + + memory_region_transaction_commit(); + ++ /* ++ * Try to change the AioContext so that block jobs and other operations can ++ * co-locate their activity in the same AioContext. If it fails, nevermind. ++ */ + r = blk_set_aio_context(s->conf.conf.blk, s->vq_aio_context[0], + &local_err); + if (r < 0) { +- error_report_err(local_err); +- goto fail_aio_context; ++ warn_report_err(local_err); + } + + /* +@@ -1819,18 +1822,6 @@ static int virtio_blk_start_ioeventfd(VirtIODevice *vdev) + } + return 0; + +- fail_aio_context: +- memory_region_transaction_begin(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); +- } +- +- memory_region_transaction_commit(); +- +- for (i = 0; i < nvqs; i++) { +- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); +- } + fail_host_notifiers: + k->set_guest_notifiers(qbus->parent, nvqs, false); + fail_guest_notifiers: +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch new file mode 100644 index 0000000..1f70049 --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch @@ -0,0 +1,87 @@ +From 5db0b4131c56d96760b3300298f4bedab99d35cb Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 6 Sep 2023 17:00:22 +0400 +Subject: 
[PATCH 100/101] virtio-gpu: block migration of VMs with blob=true +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 217: virtio-gpu: block migration of VMs with blob=true +RH-Jira: RHEL-7565 +RH-Commit: [1/1] f978ca697d574b1419eb027a1007c060dfb83298 + +JIRA: https://issues.redhat.com/browse/RHEL-7565 + +commit 9c549ab6895a43ad0cb33e684e11cdb0b5400897 +Author: Marc-André Lureau +Date: Wed Sep 6 17:00:22 2023 +0400 + +virtio-gpu: block migration of VMs with blob=true + +"blob" resources don't have an associated pixman image: + +#0 pixman_image_get_stride (image=0x0) at ../pixman/pixman-image.c:921 +#1 0x0000562327c25236 in virtio_gpu_save (f=0x56232bb13b00, opaque=0x56232b555a60, size=0, field=0x5623289ab6c8 <__compound_literal.3+104>, vmdesc=0x56232ab59fe0) at ../hw/display/virtio-gpu.c:1225 + +Related to: +https://bugzilla.redhat.com/show_bug.cgi?id=2236353 + +Signed-off-by: Marc-André Lureau +Acked-by: Peter Xu + +[ rhel backport - fix Error* vs Error** argument ] +Signed-off-by: Marc-André Lureau +--- + hw/display/virtio-gpu.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b016d3bac8..1702190ead 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -27,6 +27,7 @@ + #include "hw/virtio/virtio-gpu-pixman.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/qdev-properties.h" ++#include "migration/blocker.h" + #include "qemu/log.h" + #include "qemu/module.h" + #include "qapi/error.h" +@@ -41,6 +42,8 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id, + + static void virtio_gpu_reset_bh(void *opaque); + ++static Error *blob_mig_blocker; ++ + void virtio_gpu_update_cursor_data(VirtIOGPU *g, + struct virtio_gpu_scanout *s, + uint32_t resource_id) +@@ -1452,6 +1455,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + error_setg(errp, "blobs 
and virgl are not compatible (yet)"); + return; + } ++ ++ if (!blob_mig_blocker) { ++ error_setg(&blob_mig_blocker, ++ "virtio-gpu blob VMs are currently not migratable."); ++ } ++ if (migrate_add_blocker(&blob_mig_blocker, errp)) { ++ return; ++ } + } + + if (!virtio_gpu_base_device_realize(qdev, +@@ -1478,6 +1489,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev) + { + VirtIOGPU *g = VIRTIO_GPU(qdev); + ++ if (virtio_gpu_blob_enabled(g->parent_obj.conf)) { ++ migrate_del_blocker(&blob_mig_blocker); ++ } + g_clear_pointer(&g->ctrl_bh, qemu_bh_delete); + g_clear_pointer(&g->cursor_bh, qemu_bh_delete); + g_clear_pointer(&g->reset_bh, qemu_bh_delete); +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch deleted file mode 100644 index acfb3ae..0000000 --- a/SOURCES/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +++ /dev/null @@ -1,151 +0,0 @@ -From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:17 +0200 -Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device - assignment - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 - -When running on a 64kB page size host and protecting a VFIO device -with the virtio-iommu, qemu crashes with this kind of message: - -qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible -with mask 0x20010000 -qemu: hardware error: vfio: DMA mapping failed, unable to continue - -This is due to the fact the IOMMU MR corresponding to the VFIO device -is enabled very late on domain attach, after the machine init. 
-The device reports a minimal 64kB page size but it is too late to be -applied. virtio_iommu_set_page_size_mask() fails and this causes -vfio_listener_region_add() to end up with hw_error(); - -To work around this issue, we transiently enable the IOMMU MR on -machine init to collect the page size requirements and then restore -the bypass state. - -Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") -Signed-off-by: Eric Auger - -Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -Reviewed-by: Zhenzhong Duan -(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) -Signed-off-by: Eric Auger ---- - hw/virtio/trace-events | 1 + - hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- - include/hw/virtio/virtio-iommu.h | 2 ++ - 3 files changed, 32 insertions(+), 2 deletions(-) - -diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events -index 8f8d05cf9b..68b752e304 100644 ---- a/hw/virtio/trace-events -+++ b/hw/virtio/trace-events -@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m - virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" - virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" - virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" -+virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 - - # virtio-mem.c - virtio_mem_send_response(uint16_t type) "type=%" PRIu16 -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 1cd258135d..542679b321 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -24,6 +24,7 @@ - #include "hw/virtio/virtio.h" - #include "sysemu/kvm.h" - #include "sysemu/reset.h" -+#include "sysemu/sysemu.h" - 
#include "qapi/error.h" - #include "qemu/error-report.h" - #include "trace.h" -@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - } - - /* -- * After the machine is finalized, we can't change the mask anymore. If by -+ * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still - * accept it. Having a different masks is possible but the guest will use - * sub-optimal block sizes, so warn about it. - */ -- if (phase_check(PHASE_MACHINE_READY)) { -+ if (s->granule_frozen) { - int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) - - } - -+static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) -+{ -+ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); -+ int granule; -+ -+ if (likely(s->config.bypass)) { -+ /* -+ * Transient IOMMU MR enable to collect page_size_mask requirements -+ * through memory_region_iommu_set_page_size_mask() called by -+ * VFIO region_add() callback -+ */ -+ s->config.bypass = false; -+ virtio_iommu_switch_address_space_all(s); -+ /* restore default */ -+ s->config.bypass = true; -+ virtio_iommu_switch_address_space_all(s); -+ } -+ s->granule_frozen = true; -+ granule = ctz64(s->config.page_size_mask); -+ trace_virtio_iommu_freeze_granule(BIT(granule)); -+} -+ - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - { - VirtIODevice *vdev = VIRTIO_DEVICE(dev); -@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) - error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); - } - -+ s->machine_done.notify = virtio_iommu_freeze_granule; -+ qemu_add_machine_init_done_notifier(&s->machine_done); -+ - qemu_register_reset(virtio_iommu_system_reset, s); - } - -@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState 
*dev) - VirtIOIOMMU *s = VIRTIO_IOMMU(dev); - - qemu_unregister_reset(virtio_iommu_system_reset, s); -+ qemu_remove_machine_init_done_notifier(&s->machine_done); - - g_hash_table_destroy(s->as_by_busptr); - if (s->domains) { -diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h -index 2ad5ee320b..a93fc5383e 100644 ---- a/include/hw/virtio/virtio-iommu.h -+++ b/include/hw/virtio/virtio-iommu.h -@@ -61,6 +61,8 @@ struct VirtIOIOMMU { - QemuRecMutex mutex; - GTree *endpoints; - bool boot_bypass; -+ Notifier machine_done; -+ bool granule_frozen; - }; - - #endif --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch deleted file mode 100644 index 7934a12..0000000 --- a/SOURCES/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Wed, 5 Jul 2023 18:51:18 +0200 -Subject: [PATCH 02/37] virtio-iommu: Rework the traces in - virtio_iommu_set_page_size_mask() - -RH-Author: Eric Auger -RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes -RH-Bugzilla: 2211609 2211634 -RH-Acked-by: Gavin Shan -RH-Acked-by: Sebastian Ott -RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 - -The current error messages in virtio_iommu_set_page_size_mask() -sound quite similar for different situations and miss the IOMMU -memory region that causes the issue. - -Clarify them and rework the comment. - -Also remove the trace when the new page_size_mask is not applied as -the current frozen granule is kept. This message is rather confusing -for the end user and anyway the current granule would have been used -by the driver. 
- -Signed-off-by: Eric Auger -Reviewed-by: Zhenzhong Duan -Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -Tested-by: Jean-Philippe Brucker -(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 542679b321..421e2a944f 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - new_mask); - - if ((cur_mask & new_mask) == 0) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); -+ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 -+ " incompatible with currently supported mask 0x%"PRIx64, -+ mr->parent_obj.name, new_mask, cur_mask); - return -1; - } - - /* - * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still -- * accept it. Having a different masks is possible but the guest will use -- * sub-optimal block sizes, so warn about it. -+ * accept it. 
- */ - if (s->granule_frozen) { -- int new_granule = ctz64(new_mask); - int cur_granule = ctz64(cur_mask); - -- if (new_granule != cur_granule) { -- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 -- " is incompatible with mask 0x%"PRIx64, cur_mask, -- new_mask); -+ if (!(BIT(cur_granule) & new_mask)) { -+ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", -+ mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -- } else if (new_mask != cur_mask) { -- warn_report("virtio-iommu page mask 0x%"PRIx64 -- " does not match 0x%"PRIx64, cur_mask, new_mask); - } - return 0; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch b/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch deleted file mode 100644 index 638ae98..0000000 --- a/SOURCES/kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 59cd85621b1b14ada843ea0562cc76b6a7c93df4 Mon Sep 17 00:00:00 2001 -From: Eric Auger -Date: Tue, 18 Jul 2023 20:21:36 +0200 -Subject: [PATCH 08/14] virtio-iommu: Standardize granule extraction and - formatting - -RH-Author: Eric Auger -RH-MergeRequest: 197: virtio-iommu/smmu: backport some late fixes -RH-Bugzilla: 2229133 -RH-Acked-by: Thomas Huth -RH-Acked-by: Peter Xu -RH-Commit: [2/3] 48784ef2a19174518f66479dcb532230bffe8bf1 (eauger1/centos-qemu-kvm) - -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2229133 - -At several locations we compute the granule from the config -page_size_mask using ctz() and then format it in traces using -BIT(). As the page_size_mask is 64b we should use ctz64 and -BIT_ULL() for formatting. We failed to be consistent. - -Note the page_size_mask is garanteed to be non null. The spec -mandates the device to set at least one bit, so ctz64 cannot -return 64. 
This is garanteed by the fact the device -initializes the page_size_mask to qemu_target_page_mask() -and then the page_size_mask is further constrained by -virtio_iommu_set_page_size_mask() callback which can't -result in a new mask being null. So if Coverity complains -round those ctz64/BIT_ULL with CID 1517772 this is a false -positive - -Signed-off-by: Eric Auger -Fixes: 94df5b2180 ("virtio-iommu: Fix 64kB host page size VFIO device assignment") -Message-Id: <20230718182136.40096-1-eric.auger@redhat.com> -Reviewed-by: Michael S. Tsirkin -Signed-off-by: Michael S. Tsirkin -Reviewed-by: Jean-Philippe Brucker -(cherry picked from commit 1084feddc6a677cdfdde56936bfb97cf32cc4dee) -Signed-off-by: Eric Auger ---- - hw/virtio/virtio-iommu.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c -index 17ce630200..17b3dcd158 100644 ---- a/hw/virtio/virtio-iommu.c -+++ b/hw/virtio/virtio-iommu.c -@@ -854,17 +854,19 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, - VirtIOIOMMUEndpoint *ep; - uint32_t sid, flags; - bool bypass_allowed; -+ int granule; - bool found; - int i; - - interval.low = addr; - interval.high = addr + 1; -+ granule = ctz64(s->config.page_size_mask); - - IOMMUTLBEntry entry = { - .target_as = &address_space_memory, - .iova = addr, - .translated_addr = addr, -- .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, -+ .addr_mask = BIT_ULL(granule) - 1, - .perm = IOMMU_NONE, - }; - -@@ -1117,7 +1119,7 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - if (s->granule_frozen) { - int cur_granule = ctz64(cur_mask); - -- if (!(BIT(cur_granule) & new_mask)) { -+ if (!(BIT_ULL(cur_granule) & new_mask)) { - error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", - mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; -@@ -1163,7 +1165,7 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) 
- } - s->granule_frozen = true; - granule = ctz64(s->config.page_size_mask); -- trace_virtio_iommu_freeze_granule(BIT(granule)); -+ trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); - } - - static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch new file mode 100644 index 0000000..6ad1c98 --- /dev/null +++ b/SOURCES/kvm-virtio-mem-default-enable-dynamic-memslots.patch @@ -0,0 +1,70 @@ +From 94bccae527f1ab8328cc7692532046d700e2ca71 Mon Sep 17 00:00:00 2001 +From: David Hildenbrand +Date: Mon, 5 Feb 2024 19:27:07 +0100 +Subject: [PATCH 22/22] virtio-mem: default-enable "dynamic-memslots" + +RH-Author: David Hildenbrand +RH-MergeRequest: 220: virtio-mem: default-enable "dynamic-memslots" +RH-Jira: RHEL-24045 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] d9a60acd7de1d8703ea3ca938e388e19f31f5347 + +JIRA: https://issues.redhat.com/browse/RHEL-24045 +Upstream: RHEL only + +We only support selected vhost-user devices in combination with +virtio-mem in RHEL. One device that works well is virtiofsd, devices that +are currently incompatible include DPDK and SPDK. + +The vhost devices we support must be compatible with the dynamic-memslot +feature (i.e., support at least 509 memslots, support dynamically adding/ +removing memslots), such that setting "dynamic-memslots=on" will work as +expected and not make certain QEMU commandlines or hotplug of vhost-user +devices bail out. + +Let's set "dynamic-memslots=on" starting with RHEL 9.4, so we +get the benefits (i.e., reduced metadata consumption in KVM, majority of +unplugged memory being inaccessible) as default. + +When wanting to run virtio-mem with incompatible vhost-user devices, it +might just work (if the vhost-user device is created before the +virtio-mem device), or the feature can be manually disabled by +specifying "dynamic-memslots=off".
+ +Signed-off-by: David Hildenbrand +--- + hw/core/machine.c | 2 ++ + hw/virtio/virtio-mem.c | 3 ++- + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 446601ee30..309f6ba685 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -78,6 +78,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, + /* hw_compat_rhel_9_4 from hw_compat_8_1 */ + { "igb", "x-pcie-flr-init", "off" }, ++ /* hw_compat_rhel_9_4 jira RHEL-24045 */ ++ { "virtio-mem", "dynamic-memslots", "off" }, + }; + const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + +diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c +index 75ee38aa46..00ca91e8fe 100644 +--- a/hw/virtio/virtio-mem.c ++++ b/hw/virtio/virtio-mem.c +@@ -1696,8 +1696,9 @@ static Property virtio_mem_properties[] = { + #endif + DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, + early_migration, true), ++ /* RHEL: default-enable "dynamic-memslots" (jira RHEL-24045) */ + DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, +- dynamic_memslots, false), ++ dynamic_memslots, true), + DEFINE_PROP_END_OF_LIST(), + }; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch deleted file mode 100644 index 119ea84..0000000 --- a/SOURCES/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 -From: Laurent Vivier -Date: Mon, 5 Jun 2023 16:21:25 +0200 -Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Laurent Vivier -RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value -RH-Bugzilla: 2040509 -RH-Acked-by: Stefano 
Garzarella -RH-Acked-by: Eugenio Pérez -RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) - -BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 - -Maximum value for tx_queue_size depends on the backend type. -1024 for vDPA/vhost-user, 256 for all the others. - -The value is returned by virtio_net_max_tx_queue_size() to set the -parameter: - - n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), - n->net_conf.tx_queue_size); - -But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). - -So the parameter is silently ignored and ethtool reports a different -value than the one provided by the user. - - ... -netdev tap,... -device virtio-net,tx_queue_size=1024 - - # ethtool -g enp0s2 - Ring parameters for enp0s2: - Pre-set maximums: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - Current hardware settings: - RX: 256 - RX Mini: n/a - RX Jumbo: n/a - TX: 256 - - ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048 - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -With this patch the correct maximum value is checked and displayed. 
- -For vDPA/vhost-user: - - Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 - -For all the others: - - Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 - -Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") -Cc: mst@redhat.com -Cc: qemu-stable@nongnu.org -Signed-off-by: Laurent Vivier -Signed-off-by: Jason Wang -(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) ---- - hw/net/virtio-net.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 447f669921..ae1e6a5e3d 100644 ---- a/hw/net/virtio-net.c -+++ b/hw/net/virtio-net.c -@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) - } - - if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || -- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || -+ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || - !is_power_of_2(n->net_conf.tx_queue_size)) { - error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " - "must be a power of 2 between %d and %d", - n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, -- VIRTQUEUE_MAX_SIZE); -+ virtio_net_max_tx_queue_size(n)); - virtio_cleanup(vdev); - return; - } --- -2.39.3 - diff --git a/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch new file mode 100644 index 0000000..b8066b2 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch @@ -0,0 +1,78 @@ +From da3a5afa41790ae913d41cfcdc3c6a8731ae3fe8 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 2 Feb 2024 16:31:56 +0100 +Subject: [PATCH 1/6] virtio-scsi: Attach event vq notifier with no_poll + +RH-Author: Hanna Czenczek +RH-MergeRequest: 223: virtio: Re-enable notifications after drain +RH-Jira: RHEL-3934 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 
d29b461a0a4b584af0ee80fb3f9e45c92ea88eb0 (hreitz/qemu-kvm-c-9-s) + +As of commit 38738f7dbbda90fbc161757b7f4be35b52205552 ("virtio-scsi: +don't waste CPU polling the event virtqueue"), we only attach an io_read +notifier for the virtio-scsi event virtqueue instead, and no polling +notifiers. During operation, the event virtqueue is typically +non-empty, but none of the buffers are intended to be used immediately. +Instead, they only get used when certain events occur. Therefore, it +makes no sense to continuously poll it when non-empty, because it is +supposed to be and stay non-empty. + +We do this by using virtio_queue_aio_attach_host_notifier_no_poll() +instead of virtio_queue_aio_attach_host_notifier() for the event +virtqueue. + +Commit 766aa2de0f29b657148e04599320d771c36fd126 ("virtio-scsi: implement +BlockDevOps->drained_begin()") however has virtio_scsi_drained_end() use +virtio_queue_aio_attach_host_notifier() for all virtqueues, including +the event virtqueue. This can lead to it being polled again, undoing +the benefit of commit 38738f7dbbda90fbc161757b7f4be35b52205552. + +Fix it by using virtio_queue_aio_attach_host_notifier_no_poll() for the +event virtqueue. 
+ +Reported-by: Fiona Ebner +Fixes: 766aa2de0f29b657148e04599320d771c36fd126 + ("virtio-scsi: implement BlockDevOps->drained_begin()") +Reviewed-by: Stefan Hajnoczi +Tested-by: Fiona Ebner +Reviewed-by: Fiona Ebner +Signed-off-by: Hanna Czenczek +Message-ID: <20240202153158.788922-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit c42c3833e0cfdf2b80fb3ca410acfd392b6874ab) +--- + hw/scsi/virtio-scsi.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index ca365a70e9..9943186917 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -1149,6 +1149,7 @@ static void virtio_scsi_drained_begin(SCSIBus *bus) + static void virtio_scsi_drained_end(SCSIBus *bus) + { + VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus); ++ VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIODevice *vdev = VIRTIO_DEVICE(s); + uint32_t total_queues = VIRTIO_SCSI_VQ_NUM_FIXED + + s->parent_obj.conf.num_queues; +@@ -1166,7 +1167,11 @@ static void virtio_scsi_drained_end(SCSIBus *bus) + + for (uint32_t i = 0; i < total_queues; i++) { + VirtQueue *vq = virtio_get_queue(vdev, i); +- virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ if (vq == vs->event_vq) { ++ virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx); ++ } else { ++ virtio_queue_aio_attach_host_notifier(vq, s->ctx); ++ } + } + } + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch new file mode 100644 index 0000000..9ad8fdf --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch @@ -0,0 +1,58 @@ +From 1ee3f919a51135a0798a14c734ca80d74d30025d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Mon, 4 Dec 2023 11:42:57 -0500 +Subject: [PATCH 078/101] virtio-scsi: don't lock AioContext around + 
virtio_queue_aio_attach_host_notifier() + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [9/26] 5e1179e617d05bf765b285ba42393ec1ddbeba28 (kmwolf/centos-qemu-kvm) + +virtio_queue_aio_attach_host_notifier() does not require the AioContext +lock. Stop taking the lock and add an explicit smp_wmb() because we were +relying on the implicit barrier in the AioContext lock before. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231204164259.1515217-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/virtio-scsi-dataplane.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 1e684beebe..135e23fe54 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + memory_region_transaction_commit(); + +- /* +- * These fields are visible to the IOThread so we rely on implicit barriers +- * in aio_context_acquire() on the write side and aio_notify_accept() on +- * the read side. 
+- */ + s->dataplane_starting = false; + s->dataplane_started = true; ++ smp_wmb(); /* paired with aio_notify_accept() */ + + if (s->bus.drain_count == 0) { +- aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); + virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); + } +- aio_context_release(s->ctx); + } + return 0; + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch new file mode 100644 index 0000000..2654cb7 --- /dev/null +++ b/SOURCES/kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch @@ -0,0 +1,173 @@ +From c2d7633ead6e19d4b6af5552ca907ae071b8734b Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 5 Dec 2023 13:19:58 -0500 +Subject: [PATCH 081/101] virtio-scsi: replace AioContext lock with tmf_bh_lock + +RH-Author: Kevin Wolf +RH-MergeRequest: 214: Remove AioContext lock +RH-Jira: RHEL-15965 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [12/26] 8fb375bfd72a491d47321c78078577071a4e90fb (kmwolf/centos-qemu-kvm) + +Protect the Task Management Function BH state with a lock. The TMF BH +runs in the main loop thread. An IOThread might process a TMF at the +same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh +must be protected by a lock. + +Run TMF request completion in the IOThread using aio_wait_bh_oneshot(). +This avoids more locking to protect the virtqueue and SCSI layer state. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Reviewed-by: Kevin Wolf +Message-ID: <20231205182011.1976568-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++----------- + include/hw/virtio/virtio-scsi.h | 3 +- + 2 files changed, 43 insertions(+), 22 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9c751bf296..4f8d35facc 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req) + virtio_scsi_free_req(req); + } + ++static void virtio_scsi_complete_req_bh(void *opaque) ++{ ++ VirtIOSCSIReq *req = opaque; ++ ++ virtio_scsi_complete_req(req); ++} ++ ++/* ++ * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop ++ * thread cannot touch the virtqueue since that could race with an IOThread. ++ */ ++static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req) ++{ ++ VirtIOSCSI *s = req->dev; ++ ++ if (!s->ctx || s->ctx == qemu_get_aio_context()) { ++ /* No need to schedule a BH when there is no IOThread */ ++ virtio_scsi_complete_req(req); ++ } else { ++ /* Run request completion in the IOThread */ ++ aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req); ++ } ++} ++ + static void virtio_scsi_bad_req(VirtIOSCSIReq *req) + { + virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers"); +@@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req) + + out: + object_unref(OBJECT(d)); +- +- virtio_scsi_acquire(s); +- virtio_scsi_complete_req(req); +- virtio_scsi_release(s); ++ virtio_scsi_complete_req_from_main_loop(req); + } + + /* Some TMFs must be processed from the main loop thread */ +@@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque) + + GLOBAL_STATE_CODE(); + +- virtio_scsi_acquire(s); ++ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { ++ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { ++ 
QTAILQ_REMOVE(&s->tmf_bh_list, req, next); ++ QTAILQ_INSERT_TAIL(&reqs, req, next); ++ } + +- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) { +- QTAILQ_REMOVE(&s->tmf_bh_list, req, next); +- QTAILQ_INSERT_TAIL(&reqs, req, next); ++ qemu_bh_delete(s->tmf_bh); ++ s->tmf_bh = NULL; + } + +- qemu_bh_delete(s->tmf_bh); +- s->tmf_bh = NULL; +- +- virtio_scsi_release(s); +- + QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) { + QTAILQ_REMOVE(&reqs, req, next); + virtio_scsi_do_one_tmf_bh(req); +@@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) + + GLOBAL_STATE_CODE(); + +- virtio_scsi_acquire(s); +- ++ /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */ + if (s->tmf_bh) { + qemu_bh_delete(s->tmf_bh); + s->tmf_bh = NULL; +@@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s) + req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE; + virtio_scsi_complete_req(req); + } +- +- virtio_scsi_release(s); + } + + static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req) + { + VirtIOSCSI *s = req->dev; + +- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); ++ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) { ++ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next); + +- if (!s->tmf_bh) { +- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); +- qemu_bh_schedule(s->tmf_bh); ++ if (!s->tmf_bh) { ++ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s); ++ qemu_bh_schedule(s->tmf_bh); ++ } + } + } + +@@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) + Error *err = NULL; + + QTAILQ_INIT(&s->tmf_bh_list); ++ qemu_mutex_init(&s->tmf_bh_lock); + + virtio_scsi_common_realize(dev, + virtio_scsi_handle_ctrl, +@@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev) + + qbus_set_hotplug_handler(BUS(&s->bus), NULL); + virtio_scsi_common_unrealize(dev); ++ qemu_mutex_destroy(&s->tmf_bh_lock); + } + + static Property virtio_scsi_properties[] = { +diff --git 
a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 779568ab5d..da8cb928d9 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -85,8 +85,9 @@ struct VirtIOSCSI { + + /* + * TMFs deferred to main loop BH. These fields are protected by +- * virtio_scsi_acquire(). ++ * tmf_bh_lock. + */ ++ QemuMutex tmf_bh_lock; + QEMUBH *tmf_bh; + QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list; + +-- +2.39.3 + diff --git a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch b/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch deleted file mode 100644 index e1eef6d..0000000 --- a/SOURCES/kvm-vl.c-Create-late-backends-before-migration-object.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 63e2339a6f38706c6fc5eb251426812520db6a6d Mon Sep 17 00:00:00 2001 -From: Peter Xu -Date: Wed, 19 Apr 2023 12:17:37 -0400 -Subject: [PATCH 03/56] vl.c: Create late backends before migration object -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -RH-Author: Peter Xu -RH-MergeRequest: 162: migration: Pretty failures for postcopy on unsupported memory types -RH-Bugzilla: 2057267 -RH-Acked-by: Leonardo Brás -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: quintela1 -RH-Commit: [2/50] 7209bb94faa48650388be8fef08c77afd26517d8 (peterx/qemu-kvm) - -The migration object may want to check against different types of memory -when initialized. Delay the creation to be after late backends. 
- -Signed-off-by: Peter Xu -Reviewed-by: Juan Quintela -Reviewed-by: David Hildenbrand -Signed-off-by: Juan Quintela -(cherry picked from commit cb9d8b8ce1aaf38f53295fc59ec1b8b7eb4338d2) -Signed-off-by: Peter Xu ---- - softmmu/vl.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/softmmu/vl.c b/softmmu/vl.c -index ad4173138d..a44b49430b 100644 ---- a/softmmu/vl.c -+++ b/softmmu/vl.c -@@ -3592,14 +3592,19 @@ void qemu_init(int argc, char **argv) - machine_class->name, machine_class->deprecation_reason); - } - -+ /* -+ * Create backends before creating migration objects, so that it can -+ * check against compatibilities on the backend memories (e.g. postcopy -+ * over memory-backend-file objects). -+ */ -+ qemu_create_late_backends(); -+ - /* - * Note: creates a QOM object, must run only after global and - * compat properties have been set up. - */ - migration_object_init(); - -- qemu_create_late_backends(); -- - /* parse features once if machine provides default cpu_type */ - current_machine->cpu_type = machine_class->default_cpu_type; - if (cpu_option) { --- -2.39.1 - diff --git a/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch new file mode 100644 index 0000000..a8bf6ac --- /dev/null +++ b/SOURCES/kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch @@ -0,0 +1,48 @@ +From 2932c8de175fadeed4bb7c1024724cbabc53f6d5 Mon Sep 17 00:00:00 2001 +From: Sebastian Ott +Date: Mon, 19 Feb 2024 02:37:27 -0500 +Subject: [PATCH 6/6] x86: rhel 9.2.0 machine type compat fix + +RH-Author: Sebastian Ott +RH-MergeRequest: 342: Draft: x86: rhel 9.2.0 machine type compat fix (RHEL) +RH-Jira: RHEL-17068 +RH-Acked-by: Thomas Huth +RH-Commit: [23/23] 658dda965f34119de300eef26155f47b1b3fa7f1 + +Fix up the compatibility for 9.2.0 and older. 
+ +Signed-off-by: Sebastian Ott +--- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 44038391fb..09d02cc91f 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1023,6 +1023,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->enforce_amd_1tb_hole = false; + /* From pc_i440fx_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_i440fx_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + /* Introduced in QEMU 8.2 */ + pcmc->default_south_bridge = TYPE_PIIX3_DEVICE; + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 6387df97c8..c6967e1846 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -759,6 +759,8 @@ static void pc_q35_machine_rhel920_options(MachineClass *m) + + /* From pc_q35_8_0_machine_options() */ + pcmc->default_smbios_ep_type = SMBIOS_ENTRY_POINT_TYPE_32; ++ /* From pc_q35_8_1_machine_options() */ ++ pcmc->broken_32bit_mem_addr_check = true; + + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); +-- +2.39.3 + diff --git a/SOURCES/qemu-ga.sysconfig b/SOURCES/qemu-ga.sysconfig index a78b428..736b471 100644 --- a/SOURCES/qemu-ga.sysconfig +++ b/SOURCES/qemu-ga.sysconfig @@ -1,11 +1,19 @@ # This is a systemd environment file, not a shell script. # It provides settings for "/lib/systemd/system/qemu-guest-agent.service". -# Comma-separated blocked RPCs to disable, or empty list to enable all. +# Guest agent command with comma-separated blocked RPCs to disable, +# or empty list to enable all. # # You can get the list of RPC commands using "qemu-ga --block-rpcs='?'". # There should be no spaces between commas and commands in the block list.
-BLOCK_RPCS=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status +# FILTER_RPC_ARGS="--block-rpcs=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status" + +# Guest agent command with comma-separated allowed RPCs to enable, +# or empty list to disable all. +# +# You can get the list of RPC commands using "qemu-ga --allow-rpcs='?'". +# There should be no spaces between commas and commands in the allow list. +FILTER_RPC_ARGS="--allow-rpcs=guest-sync-delimited,guest-sync,guest-ping,guest-get-time,guest-set-time,guest-info,guest-shutdown,guest-fsfreeze-status,guest-fsfreeze-freeze,guest-fsfreeze-freeze-list,guest-fsfreeze-thaw,guest-fstrim,guest-suspend-disk,guest-suspend-ram,guest-suspend-hybrid,guest-network-get-interfaces,guest-get-vcpus,guest-set-vcpus,guest-get-disks,guest-get-fsinfo,guest-set-user-password,guest-get-memory-blocks,guest-set-memory-blocks,guest-get-memory-block-info,guest-get-host-name,guest-get-users,guest-get-timezone,guest-get-osinfo,guest-get-devices,guest-ssh-get-authorized-keys,guest-ssh-add-authorized-keys,guest-ssh-remove-authorized-keys,guest-get-diskstats,guest-get-cpustats" # Fsfreeze hook script specification. 
# diff --git a/SOURCES/qemu-guest-agent.service b/SOURCES/qemu-guest-agent.service index 244da02..f74ebd0 100644 --- a/SOURCES/qemu-guest-agent.service +++ b/SOURCES/qemu-guest-agent.service @@ -10,7 +10,7 @@ EnvironmentFile=/etc/sysconfig/qemu-ga ExecStart=/usr/bin/qemu-ga \ --method=virtio-serial \ --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ - --block-rpcs=${BLOCK_RPCS} \ + ${FILTER_RPC_ARGS} \ -F${FSFREEZE_HOOK_PATHNAME} Restart=always RestartSec=0 diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index a85380b..973e9a2 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -58,7 +58,7 @@ %global tools_only 1 %endif -%ifnarch %{ix86} x86_64 +%ifnarch %{ix86} x86_64 aarch64 %global have_usbredir 0 %endif @@ -148,8 +148,8 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm -Version: 8.0.0 -Release: 13%{?rcrel}%{?dist}%{?cc_suffix} +Version: 8.2.0 +Release: 11%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -186,366 +186,430 @@ Patch0012: 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0013: 0013-Add-support-statement-to-help-output.patch Patch0014: 0014-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0015: 0015-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch -Patch0016: 0016-qga-linux-add-usb-support-to-guest-get-fsinfo.patch -Patch0017: 0017-Add-RHEL-9.2.0-compat-structure.patch -Patch0018: 0018-redhat-hw-i386-pc-Update-x86-machine-type-compatibil.patch -Patch0019: 0019-Disable-unwanted-new-devices.patch -# For bz#2087047 - Disk detach is unsuccessful while the guest is still booting -Patch20: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch21: kvm-hw-acpi-limit-warning-on-acpi-table-size-to-pc-machi.patch -# For bz#1934134 - ACPI table limits warning when booting guest with 512 VCPUs -Patch22: kvm-hw-acpi-Mark-acpi-blobs-as-resizable-on-RHEL-pc-mach.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch23: kvm-migration-Handle-block-device-inactivation-failures-.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch24: kvm-migration-Minor-control-flow-simplification.patch -# For bz#2058982 - Qemu core dump if cut off nfs storage during migration -Patch25: kvm-migration-Attempt-disk-reactivation-in-more-failure-.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch26: kvm-util-mmap-alloc-qemu_fd_getfs.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch27: kvm-vl.c-Create-late-backends-before-migration-object.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch28: 
kvm-migration-postcopy-Detect-file-system-on-dest-host.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch29: kvm-migration-mark-mixed-functions-that-can-suspend.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch30: kvm-postcopy-ram-do-not-use-qatomic_mb_read.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch31: kvm-migration-remove-extra-whitespace-character-for-code.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch32: kvm-migration-Merge-ram_counters-and-ram_atomic_counters.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch33: kvm-migration-Update-atomic-stats-out-of-the-mutex.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch34: kvm-migration-Make-multifd_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch35: kvm-migration-Make-dirty_sync_missed_zero_copy-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch36: kvm-migration-Make-precopy_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch37: kvm-migration-Make-downtime_bytes-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch38: kvm-migration-Make-dirty_sync_count-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch39: kvm-migration-Make-postcopy_requests-atomic.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch40: kvm-migration-Rename-duplicate-to-zero_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch41: kvm-migration-Rename-normal-to-normal_pages.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared 
memory -Patch42: kvm-migration-rename-enabled_capabilities-to-capabilitie.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch43: kvm-migration-Pass-migrate_caps_check-the-old-and-new-ca.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch44: kvm-migration-move-migration_global_dump-to-migration-hm.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch45: kvm-spice-move-client_migrate_info-command-to-ui.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch46: kvm-migration-Create-migrate_cap_set.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch47: kvm-migration-Create-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch48: kvm-migration-Move-migrate_colo_enabled-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch49: kvm-migration-Move-migrate_use_compression-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch50: kvm-migration-Move-migrate_use_events-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch51: kvm-migration-Move-migrate_use_multifd-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch52: kvm-migration-Move-migrate_use_zero_copy_send-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch53: kvm-migration-Move-migrate_use_xbzrle-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch54: kvm-migration-Move-migrate_use_block-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch55: kvm-migration-Move-migrate_use_return-to-options.c.patch -# For bz#2057267 - 
Migration with postcopy fail when vm set with shared memory -Patch56: kvm-migration-Create-migrate_rdma_pin_all-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch57: kvm-migration-Move-migrate_caps_check-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch58: kvm-migration-Move-qmp_query_migrate_capabilities-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch59: kvm-migration-Move-qmp_migrate_set_capabilities-to-optio.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch60: kvm-migration-Move-migrate_cap_set-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch61: kvm-migration-Move-parameters-functions-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch62: kvm-migration-Use-migrate_max_postcopy_bandwidth.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch63: kvm-migration-Move-migrate_use_block_incremental-to-opti.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch64: kvm-migration-Create-migrate_throttle_trigger_threshold.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch65: kvm-migration-Create-migrate_checkpoint_delay.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch66: kvm-migration-Create-migrate_max_cpu_throttle.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch67: kvm-migration-Move-migrate_announce_params-to-option.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch68: kvm-migration-Create-migrate_cpu_throttle_initial-to-opt.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch69: 
kvm-migration-Create-migrate_cpu_throttle_increment-func.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch70: kvm-migration-Create-migrate_cpu_throttle_tailslow-funct.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch71: kvm-migration-Move-migrate_postcopy-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch72: kvm-migration-Create-migrate_max_bandwidth-function.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch73: kvm-migration-Move-migrate_use_tls-to-options.c.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch74: kvm-migration-Move-qmp_migrate_set_parameters-to-options.patch -# For bz#2057267 - Migration with postcopy fail when vm set with shared memory -Patch75: kvm-migration-Allow-postcopy_ram_supported_by_host-to-re.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch76: kvm-block-bdrv-blk_co_unref-for-calls-in-coroutine-conte.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch77: kvm-block-Don-t-call-no_coroutine_fns-in-qmp_block_resiz.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch78: kvm-iotests-Use-alternative-CPU-type-that-is-not-depreca.patch -# For bz#2185688 - [qemu-kvm] no response with QMP command block_resize -Patch79: kvm-iotests-Test-resizing-image-attached-to-an-iothread.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch80: kvm-block-compile-out-assert_bdrv_graph_readable-by-defa.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch81: kvm-graph-lock-Disable-locking-for-now.patch -# For bz#2186725 - Qemu hang when commit during fio running(iothread enable) -Patch82: kvm-nbd-server-Fix-drained_poll-to-wake-coroutine-in-rig.patch -# For bz#2186725 - 
Qemu hang when commit during fio running(iothread enable) -Patch83: kvm-iotests-Test-commit-with-iothreads-and-ongoing-I-O.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch84: kvm-memory-prevent-dma-reentracy-issues.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch85: kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch86: kvm-checkpatch-add-qemu_bh_new-aio_bh_new-checks.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch87: kvm-hw-replace-most-qemu_bh_new-calls-with-qemu_bh_new_g.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch88: kvm-lsi53c895a-disable-reentrancy-detection-for-script-R.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch89: kvm-bcm2835_property-disable-reentrancy-detection-for-io.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch90: kvm-raven-disable-reentrancy-detection-for-iomem.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch91: kvm-apic-disable-reentrancy-detection-for-apic-msi.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch92: kvm-async-avoid-use-after-free-on-re-entrancy-guard.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch93: 
kvm-loongarch-mark-loongarch_ipi_iocsr-re-entrnacy-safe.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch94: kvm-memory-stricter-checks-prior-to-unsetting-engaged_in.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch95: kvm-lsi53c895a-disable-reentrancy-detection-for-MMIO-reg.patch -# For RHEL-516 - CVE-2023-2680 qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue (incomplete fix for CVE-2021-3750) [rhel-9] -Patch96: kvm-hw-scsi-lsi53c895a-Fix-reentrancy-issues-in-the-LSI-.patch -# For bz#2189423 - Failed to migrate VM from rhel 9.3 to rhel 9.2 -Patch97: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-register-for-machi.patch -# For bz#2196289 - Fix number of ready channels on multifd -Patch98: kvm-multifd-Fix-the-number-of-channels-ready.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch99: kvm-util-async-teardown-wire-up-query-command-line-optio.patch -# For bz#2168500 - [IBM 9.3 FEAT] KVM: Improve memory reclaiming for z15 Secure Execution guests - qemu part -Patch100: kvm-s390x-pv-Fix-spurious-warning-with-asynchronous-tear.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch101: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch -# For bz#2216201 - [qemu-kvm]VM reports vulnerabilty to mmio_stale_data on patched host with microcode -Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch -# For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers -Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular 
CPU-to-NUMA association -Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch -# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association -Patch107: kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch -# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed -Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch -# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone) -Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch -# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest -Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch -# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" -# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package -Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch113: kvm-vfio-pci-add-support-for-VF-token.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch114: 
kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch119: kvm-migration-Move-migration_properties-to-options.c.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch120: kvm-migration-Add-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch121: kvm-migration-Implement-switchover-ack-logic.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch122: kvm-migration-Enable-switchover-ack-capability.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch -# For 
bz#2192818 - [VFIO LM] Live migration -Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch135: kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch -# For bz#2192818 - [VFIO LM] Live migration -Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch -# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build -Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch -# For bz#2222579 - PNG screendump doesn't save screen correctly -Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch -# For bz#2213317 - Enable libblkio-based block drivers in QEMU -Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug 
it repeatly -Patch147: kvm-scsi-fetch-unit-attention-when-creating-the-request.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch148: kvm-scsi-cleanup-scsi_clear_unit_attention.patch -# For bz#2176702 - [RHEL9][virtio-scsi] scsi-hd cannot hot-plug successfully after hot-plug it repeatly -Patch149: kvm-scsi-clear-unit-attention-only-for-REPORT-LUNS-comma.patch -# For RHEL-794 - Backport s390x fixes from QEMU 8.1 -Patch150: kvm-s390x-ap-Wire-up-the-device-request-notifier-interfa.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch151: kvm-multifd-Create-property-multifd-flush-after-each-sec.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch152: kvm-multifd-Protect-multifd_send_sync_main-calls.patch -# For bz#2196295 - Multifd flushes its channels 10 times per second -Patch153: kvm-multifd-Only-flush-once-each-full-round-of-memory.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch154: kvm-net-socket-prepare-to-cleanup-net_init_socket.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch -# For RHEL-582 - [passt][rhel 9.3] qemu core dump occurs when guest is shutdown after hotunplug/hotplug a passt interface -Patch156: kvm-net-socket-remove-net_init_socket.patch -# For bz#2215819 - Migration test failed while guest with PCIe devices -Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch158: kvm-util-iov-Make-qiov_slice-public.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch159: 
kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch161: kvm-iotests-iov-padding-New-test.patch -# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] -Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch -# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" -Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch -# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value -Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch -# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 -Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch -# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region -Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch167: kvm-block-blkio-enable-the-completion-eventfd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch168: kvm-block-blkio-do-not-use-open-flags-in-qemu_open.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver 
virtio-blk-vhost-vdpa -Patch169: kvm-block-blkio-move-blkio_connect-in-the-drivers-functi.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch170: kvm-block-blkio-retry-blkio_connect-if-it-fails-using-fd.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch171: kvm-block-blkio-fall-back-on-using-path-when-fd-setting-.patch -# For bz#2225354 - [vdpa-blk] The new driver virtio-blk-vhost-user not work in VM booting -# For bz#2225439 - [vdpa-blk] read-only=on option not work on driver virtio-blk-vhost-vdpa -Patch172: kvm-block-blkio-use-blkio_set_int-fd-to-check-fd-support.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch173: kvm-hw-virtio-iommu-Fix-potential-OOB-access-in-virtio_i.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch174: kvm-virtio-iommu-Standardize-granule-extraction-and-form.patch -# For bz#2229133 - Backport some virtio-iommu and smmu fixes -Patch175: kvm-hw-arm-smmu-Handle-big-endian-hosts-correctly.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch176: kvm-qapi-i386-sev-Change-the-reduced-phys-bits-value-fro.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch177: kvm-qemu-options.hx-Update-the-reduced-phys-bits-documen.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch178: kvm-i386-sev-Update-checks-and-information-related-to-re.patch -# For bz#2214839 - [AMDSERVER 9.3 Bug] Qemu SEV reduced-phys-bits fixes -Patch179: kvm-i386-cpu-Update-how-the-EBX-register-of-CPUID-0x8000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch180: kvm-target-i386-allow-versioned-CPUs-to-specify-new-cach.patch -# For 
bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch181: kvm-target-i386-Add-new-EPYC-CPU-versions-with-updated-c.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch182: kvm-target-i386-Add-a-couple-of-feature-bits-in-8000_000.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch183: kvm-target-i386-Add-feature-bits-for-CPUID_Fn80000021_EA.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch184: kvm-target-i386-Add-missing-feature-bits-in-EPYC-Milan-m.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch185: kvm-target-i386-Add-VNMI-and-automatic-IBRS-feature-bits.patch -# For bz#2094913 - Add EPYC-Genoa CPU model in qemu -Patch186: kvm-target-i386-Add-EPYC-Genoa-model-to-support-Zen-4-pr.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch187: kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch188: kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch189: kvm-vdpa-use-first-queue-SVQ-state-for-CVQ-default.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch190: kvm-vdpa-export-vhost_vdpa_set_vring_ready.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch191: kvm-vdpa-rename-vhost_vdpa_net_load-to-vhost_vdpa_net_cv.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch192: kvm-vdpa-move-vhost_vdpa_set_vring_ready-to-the-caller.patch -# For RHEL-923 - vhost shadow virtqueue: state restore through CVQ -Patch193: kvm-vdpa-remove-net-cvq-migration-blocker.patch +Patch0016: 0016-Introduce-RHEL-9.4.0-qemu-kvm-machine-type-for-aarch.patch +# For RHEL-17168 - Introduce virt-rhel9.4.0 arm-virt machine type [aarch64] +Patch17: kvm-hw-arm-virt-Fix-compats.patch +# For RHEL-19738 - Enable properties allowing to disable high memory regions +Patch18: 
kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch19: kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch20: kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch21: kvm-vfio-container-Switch-to-dma_map-unmap-API.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch22: kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch23: kvm-vfio-common-Move-giommu_list-in-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch24: kvm-vfio-container-Move-space-field-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch25: kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch26: kvm-vfio-container-Move-per-container-device-list-in-bas.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch27: 
kvm-vfio-container-Convert-functions-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch28: kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch29: kvm-vfio-container-Move-vrdl_list-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch30: kvm-vfio-container-Move-listener-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch31: kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch32: kvm-vfio-container-Move-iova_ranges-to-base-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch33: kvm-vfio-container-Implement-attach-detach_device.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch34: kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch35: kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch36: 
kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch37: kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch38: kvm-backends-iommufd-Introduce-the-iommufd-object.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch39: kvm-util-char_dev-Add-open_cdev.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch40: kvm-vfio-common-return-early-if-space-isn-t-empty.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch41: kvm-vfio-iommufd-Implement-the-iommufd-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch42: kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch43: kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch44: kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch45: 
kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch46: kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch47: kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch48: kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch49: kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch50: kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch51: kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch52: kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch53: kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch54: kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch55: kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch56: kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch57: kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch58: kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch59: kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch60: kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch61: kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch62: kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch63: kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch64: kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch65: kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch66: kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch67: kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch68: kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch69: kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch70: kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch71: kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 
9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch72: kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch73: kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch74: kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch75: kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch76: kvm-vfio-container-Replace-basename-with-g_path_get_base.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch77: kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch78: kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch79: kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch80: kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport 
of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch81: kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch82: kvm-backends-iommufd-Remove-mutex.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch83: kvm-Compile-IOMMUFD-object-on-aarch64.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch84: kvm-Compile-IOMMUFD-on-s390x.patch +# For RHEL-19302 - NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend +# For RHEL-21057 - Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6 +Patch85: kvm-Compile-IOMMUFD-on-x86_64.patch +# For RHEL-18212 - [RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption +Patch86: kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch87: kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch88: kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch89: kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch90: kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch91: kvm-virtio-blk-add-lock-to-protect-s-rq.patch +# For RHEL-15965 - [qemu-kvm] Remove 
AioContext lock (no response with QMP command block_resize) +Patch92: kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch93: kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch94: kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch95: kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch96: kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch97: kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch98: kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch99: kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch100: kvm-tests-remove-aio_context_acquire-tests.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch101: kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch102: kvm-graph-lock-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch103: kvm-block-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove 
AioContext lock (no response with QMP command block_resize) +Patch104: kvm-block-remove-bdrv_co_lock.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch105: kvm-scsi-remove-AioContext-locking.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch106: kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch107: kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch108: kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch109: kvm-scsi-remove-outdated-AioContext-lock-comment.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch110: kvm-job-remove-outdated-AioContext-locking-comments.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch111: kvm-block-remove-outdated-AioContext-locking-comments.patch +# For RHEL-15965 - [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize) +Patch112: kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch113: kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch114: kvm-s390x-pci-refresh-fh-before-disabling-aif.patch +# For RHEL-21169 - [s390x] VM fails to start with ISM passed through QEMU 8.2 +Patch115: kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch +# For RHEL-21570 - Critical performance degradation for input devices in virtio vnc session +Patch116: 
kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +# For RHEL-7565 - qemu crashed when migrate guest with blob resources enabled +Patch117: kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch +# For RHEL-21293 - [emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument +Patch118: kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch +# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 +Patch119: kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch +# For RHEL-20341 - memory-device size alignment check invalid in QEMU 8.2 +Patch120: kvm-memory-device-reintroduce-memory-region-size-check.patch +# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk +Patch121: kvm-block-backend-Allow-concurrent-context-changes.patch +# For RHEL-24593 - qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk +Patch122: kvm-scsi-Await-request-purging.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch123: kvm-string-output-visitor-show-structs-as-omitted.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch124: kvm-string-output-visitor-Fix-pseudo-struct-handling.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch125: kvm-qdev-properties-alias-all-object-class-properties.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch126: kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch127: kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch128: kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch129: kvm-iotests-add-filter_qmp_generated_node_ids.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch130: kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch131: kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch132: kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch133: kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch134: kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch135: kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch136: kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch137: kvm-virtio-blk-always-set-ioeventfd-during-startup.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. +# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch138: kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch +# For RHEL-17369 - [nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed. 
+# For RHEL-20764 - [qemu-kvm] Enable qemu multiqueue block layer support +# For RHEL-7356 - [qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9] +Patch139: kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch +# For RHEL-24045 - QEMU: default-enable dynamically using multiple memslots for virtio-mem +Patch140: kvm-virtio-mem-default-enable-dynamic-memslots.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch141: kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch142: kvm-virtio-Re-enable-notifications-after-drain.patch +# For RHEL-3934 - [qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled +Patch143: kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch +# For RHEL-15394 - virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space +Patch144: kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch +# For RHEL-24988 - Mark virt-rhel9.{0,2}.0 machine types as deprecated +Patch145: kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch +# For RHEL-17068 - Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64] +Patch146: kvm-x86-rhel-9.2.0-machine-type-compat-fix.patch +# For RHEL-26049 - When max vcpu is greater than or equal to 246, qemu unable to init event notifier +Patch147: kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch +# For RHEL-24614 - [RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest +Patch148: kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch +# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9] +Patch149: kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch +# For RHEL-19629 - CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer 
dereference in qemu_clipboard_request() [rhel-9] +Patch150: kvm-ui-clipboard-add-asserts-for-update-and-request.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch151: kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch152: kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch153: kvm-Implement-SMBIOS-type-9-v2.6.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch154: kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch155: kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch156: kvm-smbios-get-rid-of-smbios_legacy-global.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch157: kvm-smbios-avoid-mangling-user-provided-tables.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch158: kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch159: kvm-smbios-add-smbios_add_usr_blob_size-helper.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch160: kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch161: kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch162: kvm-smbios-handle-errors-consistently.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch163: 
kvm-smbios-get-rid-of-global-smbios_ep_type.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch164: kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch165: kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch166: kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch167: kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch168: kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch169: kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch170: kvm-nbd-server-Fix-race-in-draining-the-export.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. 
+Patch171: kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch172: kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch173: kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch174: kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch +# For RHEL-24614 - [RHEL9][chardev] qemu hit core dump while using TLS server from host to guest +Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch +# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" +Patch176: kvm-coroutine-cap-per-thread-local-pool-size.patch +# For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" +Patch177: kvm-coroutine-reserve-5-000-mappings.patch %if %{have_clang} BuildRequires: clang @@ -558,6 +622,7 @@ BuildRequires: gcc BuildRequires: meson >= %{meson_version} BuildRequires: ninja-build BuildRequires: zlib-devel +BuildRequires: libzstd-devel BuildRequires: glib2-devel BuildRequires: gnutls-devel BuildRequires: cyrus-sasl-devel @@ -711,8 +776,8 @@ Summary: %{name} documentation %package -n qemu-pr-helper Summary: qemu-pr-helper utility for %{name} %description -n qemu-pr-helper -This package provides the qemu-pr-helper utility that is required for certain -SCSI features. +This package provides the qemu-pr-helper utility that is required for certain +SCSI features. 
%package -n qemu-img @@ -851,7 +916,7 @@ Summary: QEMU usbredir support Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} Requires: usbredir >= 0.7.1 Provides: %{name}-hw-usbredir -Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} +Obsoletes: %{name}-hw-usbredir <= %{epoch}:%{version} %description device-usb-redirect This package provides usbredir support. @@ -915,6 +980,7 @@ ulimit -n 10240 --disable-debug-tcg \\\ --disable-dmg \\\ --disable-docs \\\ + --disable-download \\\ --disable-dsound \\\ --disable-fdt \\\ --disable-fuse \\\ @@ -927,7 +993,6 @@ ulimit -n 10240 --disable-gtk \\\ --disable-guest-agent \\\ --disable-guest-agent-msi \\\ - --disable-hax \\\ --disable-hvf \\\ --disable-iconv \\\ --disable-jack \\\ @@ -964,6 +1029,7 @@ ulimit -n 10240 --disable-pa \\\ --disable-parallels \\\ --disable-pie \\\ + --disable-plugins \\\ --disable-pvrdma \\\ --disable-qcow1 \\\ --disable-qed \\\ @@ -1018,7 +1084,6 @@ ulimit -n 10240 --disable-xen-pci-passthrough \\\ --disable-xkbcommon \\\ --disable-zstd \\\ - --with-git-submodules=ignore \\\ --without-default-devices @@ -1039,10 +1104,8 @@ run_configure() { --with-pkgversion="%{name}-%{version}-%{release}" \ --with-suffix="%{name}" \ --firmwarepath=%{firmwaredirs} \ - --meson="%{__meson}" \ --enable-trace-backends=dtrace \ --with-coroutine=ucontext \ - --with-git=git \ --tls-priority=@QEMU,SYSTEM \ %{disable_everything} \ --with-devices-%{kvm_target}=%{kvm_target}-rh-devices \ @@ -1133,6 +1196,7 @@ run_configure \ --enable-werror \ %endif --enable-xkbcommon \ + --enable-zstd \ %if %{have_safe_stack} --enable-safe-stack \ %endif @@ -1283,7 +1347,7 @@ mkdir -p %{buildroot}%{_datadir}/%{name}/tracetool/format install -m 0644 -t %{buildroot}%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py mkdir -p %{buildroot}%{qemudocdir} -install -p -m 0644 -t %{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE docs/interop/qmp-spec.txt +install -p -m 0644 -t 
%{buildroot}%{qemudocdir} README.rst README.systemtap COPYING COPYING.LIB LICENSE # Rename man page pushd %{buildroot}%{_mandir}/man1/ @@ -1607,6 +1671,309 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 +- kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] +- kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947] +- Resolves: RHEL-28947 + (Qemu crashing with "failed to set up stack guard page: Cannot allocate memory") + +* Thu Mar 21 2024 Miroslav Rezanina - 8.2.0-10 +- kvm-chardev-lower-priority-of-the-HUP-GSource-in-socket-.patch [RHEL-24614] +- kvm-Revert-chardev-char-socket-Fix-TLS-io-channels-sendi.patch [RHEL-24614] +- kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch [RHEL-24614] +- Resolves: RHEL-24614 + ([RHEL9][chardev] qemu hit core dump while using TLS server from host to guest) + +* Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 +- kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] +- kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] +- kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] +- kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] +- Resolves: RHEL-28125 + (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) 
+- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Mon Mar 18 2024 Miroslav Rezanina - 8.2.0-8 +- kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch [RHEL-19629] +- kvm-ui-clipboard-add-asserts-for-update-and-request.patch [RHEL-19629] +- kvm-hw-i386-pc-Defer-smbios_set_defaults-to-machine_done.patch [RHEL-21705] +- kvm-Implement-base-of-SMBIOS-type-9-descriptor.patch [RHEL-21705] +- kvm-Implement-SMBIOS-type-9-v2.6.patch [RHEL-21705] +- kvm-smbios-cleanup-smbios_get_tables-from-legacy-handlin.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_smp_sockets-global.patch [RHEL-21705] +- kvm-smbios-get-rid-of-smbios_legacy-global.patch [RHEL-21705] +- kvm-smbios-avoid-mangling-user-provided-tables.patch [RHEL-21705] +- kvm-smbios-don-t-check-type4-structures-in-legacy-mode.patch [RHEL-21705] +- kvm-smbios-add-smbios_add_usr_blob_size-helper.patch [RHEL-21705] +- kvm-smbios-rename-expose-structures-bitmaps-used-by-both.patch [RHEL-21705] +- kvm-smbios-build-legacy-mode-code-only-for-pc-machine.patch [RHEL-21705] +- kvm-smbios-handle-errors-consistently.patch [RHEL-21705] +- kvm-smbios-get-rid-of-global-smbios_ep_type.patch [RHEL-21705] +- kvm-smbios-clear-smbios_type4_count-before-building-tabl.patch [RHEL-21705] +- kvm-smbios-extend-smbios-entry-point-type-with-auto-valu.patch [RHEL-21705] +- kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch [RHEL-21705] +- kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch [RHEL-21705] +- kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch [RHEL-21705] +- Resolves: RHEL-19629 + (CVE-2023-6683 qemu-kvm: QEMU: VNC: NULL pointer dereference in qemu_clipboard_request() [rhel-9]) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + +* Fri Mar 08 2024 Miroslav Rezanina - 8.2.0-7 +- kvm-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch [RHEL-26049] +- 
kvm-chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch [RHEL-24614] +- Resolves: RHEL-26049 + (When max vcpu is greater than or equal to 246, qemu unable to init event notifier) +- Resolves: RHEL-24614 + ([RHEL9][chardev][s390x] qemu hit core dump while using TLS server from host to guest) + +* Mon Feb 19 2024 Miroslav Rezanina - 8.2.0-6 +- kvm-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch [RHEL-3934] +- kvm-virtio-Re-enable-notifications-after-drain.patch [RHEL-3934] +- kvm-virtio-blk-Use-ioeventfd_attach-in-start_ioeventfd.patch [RHEL-3934] +- kvm-virtio-blk-avoid-using-ioeventfd-state-in-irqfd-cond.patch [RHEL-15394] +- kvm-hw-arm-virt-deprecate-virt-rhel9.-0-2-.0-machine-typ.patch [RHEL-24988] +- Resolves: RHEL-3934 + ([qemu-kvm] Failed on repeatedly hotplug/unplug disk iothread enabled ) +- Resolves: RHEL-15394 + (virtio-blk: qemu hang on "no response on QMP query-status" when write data to disk without enough space) +- Resolves: RHEL-24988 + (Mark virt-rhel9.{0,2}.0 machine types as deprecated) + +* Mon Feb 12 2024 Miroslav Rezanina - 8.2.0-5 +- kvm-hv-balloon-use-get_min_alignment-to-express-32-GiB-a.patch [RHEL-20341] +- kvm-memory-device-reintroduce-memory-region-size-check.patch [RHEL-20341] +- kvm-block-backend-Allow-concurrent-context-changes.patch [RHEL-24593] +- kvm-scsi-Await-request-purging.patch [RHEL-24593] +- kvm-string-output-visitor-show-structs-as-omitted.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-string-output-visitor-Fix-pseudo-struct-handling.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-properties-alias-all-object-class-properties.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-qdev-add-IOThreadVirtQueueMappingList-property-type.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-add-iothread-vq-mapping-parameter.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-Fix-potential-nullpointer-read-access-in-.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-add-filter_qmp_generated_node_ids.patch 
[RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-port-141-to-Python-for-reliable-QMP-testing.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-monitor-only-run-coroutine-commands-in-qemu_aio_cont.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-move-dataplane-code-into-virtio-blk.c.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-create-destroy-functions.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-rename-dataplane-to-ioeventfd.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-restart-s-rq-reqs-in-vq-AioContexts.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-tolerate-failure-to-set-BlockBackend-AioC.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-blk-always-set-ioeventfd-during-startup.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-tests-unit-Bump-test-replication-timeout-to-60-secon.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-iotests-iothreads-stream-Use-the-right-TimeoutError.patch [RHEL-17369 RHEL-20764 RHEL-7356] +- kvm-virtio-mem-default-enable-dynamic-memslots.patch [RHEL-24045] +- Resolves: RHEL-20341 + (memory-device size alignment check invalid in QEMU 8.2) +- Resolves: RHEL-24593 + (qemu crash blk_get_aio_context(BlockBackend *): Assertion `ctx == blk->ctx' when repeatedly hotplug/unplug disk) +- Resolves: RHEL-17369 + ([nfv virt][rt][post-copy migration] qemu-kvm: ../block/qcow2.c:5263: ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *, Error **): Assertion `false' failed.) 
+- Resolves: RHEL-20764 + ([qemu-kvm] Enable qemu multiqueue block layer support) +- Resolves: RHEL-7356 + ([qemu-kvm] no response with QMP command device_add when repeatedly hotplug/unplug virtio disks [RHEL-9]) +- Resolves: RHEL-24045 + (QEMU: default-enable dynamically using multiple memslots for virtio-mem) + +* Tue Jan 30 2024 Miroslav Rezanina - 8.2.0-4 +- kvm-vfio-pci-Clear-MSI-X-IRQ-index-always.patch [RHEL-21293] +- Resolves: RHEL-21293 + ([emulated igb] Failed to set up TRIGGER eventfd signaling for interrupt INTX-0: VFIO_DEVICE_SET_IRQS failure: Invalid argument) + +* Wed Jan 24 2024 Miroslav Rezanina - 8.2.0-3 +- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [RHEL-19738] +- kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-dma_map-unmap-API.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-Move-giommu_list-in-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-space-field-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-per-container-device-list-in-bas.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Convert-functions-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-vrdl_list-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-listener-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Move-iova_ranges-to-base-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Implement-attach-detach_device.patch [RHEL-19302 
RHEL-21057] +- kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Introduce-the-iommufd-object.patch [RHEL-19302 RHEL-21057] +- kvm-util-char_dev-Add-open_cdev.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-common-return-early-if-space-isn-t-empty.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Implement-the-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch [RHEL-19302 RHEL-21057] +- kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch 
[RHEL-19302 RHEL-21057] +- kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch [RHEL-19302 RHEL-21057] +- kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch [RHEL-19302 RHEL-21057] +- kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch [RHEL-19302 RHEL-21057] +- kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-container-Replace-basename-with-g_path_get_base.patch [RHEL-19302 RHEL-21057] +- kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch [RHEL-19302 RHEL-21057] +- 
kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch [RHEL-19302 RHEL-21057] +- kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch [RHEL-19302 RHEL-21057] +- kvm-backends-iommufd-Remove-mutex.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-object-on-aarch64.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-s390x.patch [RHEL-19302 RHEL-21057] +- kvm-Compile-IOMMUFD-on-x86_64.patch [RHEL-19302 RHEL-21057] +- kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch [RHEL-18212] +- kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch [RHEL-15965] +- kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch [RHEL-15965] +- kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch [RHEL-15965] +- kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch [RHEL-15965] +- kvm-virtio-blk-add-lock-to-protect-s-rq.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch [RHEL-15965] +- kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch [RHEL-15965] +- kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch [RHEL-15965] +- kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch [RHEL-15965] +- kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch [RHEL-15965] +- kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch [RHEL-15965] +- kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch [RHEL-15965] +- kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch [RHEL-15965] +- kvm-tests-remove-aio_context_acquire-tests.patch [RHEL-15965] +- kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch [RHEL-15965] +- kvm-graph-lock-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-AioContext-locking.patch [RHEL-15965] +- kvm-block-remove-bdrv_co_lock.patch [RHEL-15965] +- kvm-scsi-remove-AioContext-locking.patch [RHEL-15965] +- 
kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch [RHEL-15965] +- kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch [RHEL-15965] +- kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch [RHEL-15965] +- kvm-scsi-remove-outdated-AioContext-lock-comment.patch [RHEL-15965] +- kvm-job-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] +- kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch [RHEL-15965] +- kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch [RHEL-21169] +- kvm-s390x-pci-refresh-fh-before-disabling-aif.patch [RHEL-21169] +- kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch [RHEL-21169] +- kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch [RHEL-21570] +- kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch [RHEL-7565] +- kvm-spec-Enable-zstd.patch [RHEL-7361] +- Resolves: RHEL-19738 + (Enable properties allowing to disable high memory regions) +- Resolves: RHEL-19302 + (NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend) +- Resolves: RHEL-21057 + (Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6) +- Resolves: RHEL-18212 + ([RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption) +- Resolves: RHEL-15965 + ( [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize)) +- Resolves: RHEL-21169 + ([s390x] VM fails to start with ISM passed through QEMU 8.2) +- Resolves: RHEL-21570 + (Critical performance degradation for input devices in virtio vnc session) +- Resolves: RHEL-7565 + (qemu crashed when migrate guest with blob resources enabled) +- Resolves: RHEL-7361 + ([qemu-kvm] Enable zstd support for qcow2 files) + +* Mon Jan 08 2024 Miroslav Rezanina - 8.2.0-2 +- kvm-hw-arm-virt-Fix-compats.patch [RHEL-17168] +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) + +* Tue Jan 02 2024 Miroslav 
Rezanina - 8.2.0-1 +- Rebase to QEMU 8.2.0 [RHEL-14111] +- Fix machine type compatibility [RHEL-17067 RHEL-17068] +- Add 9.4.0 machine type [RHEL-17168 RHEL-19117 RHEL-19119] +- Resolves: RHEL-14111 + (Rebase qemu-kvm to QEMU 8.2.0) +- Resolves: RHEL-17067 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [s390x]) +- Resolves: RHEL-17068 + (Check/fix machine type compatibility for qemu-kvm 8.2.0 [x86_64]) +- Resolves: RHEL-17168 + (Introduce virt-rhel9.4.0 arm-virt machine type [aarch64]) +- Resolves: RHEL-19117 + (Introduce virt-rhel9.4.0 arm-virt machine type [x86_64]) +- Resolves: RHEL-19119 + (Introduce virt-rhel9.4.0 arm-virt machine type [s390x]) + +* Thu Nov 30 2023 Miroslav Rezanina - 8.1.0-5 +- kvm-Preparation-for-using-allow-rpcs-list-in-guest-agent.patch [RHEL-955] +- kvm-Use-allow-rpcs-instead-of-block-rpcs-in-guest-agent..patch [RHEL-955] +- Resolves: RHEL-955 + (Use allow-rpcs instead of block-rpcs in guest-agent.service) + +* Mon Nov 13 2023 Miroslav Rezanina - 8.1.0-4 +- kvm-hw-scsi-scsi-disk-Disallow-block-sizes-smaller-than-.patch [RHEL-2828] +- kvm-Enable-igb-on-x86_64.patch [RHEL-1308] +- kvm-host-include-generic-host-atomic128-Fix-compilation-.patch [RHEL-12991] +- kvm-Enable-qemu-kvm-device-usb-redirec-for-aarch64.patch [RHEL-7561] +- Resolves: RHEL-2828 + (CVE-2023-42467 qemu-kvm: qemu: denial of service due to division by zero [rhel-9]) +- Resolves: RHEL-1308 + ([RFE] iGB: Add an emulated SR-IOV network card) +- Resolves: RHEL-12991 + (qemu-kvm fails to build on s390x with clang-17) +- Resolves: RHEL-7561 + (Missing the rpm package qemu-kvm-device-usb-redirect on Arm64 platform) + +* Mon Oct 16 2023 Miroslav Rezanina - 8.1.0-3 +- kvm-migration-Fix-race-that-dest-preempt-thread-close-to.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-setting-rp_state.er.patch [RHEL-11219] +- kvm-migration-Fix-possible-races-when-shutting-down-the-.patch [RHEL-11219] +- kvm-migration-Fix-possible-race-when-shutting-down-to_ds.patch 
[RHEL-11219] +- kvm-migration-Remove-redundant-cleanup-of-postcopy_qemuf.patch [RHEL-11219] +- kvm-migration-Consolidate-return-path-closing-code.patch [RHEL-11219] +- kvm-migration-Replace-the-return-path-retry-logic.patch [RHEL-11219] +- kvm-migration-Move-return-path-cleanup-to-main-migration.patch [RHEL-11219] +- kvm-file-posix-Clear-bs-bl.zoned-on-error.patch [RHEL-7360] +- kvm-file-posix-Check-bs-bl.zoned-for-zone-info.patch [RHEL-7360] +- kvm-file-posix-Fix-zone-update-in-I-O-error-path.patch [RHEL-7360] +- kvm-file-posix-Simplify-raw_co_prw-s-out-zone-code.patch [RHEL-7360] +- kvm-tests-file-io-error-New-test.patch [RHEL-7360] +- Resolves: RHEL-11219 + (migration tests failing for RHEL 9.4 sometimes) +- Resolves: RHEL-7360 + (Qemu Core Dumped When Writing Larger Size Than The Size of A Data Disk) + +* Mon Oct 02 2023 Miroslav Rezanina - 8.1.0-2 +- kvm-virtio-Drop-out-of-coroutine-context-in-virtio_load.patch [RHEL-832] +- Resolves: RHEL-832 + (qemu-kvm crashed when migrating guest with failover vf) + +* Mon Sep 04 2023 Miroslav Rezanina - 8.1.0-1 +- Rebase to QEMU 8.1 [RHEL-870] +- Resolves: RHEL-870 + (Rebase qemu-kvm to QEMU 8.1.0) + * Thu Aug 24 2023 Miroslav Rezanina - 8.0.0-13 - kvm-vdpa-return-errno-in-vhost_vdpa_get_vring_group-erro.patch [RHEL-923] - kvm-vdpa-move-CVQ-isolation-check-to-net_init_vhost_vdpa.patch [RHEL-923] From b93771b47804b6ef8e0e879ad4f10dd95176c593 Mon Sep 17 00:00:00 2001 From: Andrew Lukoshko Date: Thu, 20 Jun 2024 08:38:02 +0000 Subject: [PATCH 2/4] import OL qemu-kvm-8.2.0-11.el9_4.3 --- .../kvm-iotests-test-NBD-TLS-iothread.patch | 275 +++++++++++++++ ...-kvm_irqchip_add_msi_route-in-case-o.patch | 76 ++++ ...negotiation-functions-as-coroutine_f.patch | 329 ++++++++++++++++++ ...-not-poll-within-a-coroutine-context.patch | 207 +++++++++++ ...erit-follow_coroutine_ctx-across-TLS.patch | 129 +++++++ ...tio-pci-fix-use-of-a-released-vector.patch | 162 +++++++++ SPECS/qemu-kvm.spec | 32 +- 7 files changed, 1209 insertions(+), 1 
deletion(-) create mode 100644 SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch create mode 100644 SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch create mode 100644 SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch create mode 100644 SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch create mode 100644 SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch create mode 100644 SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch diff --git a/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch new file mode 100644 index 0000000..9d47aab --- /dev/null +++ b/SOURCES/kvm-iotests-test-NBD-TLS-iothread.patch @@ -0,0 +1,275 @@ +From 4296c41178438f9dffa16c538b0c1a2e28944f4c Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:15 -0500 +Subject: [PATCH 4/4] iotests: test NBD+TLS+iothread +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 375: Fix regression on nbd+tls +RH-Jira: RHEL-33754 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] 5905c09466f4e65f3ff9973b41f42cbe1d75363c (ebblake/qemu-kvm) + +Prevent regressions when using NBD with TLS in the presence of +iothreads, adding coverage the fix to qio channels made in the +previous patch. + +The shell function pick_unused_port() was copied from +nbdkit.git/tests/functions.sh.in, where it had all authors from Red +Hat, agreeing to the resulting relicensing from 2-clause BSD to GPLv2. + +CC: qemu-stable@nongnu.org +CC: "Richard W.M. Jones" +Signed-off-by: Eric Blake +Message-ID: <20240531180639.1392905-6-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +(cherry picked from commit a73c99378022ebb785481e84cfe1e81097546268) +Jira: https://issues.redhat.com/browse/RHEL-33754 +Signed-off-by: Eric Blake +--- + tests/qemu-iotests/tests/nbd-tls-iothread | 168 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-tls-iothread.out | 54 ++++++ + 2 files changed, 222 insertions(+) + create mode 100755 tests/qemu-iotests/tests/nbd-tls-iothread + create mode 100644 tests/qemu-iotests/tests/nbd-tls-iothread.out + +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread b/tests/qemu-iotests/tests/nbd-tls-iothread +new file mode 100755 +index 0000000000..a2fb07206e +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread +@@ -0,0 +1,168 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Test of NBD+TLS+iothread ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++# creator ++owner=eblake@redhat.com ++ ++seq=`basename $0` ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_qemu ++ _cleanup_test_img ++ rm -f "$dst_image" ++ tls_x509_cleanup ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++. ./common.qemu ++. ./common.tls ++. 
./common.nbd ++ ++_supported_fmt qcow2 # Hardcoded to qcow2 command line and QMP below ++_supported_proto file ++ ++# pick_unused_port ++# ++# Picks and returns an "unused" port, setting the global variable ++# $port. ++# ++# This is inherently racy, but we need it because qemu does not currently ++# permit NBD+TLS over a Unix domain socket ++pick_unused_port () ++{ ++ if ! (ss --version) >/dev/null 2>&1; then ++ _notrun "ss utility required, skipped this test" ++ fi ++ ++ # Start at a random port to make it less likely that two parallel ++ # tests will conflict. ++ port=$(( 50000 + (RANDOM%15000) )) ++ while ss -ltn | grep -sqE ":$port\b"; do ++ ((port++)) ++ if [ $port -eq 65000 ]; then port=50000; fi ++ done ++ echo picked unused port ++} ++ ++tls_x509_init ++ ++size=1G ++DST_IMG="$TEST_DIR/dst.qcow2" ++ ++echo ++echo "== preparing TLS creds and spare port ==" ++ ++pick_unused_port ++tls_x509_create_root_ca "ca1" ++tls_x509_create_server "ca1" "server1" ++tls_x509_create_client "ca1" "client1" ++tls_obj_base=tls-creds-x509,id=tls0,verify-peer=true,dir="${tls_dir}" ++ ++echo ++echo "== preparing image ==" ++ ++_make_test_img $size ++$QEMU_IMG create -f qcow2 "$DST_IMG" $size | _filter_img_create ++ ++echo ++echo === Starting Src QEMU === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/client1,endpoint=client \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", "iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, ++ "filename":"'"$TEST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' ++h1=$QEMU_HANDLE ++_send_qemu_cmd $h1 '{"execute": "qmp_capabilities"}' 
'return' ++ ++echo ++echo === Starting Dst VM2 === ++echo ++ ++_launch_qemu -machine q35 \ ++ -object iothread,id=iothread0 \ ++ -object "${tls_obj_base}"/server1,endpoint=server \ ++ -device '{"driver":"pcie-root-port", "id":"root0", "multifunction":true, ++ "bus":"pcie.0"}' \ ++ -device '{"driver":"virtio-scsi-pci", "id":"virtio_scsi_pci0", ++ "bus":"root0", "iothread":"iothread0"}' \ ++ -device '{"driver":"scsi-hd", "id":"image1", "drive":"drive_image1", ++ "bus":"virtio_scsi_pci0.0"}' \ ++ -blockdev '{"driver":"file", "cache":{"direct":true, "no-flush":false}, ++ "filename":"'"$DST_IMG"'", "node-name":"drive_sys1"}' \ ++ -blockdev '{"driver":"qcow2", "node-name":"drive_image1", ++ "file":"drive_sys1"}' \ ++ -incoming defer ++h2=$QEMU_HANDLE ++_send_qemu_cmd $h2 '{"execute": "qmp_capabilities"}' 'return' ++ ++echo ++echo === Dst VM: Enable NBD server for incoming storage migration === ++echo ++ ++_send_qemu_cmd $h2 '{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": "'$port'"}}, ++ "tls-creds": "tls0"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h2 '{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}}' '{"return": {}}' ++ ++echo ++echo === Src VM: Mirror to dst NBD for outgoing storage migration === ++echo ++ ++_send_qemu_cmd $h1 '{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": "'$port'"}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}}' '{"return": {}}' | sed "s/\"$port\"/PORT/g" ++_send_qemu_cmd $h1 '{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}}' '{"return": {}}' ++_timed_wait_for $h1 '"ready"' ++ ++echo ++echo === Cleaning up === ++echo ++ ++_send_qemu_cmd $h1 
'{"execute":"quit"}' '' ++_send_qemu_cmd $h2 '{"execute":"quit"}' '' ++ ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/nbd-tls-iothread.out b/tests/qemu-iotests/tests/nbd-tls-iothread.out +new file mode 100644 +index 0000000000..1d83d4f903 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-tls-iothread.out +@@ -0,0 +1,54 @@ ++QA output created by nbd-tls-iothread ++ ++== preparing TLS creds and spare port == ++picked unused port ++Generating a self signed certificate... ++Generating a signed certificate... ++Generating a signed certificate... ++ ++== preparing image == ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 ++Formatting 'TEST_DIR/dst.IMGFMT', fmt=IMGFMT size=1073741824 ++ ++=== Starting Src QEMU === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Starting Dst VM2 === ++ ++{"execute": "qmp_capabilities"} ++{"return": {}} ++ ++=== Dst VM: Enable NBD server for incoming storage migration === ++ ++{"execute": "nbd-server-start", "arguments": ++ {"addr": {"type": "inet", "data": {"host": "127.0.0.1", "port": PORT}}, ++ "tls-creds": "tls0"}} ++{"return": {}} ++{"execute": "block-export-add", "arguments": ++ {"node-name": "drive_image1", "type": "nbd", "writable": true, ++ "id": "drive_image1"}} ++{"return": {}} ++ ++=== Src VM: Mirror to dst NBD for outgoing storage migration === ++ ++{"execute": "blockdev-add", "arguments": ++ {"node-name": "mirror", "driver": "nbd", ++ "server": {"type": "inet", "host": "127.0.0.1", "port": PORT}, ++ "export": "drive_image1", "tls-creds": "tls0", ++ "tls-hostname": "127.0.0.1"}} ++{"return": {}} ++{"execute": "blockdev-mirror", "arguments": ++ {"sync": "full", "device": "drive_image1", "target": "mirror", ++ "job-id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "drive_image1_53"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, 
"event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "drive_image1_53"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "drive_image1_53"}} ++ ++=== Cleaning up === ++ ++{"execute":"quit"} ++{"execute":"quit"} ++*** done +-- +2.39.3 + diff --git a/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch b/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch new file mode 100644 index 0000000..5116b96 --- /dev/null +++ b/SOURCES/kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch @@ -0,0 +1,76 @@ +From 8a8fa4ab4dc05502550ca207926cd0c93a3341ea Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Mon, 8 Apr 2024 12:43:49 +0200 +Subject: [PATCH] kvm: error out of kvm_irqchip_add_msi_route() in case of full + route table + +RH-Author: Igor Mammedov +RH-MergeRequest: 374: kvm: error out of kvm_irqchip_add_msi_route() in case of full route table +RH-Jira: RHEL-32990 +RH-Acked-by: Ani Sinha +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] df31f2d0cafe10a1ac22a2bebb85dc17c1e891e0 + +RH-Jira: RHEL-32990 + +subj is calling kvm_add_routing_entry() which simply extends + KVMState::irq_routes::entries[] +but doesn't check if number of routes goes beyond limit the kernel +is willing to accept. Which later leads toi the assert + + qemu-kvm: ../accel/kvm/kvm-all.c:1833: kvm_irqchip_commit_routes: Assertion `ret == 0' failed + +typically it happens during guest boot for large enough guest + +Reproduced with: + ./qemu --enable-kvm -m 8G -smp 64 -machine pc \ + `for b in {1..2}; do echo -n "-device pci-bridge,id=pci$b,chassis_nr=$b "; + for i in {0..31}; do touch /tmp/vblk$b$i; + echo -n "-drive file=/tmp/vblk$b$i,if=none,id=drive$b$i,format=raw + -device virtio-blk-pci,drive=drive$b$i,bus=pci$b "; + done; done` + +While crash at boot time is bad, the same might happen at hotplug time +which is unacceptable. 
+So instead of calling kvm_add_routing_entry() unconditionally, check first +that the number of routes won't exceed KVM_CAP_IRQ_ROUTING. This way a virtio +device, instead of killing qemu, will gracefully fail to initialize +as expected with the following warnings on console: + virtio-blk failed to set guest notifier (-28), ensure -accel kvm is set. + virtio_bus_start_ioeventfd: failed. Fallback to userspace (slower). + +Signed-off-by: Igor Mammedov +--- + accel/kvm/kvm-all.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index e39a810a4e..f1a4564cbd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2000,12 +2000,17 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) + return -EINVAL; + } + +- trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", +- vector, virq); ++ if (s->irq_routes->nr < s->gsi_count) { ++ trace_kvm_irqchip_add_msi_route(dev ? dev->name : (char *)"N/A", ++ vector, virq); + +- kvm_add_routing_entry(s, &kroute); +- kvm_arch_add_msi_route_post(&kroute, vector, dev); +- c->changes++; ++ kvm_add_routing_entry(s, &kroute); ++ kvm_arch_add_msi_route_post(&kroute, vector, dev); ++ c->changes++; ++ } else { ++ kvm_irqchip_release_virq(s, virq); ++ return -ENOSPC; ++ } + + return virq; + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch new file mode 100644 index 0000000..28b0064 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch @@ -0,0 +1,329 @@ +From d1dd79b558fb9b23ae14165ec8edf0085e927091 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Mon, 8 Apr 2024 11:00:44 -0500 +Subject: [PATCH 2/4] nbd/server: Mark negotiation functions as coroutine_fn + +RH-Author: Eric Blake +RH-MergeRequest: 375: Fix regression on nbd+tls +RH-Jira: RHEL-33754 +RH-Acked-by: Kevin Wolf 
+RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 6dc8ecca16df5ae5cc6dc36e7b96f991396fcf24 (ebblake/qemu-kvm) + +nbd_negotiate() is already marked coroutine_fn. And given the fix in +the previous patch to have nbd_negotiate_handle_starttls not create +and wait on a g_main_loop (as that would violate coroutine +constraints), it is worth marking the rest of the related static +functions reachable only during option negotiation as also being +coroutine_fn. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-6-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +[eblake: drop one spurious coroutine_fn marking] +Signed-off-by: Eric Blake +(cherry picked from commit 4fa333e08dd96395a99ea8dd9e4c73a29dd23344) +Jira: https://issues.redhat.com/browse/RHEL-33754 +Signed-off-by: Eric Blake +--- + nbd/server.c | 102 +++++++++++++++++++++++++++++---------------------- + 1 file changed, 59 insertions(+), 43 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 98ae0e1632..892797bb11 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -195,8 +195,9 @@ static inline void set_be_option_rep(NBDOptionReply *rep, uint32_t option, + + /* Send a reply header, including length, but no payload. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, +- uint32_t len, Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, ++ uint32_t len, Error **errp) + { + NBDOptionReply rep; + +@@ -211,15 +212,15 @@ static int nbd_negotiate_send_rep_len(NBDClient *client, uint32_t type, + + /* Send a reply header with default 0 length. + * Return -errno on error, 0 on success. 
*/ +-static int nbd_negotiate_send_rep(NBDClient *client, uint32_t type, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep(NBDClient *client, uint32_t type, Error **errp) + { + return nbd_negotiate_send_rep_len(client, type, 0, errp); + } + + /* Send an error reply. + * Return -errno on error, 0 on success. */ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_negotiate_send_rep_verr(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, va_list va) + { +@@ -259,7 +260,7 @@ nbd_sanitize_name(const char *name) + + /* Send an error reply. + * Return -errno on error, 0 on success. */ +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + Error **errp, const char *fmt, ...) + { +@@ -275,7 +276,7 @@ nbd_negotiate_send_rep_err(NBDClient *client, uint32_t type, + /* Drop remainder of the current option, and send a reply with the + * given error type and message. Return -errno on read or write + * failure; or 0 if connection is still live. */ +-static int G_GNUC_PRINTF(4, 0) ++static coroutine_fn int G_GNUC_PRINTF(4, 0) + nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, va_list va) + { +@@ -288,7 +289,7 @@ nbd_opt_vdrop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(4, 5) ++static coroutine_fn int G_GNUC_PRINTF(4, 5) + nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + const char *fmt, ...) + { +@@ -302,7 +303,7 @@ nbd_opt_drop(NBDClient *client, uint32_t type, Error **errp, + return ret; + } + +-static int G_GNUC_PRINTF(3, 4) ++static coroutine_fn int G_GNUC_PRINTF(3, 4) + nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + { + int ret; +@@ -319,8 +320,9 @@ nbd_opt_invalid(NBDClient *client, Error **errp, const char *fmt, ...) + * If @check_nul, require that no NUL bytes appear in buffer. 
+ * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, +- bool check_nul, Error **errp) ++static coroutine_fn int ++nbd_opt_read(NBDClient *client, void *buffer, size_t size, ++ bool check_nul, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -343,7 +345,8 @@ static int nbd_opt_read(NBDClient *client, void *buffer, size_t size, + /* Drop size bytes from the unparsed payload of the current option. + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) ++static coroutine_fn int ++nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + { + if (size > client->optlen) { + return nbd_opt_invalid(client, errp, +@@ -366,8 +369,9 @@ static int nbd_opt_skip(NBDClient *client, size_t size, Error **errp) + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + */ +-static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, +- Error **errp) ++static coroutine_fn int ++nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, ++ Error **errp) + { + int ret; + uint32_t len; +@@ -402,8 +406,8 @@ static int nbd_opt_read_name(NBDClient *client, char **name, uint32_t *length, + + /* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload. + * Return -errno on error, 0 on success. 
*/ +-static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, Error **errp) + { + ERRP_GUARD(); + size_t name_len, desc_len; +@@ -444,7 +448,8 @@ static int nbd_negotiate_send_rep_list(NBDClient *client, NBDExport *exp, + + /* Process the NBD_OPT_LIST command, with a potential series of replies. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_list(NBDClient *client, Error **errp) + { + NBDExport *exp; + assert(client->opt == NBD_OPT_LIST); +@@ -459,7 +464,8 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) + return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); + } + +-static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) ++static coroutine_fn void ++nbd_check_meta_export(NBDClient *client, NBDExport *exp) + { + if (exp != client->contexts.exp) { + client->contexts.count = 0; +@@ -468,8 +474,9 @@ static void nbd_check_meta_export(NBDClient *client, NBDExport *exp) + + /* Send a reply to NBD_OPT_EXPORT_NAME. + * Return -errno on error, 0 on success. */ +-static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, ++ Error **errp) + { + ERRP_GUARD(); + g_autofree char *name = NULL; +@@ -536,9 +543,9 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, bool no_zeroes, + /* Send a single NBD_REP_INFO, with a buffer @buf of @length bytes. + * The buffer does NOT include the info type prefix. + * Return -errno on error, 0 if ready to send more. 
*/ +-static int nbd_negotiate_send_info(NBDClient *client, +- uint16_t info, uint32_t length, void *buf, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_info(NBDClient *client, uint16_t info, uint32_t length, ++ void *buf, Error **errp) + { + int rc; + +@@ -565,7 +572,8 @@ static int nbd_negotiate_send_info(NBDClient *client, + * -errno transmission error occurred or @fatal was requested, errp is set + * 0 error message successfully sent to client, errp is not set + */ +-static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) ++static coroutine_fn int ++nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + { + int ret; + +@@ -583,7 +591,8 @@ static int nbd_reject_length(NBDClient *client, bool fatal, Error **errp) + /* Handle NBD_OPT_INFO and NBD_OPT_GO. + * Return -errno on error, 0 if ready for next option, and 1 to move + * into transmission phase. */ +-static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_handle_info(NBDClient *client, Error **errp) + { + int rc; + g_autofree char *name = NULL; +@@ -755,7 +764,8 @@ struct NBDTLSServerHandshakeData { + Coroutine *co; + }; + +-static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++static void ++nbd_server_tls_handshake(QIOTask *task, void *opaque) + { + struct NBDTLSServerHandshakeData *data = opaque; + +@@ -768,8 +778,8 @@ static void nbd_server_tls_handshake(QIOTask *task, void *opaque) + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +-static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, +- Error **errp) ++static coroutine_fn QIOChannel * ++nbd_negotiate_handle_starttls(NBDClient *client, Error **errp) + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +@@ -821,10 +831,9 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + * + * For NBD_OPT_LIST_META_CONTEXT @context_id is ignored, 0 is used instead. + */ +-static int nbd_negotiate_send_meta_context(NBDClient *client, +- const char *context, +- uint32_t context_id, +- Error **errp) ++static coroutine_fn int ++nbd_negotiate_send_meta_context(NBDClient *client, const char *context, ++ uint32_t context_id, Error **errp) + { + NBDOptionReplyMetaContext opt; + struct iovec iov[] = { +@@ -849,8 +858,9 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, + * Return true if @query matches @pattern, or if @query is empty when + * the @client is performing _LIST_. + */ +-static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, +- const char *query) ++static coroutine_fn bool ++nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, ++ const char *query) + { + if (!*query) { + trace_nbd_negotiate_meta_query_parse("empty"); +@@ -867,7 +877,8 @@ static bool nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, + /* + * Return true and adjust @str in place if it begins with @prefix. + */ +-static bool nbd_strshift(const char **str, const char *prefix) ++static coroutine_fn bool ++nbd_strshift(const char **str, const char *prefix) + { + size_t len = strlen(prefix); + +@@ -883,8 +894,9 @@ static bool nbd_strshift(const char **str, const char *prefix) + * Handle queries to 'base' namespace. For now, only the base:allocation + * context is available. Return true if @query has been handled. 
+ */ +-static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + if (!nbd_strshift(&query, "base:")) { + return false; +@@ -903,8 +915,9 @@ static bool nbd_meta_base_query(NBDClient *client, NBDMetaContexts *meta, + * and qemu:allocation-depth contexts are available. Return true if @query + * has been handled. + */ +-static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, +- const char *query) ++static coroutine_fn bool ++nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, ++ const char *query) + { + size_t i; + +@@ -968,8 +981,9 @@ static bool nbd_meta_qemu_query(NBDClient *client, NBDMetaContexts *meta, + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. */ +-static int nbd_negotiate_meta_query(NBDClient *client, +- NBDMetaContexts *meta, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_query(NBDClient *client, ++ NBDMetaContexts *meta, Error **errp) + { + int ret; + g_autofree char *query = NULL; +@@ -1008,7 +1022,8 @@ static int nbd_negotiate_meta_query(NBDClient *client, + * Handle NBD_OPT_LIST_META_CONTEXT and NBD_OPT_SET_META_CONTEXT + * + * Return -errno on I/O error, or 0 if option was completely handled. */ +-static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + { + int ret; + g_autofree char *export_name = NULL; +@@ -1136,7 +1151,8 @@ static int nbd_negotiate_meta_queries(NBDClient *client, Error **errp) + * 1 if client sent NBD_OPT_ABORT, i.e. 
on valid disconnect, + * errp is not set + */ +-static int nbd_negotiate_options(NBDClient *client, Error **errp) ++static coroutine_fn int ++nbd_negotiate_options(NBDClient *client, Error **errp) + { + uint32_t flags; + bool fixedNewstyle = false; +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch new file mode 100644 index 0000000..8693400 --- /dev/null +++ b/SOURCES/kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch @@ -0,0 +1,207 @@ +From ef01aeba9f6c4c886719261333a04bc32484f0d0 Mon Sep 17 00:00:00 2001 +From: Zhu Yangyang +Date: Mon, 8 Apr 2024 11:00:43 -0500 +Subject: [PATCH 1/4] nbd/server: do not poll within a coroutine context + +RH-Author: Eric Blake +RH-MergeRequest: 375: Fix regression on nbd+tls +RH-Jira: RHEL-33754 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 88ca2c90e0c412ba8c1cfd7ea91e01ec58220e9b (ebblake/qemu-kvm) + +Coroutines are not supposed to block. Instead, they should yield. + +The client performs TLS upgrade outside of an AIOContext, during +synchronous handshake; this still requires g_main_loop. But the +server responds to TLS upgrade inside a coroutine, so a nested +g_main_loop is wrong. Since the two callbacks no longer share more +than the setting of data.complete and data.error, it's just as easy to +use static helpers instead of trying to share a common code path. It +is also possible to add assertions that no other code is interfering +with the eventual path to qio reaching the callback, whether or not it +required a yield or main loop. 
+ +Fixes: f95910f ("nbd: implement TLS support in the protocol negotiation") +Signed-off-by: Zhu Yangyang +[eblake: move callbacks to their use point, add assertions] +Signed-off-by: Eric Blake +Message-ID: <20240408160214.1200629-5-eblake@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit ae6d91a7e9b77abb029ed3fa9fad461422286942) +Jira: https://issues.redhat.com/browse/RHEL-33754 +Signed-off-by: Eric Blake +--- + nbd/client.c | 28 ++++++++++++++++++++++++---- + nbd/common.c | 11 ----------- + nbd/nbd-internal.h | 10 ---------- + nbd/server.c | 28 +++++++++++++++++++++++----- + 4 files changed, 47 insertions(+), 30 deletions(-) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..c89c750467 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -596,13 +596,31 @@ static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict, + return 1; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSClientHandshakeData { ++ bool complete; ++ Error *error; ++ GMainLoop *loop; ++}; ++ ++static void nbd_client_tls_handshake(QIOTask *task, void *opaque) ++{ ++ struct NBDTLSClientHandshakeData *data = opaque; ++ ++ qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (data->loop) { ++ g_main_loop_quit(data->loop); ++ } ++} ++ + static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp) + { + int ret; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSClientHandshakeData data = { 0 }; + + ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp); + if (ret <= 0) { +@@ -619,18 +637,20 @@ static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, + return NULL; + } + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls"); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); + trace_nbd_receive_starttls_tls_handshake(); + qio_channel_tls_handshake(tioc, +- 
nbd_tls_handshake, ++ nbd_client_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { ++ data.loop = g_main_loop_new(g_main_context_default(), FALSE); + g_main_loop_run(data.loop); ++ assert(data.complete); ++ g_main_loop_unref(data.loop); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + error_propagate(errp, data.error); + object_unref(OBJECT(tioc)); +diff --git a/nbd/common.c b/nbd/common.c +index 3247c1d618..589a748cfe 100644 +--- a/nbd/common.c ++++ b/nbd/common.c +@@ -47,17 +47,6 @@ int nbd_drop(QIOChannel *ioc, size_t size, Error **errp) + } + + +-void nbd_tls_handshake(QIOTask *task, +- void *opaque) +-{ +- struct NBDTLSHandshakeData *data = opaque; +- +- qio_task_propagate_error(task, &data->error); +- data->complete = true; +- g_main_loop_quit(data->loop); +-} +- +- + const char *nbd_opt_lookup(uint32_t opt) + { + switch (opt) { +diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h +index dfa02f77ee..91895106a9 100644 +--- a/nbd/nbd-internal.h ++++ b/nbd/nbd-internal.h +@@ -72,16 +72,6 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, + return qio_channel_write_all(ioc, buffer, size, errp) < 0 ? 
-EIO : 0; + } + +-struct NBDTLSHandshakeData { +- GMainLoop *loop; +- bool complete; +- Error *error; +-}; +- +- +-void nbd_tls_handshake(QIOTask *task, +- void *opaque); +- + int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); + + #endif +diff --git a/nbd/server.c b/nbd/server.c +index c3484cc1eb..98ae0e1632 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -748,6 +748,23 @@ static int nbd_negotiate_handle_info(NBDClient *client, Error **errp) + return rc; + } + ++/* Callback to learn when QIO TLS upgrade is complete */ ++struct NBDTLSServerHandshakeData { ++ bool complete; ++ Error *error; ++ Coroutine *co; ++}; ++ ++static void nbd_server_tls_handshake(QIOTask *task, void *opaque) ++{ ++ struct NBDTLSServerHandshakeData *data = opaque; ++ ++ qio_task_propagate_error(task, &data->error); ++ data->complete = true; ++ if (!qemu_coroutine_entered(data->co)) { ++ aio_co_wake(data->co); ++ } ++} + + /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the + * new channel for all further (now-encrypted) communication. 
*/ +@@ -756,7 +773,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + { + QIOChannel *ioc; + QIOChannelTLS *tioc; +- struct NBDTLSHandshakeData data = { 0 }; ++ struct NBDTLSServerHandshakeData data = { 0 }; + + assert(client->opt == NBD_OPT_STARTTLS); + +@@ -777,17 +794,18 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client, + + qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls"); + trace_nbd_negotiate_handle_starttls_handshake(); +- data.loop = g_main_loop_new(g_main_context_default(), FALSE); ++ data.co = qemu_coroutine_self(); + qio_channel_tls_handshake(tioc, +- nbd_tls_handshake, ++ nbd_server_tls_handshake, + &data, + NULL, + NULL); + + if (!data.complete) { +- g_main_loop_run(data.loop); ++ qemu_coroutine_yield(); ++ assert(data.complete); + } +- g_main_loop_unref(data.loop); ++ + if (data.error) { + object_unref(OBJECT(tioc)); + error_propagate(errp, data.error); +-- +2.39.3 + diff --git a/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch new file mode 100644 index 0000000..f033486 --- /dev/null +++ b/SOURCES/kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch @@ -0,0 +1,129 @@ +From 69d6b5f98d665fbcb86e42df40bcc5e9c79b397f Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Fri, 17 May 2024 21:50:14 -0500 +Subject: [PATCH 3/4] qio: Inherit follow_coroutine_ctx across TLS +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 375: Fix regression on nbd+tls +RH-Jira: RHEL-33754 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 14ddea7e3c81898bb3fe4be51a40749d67a5c0c0 (ebblake/qemu-kvm) + +Since qemu 8.2, the combination of NBD + TLS + iothread crashes on an +assertion failure: + +qemu-kvm: ../io/channel.c:534: void qio_channel_restart_read(void *): Assertion `qemu_get_current_aio_context() == qemu_coroutine_get_aio_context(co)' 
failed. + +It turns out that when we removed AioContext locking, we did so by +having NBD tell its qio channels that it wanted to opt in to +qio_channel_set_follow_coroutine_ctx(); but while we opted in on the +main channel, we did not opt in on the TLS wrapper channel. +qemu-iotests has coverage of NBD+iothread and NBD+TLS, but apparently +no coverage of NBD+TLS+iothread, or we would have noticed this +regression sooner. (I'll add that in the next patch) + +But while we could manually opt in to the TLS channel in nbd/server.c +(a one-line change), it is more generic if all qio channels that wrap +other channels inherit the follow status, in the same way that they +inherit feature bits. + +CC: Stefan Hajnoczi +CC: Daniel P. Berrangé +CC: qemu-stable@nongnu.org +Fixes: https://issues.redhat.com/browse/RHEL-34786 +Fixes: 06e0f098 ("io: follow coroutine AioContext in qio_channel_yield()", v8.2.0) +Signed-off-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Daniel P. Berrangé +Message-ID: <20240518025246.791593-5-eblake@redhat.com> +(cherry picked from commit 199e84de1c903ba5aa1f7256310bbc4a20dd930b) +Jira: https://issues.redhat.com/browse/RHEL-33754 +Signed-off-by: Eric Blake +--- + io/channel-tls.c | 26 +++++++++++++++----------- + io/channel-websock.c | 1 + + 2 files changed, 16 insertions(+), 11 deletions(-) + +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 58fe1aceee..a8ad89c3d1 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -69,37 +69,40 @@ qio_channel_tls_new_server(QIOChannel *master, + const char *aclname, + Error **errp) + { +- QIOChannelTLS *ioc; ++ QIOChannelTLS *tioc; ++ QIOChannel *ioc; + +- ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ tioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ++ ioc = QIO_CHANNEL(tioc); + +- ioc->master = master; ++ tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { +- 
qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN); ++ qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } + object_ref(OBJECT(master)); + +- ioc->session = qcrypto_tls_session_new( ++ tioc->session = qcrypto_tls_session_new( + creds, + NULL, + aclname, + QCRYPTO_TLS_CREDS_ENDPOINT_SERVER, + errp); +- if (!ioc->session) { ++ if (!tioc->session) { + goto error; + } + + qcrypto_tls_session_set_callbacks( +- ioc->session, ++ tioc->session, + qio_channel_tls_write_handler, + qio_channel_tls_read_handler, +- ioc); ++ tioc); + +- trace_qio_channel_tls_new_server(ioc, master, creds, aclname); +- return ioc; ++ trace_qio_channel_tls_new_server(tioc, master, creds, aclname); ++ return tioc; + + error: +- object_unref(OBJECT(ioc)); ++ object_unref(OBJECT(tioc)); + return NULL; + } + +@@ -116,6 +119,7 @@ qio_channel_tls_new_client(QIOChannel *master, + ioc = QIO_CHANNEL(tioc); + + tioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +diff --git a/io/channel-websock.c b/io/channel-websock.c +index a12acc27cf..de39f0d182 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -883,6 +883,7 @@ qio_channel_websock_new_server(QIOChannel *master) + ioc = QIO_CHANNEL(wioc); + + wioc->master = master; ++ ioc->follow_coroutine_ctx = master->follow_coroutine_ctx; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch b/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch new file mode 100644 index 0000000..4128907 --- /dev/null +++ b/SOURCES/kvm-virtio-pci-fix-use-of-a-released-vector.patch @@ -0,0 +1,162 @@ +From ccd8ffa5cd7f9bcfddeda7a9fa1ad86d4bad870e Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Fri, 12 
Apr 2024 14:26:55 +0800 +Subject: [PATCH] virtio-pci: fix use of a released vector +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 366: virtio-pci: fix use of a released vector +RH-Jira: RHEL-32837 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Jason Wang +RH-Commit: [1/1] edd26ffb8727635310aed42e42925afe87df2287 + +During the booting process of the non-standard image, the behavior of the +called function in qemu is as follows: + +1. vhost_net_stop() was triggered by guest image. This will call the function +virtio_pci_set_guest_notifiers() with assign=false, +virtio_pci_set_guest_notifiers() will release the irqfd for vector 0 + +2. virtio_reset() was triggered, this will set configure vector to VIRTIO_NO_VECTOR + +3. vhost_net_start() was called (at this time, the configure vector is +still VIRTIO_NO_VECTOR) and then call virtio_pci_set_guest_notifiers() with +assign=true, so the irqfd for vector 0 is still not "init" during this process + +4. The system continues to boot and sets the vector back to 0. After that +msix_fire_vector_notifier() was triggered to unmask the vector 0 and meet the crash + +To fix the issue, we need to support changing the vector after VIRTIO_CONFIG_S_DRIVER_OK is set. 
+ +(gdb) bt +0 __pthread_kill_implementation (threadid=, signo=signo@entry=6, no_tid=no_tid@entry=0) + at pthread_kill.c:44 +1 0x00007fc87148ec53 in __pthread_kill_internal (signo=6, threadid=) at pthread_kill.c:78 +2 0x00007fc87143e956 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26 +3 0x00007fc8714287f4 in __GI_abort () at abort.c:79 +4 0x00007fc87142871b in __assert_fail_base + (fmt=0x7fc8715bbde0 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=) at assert.c:92 +5 0x00007fc871437536 in __GI___assert_fail + (assertion=0x5606413efd53 "ret == 0", file=0x5606413ef87d "../accel/kvm/kvm-all.c", line=1837, function=0x5606413f06f0 <__PRETTY_FUNCTION__.19> "kvm_irqchip_commit_routes") at assert.c:101 +6 0x0000560640f884b5 in kvm_irqchip_commit_routes (s=0x560642cae1f0) at ../accel/kvm/kvm-all.c:1837 +7 0x0000560640c98f8e in virtio_pci_one_vector_unmask + (proxy=0x560643c65f00, queue_no=4294967295, vector=0, msg=..., n=0x560643c6e4c8) + at ../hw/virtio/virtio-pci.c:1005 +8 0x0000560640c99201 in virtio_pci_vector_unmask (dev=0x560643c65f00, vector=0, msg=...) + at ../hw/virtio/virtio-pci.c:1070 +9 0x0000560640bc402e in msix_fire_vector_notifier (dev=0x560643c65f00, vector=0, is_masked=false) + at ../hw/pci/msix.c:120 +10 0x0000560640bc40f1 in msix_handle_mask_update (dev=0x560643c65f00, vector=0, was_masked=true) + at ../hw/pci/msix.c:140 +11 0x0000560640bc4503 in msix_table_mmio_write (opaque=0x560643c65f00, addr=12, val=0, size=4) + at ../hw/pci/msix.c:231 +12 0x0000560640f26d83 in memory_region_write_accessor + (mr=0x560643c66540, addr=12, value=0x7fc86b7bc628, size=4, shift=0, mask=4294967295, attrs=...) + at ../system/memory.c:497 +13 0x0000560640f270a6 in access_with_adjusted_size + + (addr=12, value=0x7fc86b7bc628, size=4, access_size_min=1, access_size_max=4, access_fn=0x560640f26c8d , mr=0x560643c66540, attrs=...) 
at ../system/memory.c:573 +14 0x0000560640f2a2b5 in memory_region_dispatch_write (mr=0x560643c66540, addr=12, data=0, op=MO_32, attrs=...) + at ../system/memory.c:1521 +15 0x0000560640f37bac in flatview_write_continue + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., ptr=0x7fc871e9c028, len=4, addr1=12, l=4, mr=0x560643c66540) + at ../system/physmem.c:2714 +16 0x0000560640f37d0f in flatview_write + (fv=0x7fc65805e0b0, addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) at ../system/physmem.c:2756 +17 0x0000560640f380bf in address_space_write + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4) + at ../system/physmem.c:2863 +18 0x0000560640f3812c in address_space_rw + (as=0x560642161ae0 , addr=4273803276, attrs=..., buf=0x7fc871e9c028, len=4, is_write=true) at ../system/physmem.c:2873 +--Type for more, q to quit, c to continue without paging-- +19 0x0000560640f8aa55 in kvm_cpu_exec (cpu=0x560642f205e0) at ../accel/kvm/kvm-all.c:2915 +20 0x0000560640f8d731 in kvm_vcpu_thread_fn (arg=0x560642f205e0) at ../accel/kvm/kvm-accel-ops.c:51 +21 0x00005606411949f4 in qemu_thread_start (args=0x560642f292b0) at ../util/qemu-thread-posix.c:541 +22 0x00007fc87148cdcd in start_thread (arg=) at pthread_create.c:442 +23 0x00007fc871512630 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 +(gdb) + +MST: coding style and typo fixups + +Fixes: f9a09ca3ea ("vhost: add support for configure interrupt") +Cc: qemu-stable@nongnu.org +Signed-off-by: Cindy Lu +Message-ID: <2321ade5f601367efe7380c04e3f61379c59b48f.1713173550.git.mst@redhat.com> +Cc: Lei Yang +Cc: Jason Wang +Signed-off-by: Michael S. 
Tsirkin +Tested-by: Cindy Lu +(cherry picked from commit 2ce6cff94df2650c460f809e5ad263f1d22507c0) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index e433879542..08faefe29a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -1424,6 +1424,38 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy, + return offset; + } + ++static void virtio_pci_set_vector(VirtIODevice *vdev, ++ VirtIOPCIProxy *proxy, ++ int queue_no, uint16_t old_vector, ++ uint16_t new_vector) ++{ ++ bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) && ++ msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled(); ++ ++ if (new_vector == old_vector) { ++ return; ++ } ++ ++ /* ++ * If the device uses irqfd and the vector changes after DRIVER_OK is ++ * set, we need to release the old vector and set up the new one. ++ * Otherwise just need to set the new vector on the device. ++ */ ++ if (kvm_irqfd && old_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); ++ } ++ /* Set the new vector on the device. */ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ vdev->config_vector = new_vector; ++ } else { ++ virtio_queue_set_vector(vdev, queue_no, new_vector); ++ } ++ /* If the new vector changed need to set it up. 
*/ ++ if (kvm_irqfd && new_vector != VIRTIO_NO_VECTOR) { ++ kvm_virtio_pci_vector_use_one(proxy, queue_no); ++ } ++} ++ + int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, + uint8_t bar, uint64_t offset, uint64_t length, + uint8_t id) +@@ -1570,7 +1602,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- vdev->config_vector = val; ++ virtio_pci_set_vector(vdev, proxy, VIRTIO_CONFIG_IRQ_IDX, ++ vdev->config_vector, val); + break; + case VIRTIO_PCI_COMMON_STATUS: + if (!(val & VIRTIO_CONFIG_S_DRIVER_OK)) { +@@ -1610,7 +1643,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + } else { + val = VIRTIO_NO_VECTOR; + } +- virtio_queue_set_vector(vdev, vdev->queue_sel, val); ++ virtio_pci_set_vector(vdev, proxy, vdev->queue_sel, vector, val); + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + if (val == 1) { +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 973e9a2..c17ee63 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.3 # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -610,6 +610,18 @@ Patch175: kvm-Revert-chardev-use-a-child-source-for-qio-input-sour.patch Patch176: kvm-coroutine-cap-per-thread-local-pool-size.patch # For RHEL-28947 - Qemu crashing with "failed to set up stack guard page: Cannot allocate memory" Patch177: kvm-coroutine-reserve-5-000-mappings.patch +# For RHEL-32837 - qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z] +Patch178: kvm-virtio-pci-fix-use-of-a-released-vector.patch +# For RHEL-32990 - qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z] +Patch179: kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch +# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] +Patch180: kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch +# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] +Patch181: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch +# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] +Patch182: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch +# For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] +Patch183: kvm-iotests-test-NBD-TLS-iothread.patch %if %{have_clang} BuildRequires: clang @@ -1671,6 +1683,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Jun 07 2024 Miroslav Rezanina - 8.2.0-11.el9_4.3 +- kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33754] +- kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33754] +- kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch [RHEL-33754] +- kvm-iotests-test-NBD-TLS-iothread.patch [RHEL-33754] +- Resolves: RHEL-33754 + (Qemu hang when quit dst vm after storage 
migration(nbd+tls) [rhel-9.4.z]) + +* Mon May 20 2024 Miroslav Rezanina - 8.2.0-11.el9_4.2 +- kvm-kvm-error-out-of-kvm_irqchip_add_msi_route-in-case-o.patch [RHEL-32990] +- Resolves: RHEL-32990 + (qemu crash with kvm_irqchip_commit_routes: Assertion `ret == 0' failed if booting with many virtio disks and vcpus [rhel-9.4.z]) + +* Thu Apr 18 2024 Miroslav Rezanina - 8.2.0-11.el9_4.1 +- kvm-virtio-pci-fix-use-of-a-released-vector.patch [RHEL-32837] +- Resolves: RHEL-32837 + (qemu-kvm running Vyatta hits assert when doing KVM_SET_GSI_ROUTING [rhel-9.4.z]) + * Tue Mar 26 2024 Miroslav Rezanina - 8.2.0-11 - kvm-coroutine-cap-per-thread-local-pool-size.patch [RHEL-28947] - kvm-coroutine-reserve-5-000-mappings.patch [RHEL-28947] From bbc5de2b9d86721e813532e7703892d2fcd73f65 Mon Sep 17 00:00:00 2001 From: eabdullin Date: Wed, 3 Jul 2024 08:54:24 +0000 Subject: [PATCH 3/4] import OL qemu-kvm-8.2.0-11.el9_4.4 --- ...names-only-when-explicitly-requested.patch | 260 ++++++++++++++++++ ...t-store-data-file-with-protocol-in-i.patch | 69 +++++ ...t-store-data-file-with-json-prefix-i.patch | 72 +++++ ...n-t-open-data_file-with-BDRV_O_NO_IO.patch | 125 +++++++++ SPECS/qemu-kvm.spec | 14 +- 5 files changed, 539 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch create mode 100644 SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch create mode 100644 SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch create mode 100644 SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch diff --git a/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch new file mode 100644 index 0000000..5ea7203 --- /dev/null +++ b/SOURCES/kvm-block-Parse-filenames-only-when-explicitly-requested.patch @@ -0,0 +1,260 @@ +From 5c35b7d631e9cdf75512b9e1a0b5d48e8fd768d9 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: 
Wed, 5 Jun 2024 19:56:51 -0400 +Subject: [PATCH 4/4] block: Parse filenames only when explicitly requested + +RH-Author: Jon Maloy +RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) +RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [4/4] 6f71e6a07bd5a9f8352db920f498f5fa5a2cdbfb + +commit f44c2941d4419e60f16dea3e9adca164e75aa78d (origin/cve-2024-4467-hreitz-rhel-9.5.0) +Author: Kevin Wolf +Date: Thu Apr 25 14:56:02 2024 +0200 + + block: Parse filenames only when explicitly requested + + When handling image filenames from legacy options such as -drive or from + tools, these filenames are parsed for protocol prefixes, including for + the json:{} pseudo-protocol. + + This behaviour is intended for filenames that come directly from the + command line and for backing files, which may come from the image file + itself. Higher level management tools generally take care to verify that + untrusted images don't contain a bad (or any) backing file reference; + 'qemu-img info' is a suitable tool for this. + + However, for other files that can be referenced in images, such as + qcow2 data files or VMDK extents, the string from the image file is + usually not verified by management tools - and 'qemu-img info' wouldn't + be suitable because in contrast to backing files, it already opens these + other referenced files. So here the string should be interpreted as a + literal local filename. More complex configurations need to be specified + explicitly on the command line or in QMP. + + This patch changes bdrv_open_inherit() so that it only parses filenames + if a new parameter parse_filename is true. It is set for the top level + in bdrv_open(), for the file child and for the backing file child. All + other callers pass false and disable filename parsing this way. 
+ + Signed-off-by: Kevin Wolf + Reviewed-by: Eric Blake + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Hanna Czenczek + Upstream: N/A, embargoed + Signed-off-by: Hanna Czenczek + +Signed-off-by: Jon Maloy +--- + block.c | 90 ++++++++++++++++++++++++++++++++++++--------------------- + 1 file changed, 57 insertions(+), 33 deletions(-) + +diff --git a/block.c b/block.c +index a097772238..8b6aa4a65c 100644 +--- a/block.c ++++ b/block.c +@@ -86,6 +86,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, + BlockDriverState *parent, + const BdrvChildClass *child_class, + BdrvChildRole child_role, ++ bool parse_filename, + Error **errp); + + static bool bdrv_recurse_has_child(BlockDriverState *bs, +@@ -2035,7 +2036,8 @@ static void parse_json_protocol(QDict *options, const char **pfilename, + * block driver has been specified explicitly. + */ + static int bdrv_fill_options(QDict **options, const char *filename, +- int *flags, Error **errp) ++ int *flags, bool allow_parse_filename, ++ Error **errp) + { + const char *drvname; + bool protocol = *flags & BDRV_O_PROTOCOL; +@@ -2077,7 +2079,7 @@ static int bdrv_fill_options(QDict **options, const char *filename, + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put_str(*options, "filename", filename); +- parse_filename = true; ++ parse_filename = allow_parse_filename; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); +@@ -3639,7 +3641,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + } + + backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, +- &child_of_bds, bdrv_backing_role(bs), errp); ++ &child_of_bds, bdrv_backing_role(bs), true, ++ errp); + if (!backing_hd) { + bs->open_flags |= BDRV_O_NO_BACKING; + error_prepend(errp, "Could not open backing file: "); +@@ -3673,7 +3676,8 @@ free_exit: + static BlockDriverState * + bdrv_open_child_bs(const char *filename, QDict *options, 
const char *bdref_key, + BlockDriverState *parent, const BdrvChildClass *child_class, +- BdrvChildRole child_role, bool allow_none, Error **errp) ++ BdrvChildRole child_role, bool allow_none, ++ bool parse_filename, Error **errp) + { + BlockDriverState *bs = NULL; + QDict *image_options; +@@ -3704,7 +3708,8 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, + } + + bs = bdrv_open_inherit(filename, reference, image_options, 0, +- parent, child_class, child_role, errp); ++ parent, child_class, child_role, parse_filename, ++ errp); + if (!bs) { + goto done; + } +@@ -3714,6 +3719,33 @@ done: + return bs; + } + ++static BdrvChild *bdrv_open_child_common(const char *filename, ++ QDict *options, const char *bdref_key, ++ BlockDriverState *parent, ++ const BdrvChildClass *child_class, ++ BdrvChildRole child_role, ++ bool allow_none, bool parse_filename, ++ Error **errp) ++{ ++ BlockDriverState *bs; ++ BdrvChild *child; ++ ++ GLOBAL_STATE_CODE(); ++ ++ bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, ++ child_role, allow_none, parse_filename, errp); ++ if (bs == NULL) { ++ return NULL; ++ } ++ ++ bdrv_graph_wrlock(); ++ child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, ++ errp); ++ bdrv_graph_wrunlock(); ++ ++ return child; ++} ++ + /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. 
+@@ -3737,27 +3769,15 @@ BdrvChild *bdrv_open_child(const char *filename, + BdrvChildRole child_role, + bool allow_none, Error **errp) + { +- BlockDriverState *bs; +- BdrvChild *child; +- +- GLOBAL_STATE_CODE(); +- +- bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, +- child_role, allow_none, errp); +- if (bs == NULL) { +- return NULL; +- } +- +- bdrv_graph_wrlock(); +- child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, +- errp); +- bdrv_graph_wrunlock(); +- +- return child; ++ return bdrv_open_child_common(filename, options, bdref_key, parent, ++ child_class, child_role, allow_none, false, ++ errp); + } + + /* +- * Wrapper on bdrv_open_child() for most popular case: open primary child of bs. ++ * This does mostly the same as bdrv_open_child(), but for opening the primary ++ * child of a node. A notable difference from bdrv_open_child() is that it ++ * enables filename parsing for protocol names (including json:). + * + * @parent can move to a different AioContext in this function. + */ +@@ -3772,8 +3792,8 @@ int bdrv_open_file_child(const char *filename, + role = parent->drv->is_filter ? 
+ (BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY) : BDRV_CHILD_IMAGE; + +- if (!bdrv_open_child(filename, options, bdref_key, parent, +- &child_of_bds, role, false, errp)) ++ if (!bdrv_open_child_common(filename, options, bdref_key, parent, ++ &child_of_bds, role, false, true, errp)) + { + return -EINVAL; + } +@@ -3818,7 +3838,8 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) + + } + +- bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); ++ bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, false, ++ errp); + obj = NULL; + qobject_unref(obj); + visit_free(v); +@@ -3907,7 +3928,7 @@ static BlockDriverState * no_coroutine_fn + bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + int flags, BlockDriverState *parent, + const BdrvChildClass *child_class, BdrvChildRole child_role, +- Error **errp) ++ bool parse_filename, Error **errp) + { + int ret; + BlockBackend *file = NULL; +@@ -3955,9 +3976,11 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + } + + /* json: syntax counts as explicit options, as if in the QDict */ +- parse_json_protocol(options, &filename, &local_err); +- if (local_err) { +- goto fail; ++ if (parse_filename) { ++ parse_json_protocol(options, &filename, &local_err); ++ if (local_err) { ++ goto fail; ++ } + } + + bs->explicit_options = qdict_clone_shallow(options); +@@ -3982,7 +4005,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + parent->open_flags, parent->options); + } + +- ret = bdrv_fill_options(&options, filename, &flags, &local_err); ++ ret = bdrv_fill_options(&options, filename, &flags, parse_filename, ++ &local_err); + if (ret < 0) { + goto fail; + } +@@ -4051,7 +4075,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options, + + file_bs = bdrv_open_child_bs(filename, options, "file", bs, + &child_of_bds, BDRV_CHILD_IMAGE, +- true, &local_err); ++ true, true, 
&local_err); + if (local_err) { + goto fail; + } +@@ -4200,7 +4224,7 @@ BlockDriverState *bdrv_open(const char *filename, const char *reference, + GLOBAL_STATE_CODE(); + + return bdrv_open_inherit(filename, reference, options, flags, NULL, +- NULL, 0, errp); ++ NULL, 0, true, errp); + } + + /* Return true if the NULL-terminated @list contains @str */ +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch new file mode 100644 index 0000000..0e2fa11 --- /dev/null +++ b/SOURCES/kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch @@ -0,0 +1,69 @@ +From c2eafeb32a256cbafb0e65c0380acb478181326e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 5 Jun 2024 19:56:51 -0400 +Subject: [PATCH 2/4] iotests/244: Don't store data-file with protocol in image + +RH-Author: Jon Maloy +RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) +RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [2/4] ddef095945aa55bb0aacc2a2cb58f9e12ad20d5e + +commit 92e00dab8be1570b13172353d77d2af44cb4e22b +Author: Kevin Wolf +Date: Thu Apr 25 14:49:40 2024 +0200 + + iotests/244: Don't store data-file with protocol in image + + We want to disable filename parsing for data files because it's too easy + to abuse in malicious image files. Make the test ready for the change by + passing the data file explicitly in command line options. 
+ + Signed-off-by: Kevin Wolf + Reviewed-by: Eric Blake + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Hanna Czenczek + Upstream: N/A, embargoed + Signed-off-by: Hanna Czenczek + +Signed-off-by: Jon Maloy +--- + tests/qemu-iotests/244 | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 +index 3e61fa25bb..bb9cc6512f 100755 +--- a/tests/qemu-iotests/244 ++++ b/tests/qemu-iotests/244 +@@ -215,9 +215,22 @@ $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" + $QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + + # blkdebug doesn't support copy offloading, so this tests the error path +-$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG" +-$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +-$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" ++test_img_with_blkdebug="json:{ ++ 'driver': 'qcow2', ++ 'file': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'blkdebug', ++ 'image': { ++ 'driver': 'file', ++ 'filename': '$TEST_IMG.data' ++ } ++ } ++}" ++$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$test_img_with_blkdebug" ++$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$test_img_with_blkdebug" + + echo + echo "=== Flushing should flush the data file ===" +-- +2.39.3 + diff --git a/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch new file mode 100644 index 0000000..caeac22 --- /dev/null +++ b/SOURCES/kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch @@ -0,0 +1,72 @@ +From 931ab59f39b5e3551b328fe5b0f872df7a19ba05 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 5 Jun 2024 19:56:51 -0400 +Subject: [PATCH 3/4] iotests/270: Don't store data-file with json: prefix in + image + +RH-Author: Jon Maloy 
+RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) +RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [3/4] 7a9844fd48e3f3c4d1711ea4fb671c795ca4a1c1 + +commit 705bcc2819ce8e0f8b9d660a93bc48de26413aec +Author: Kevin Wolf +Date: Thu Apr 25 14:49:40 2024 +0200 + + iotests/270: Don't store data-file with json: prefix in image + + We want to disable filename parsing for data files because it's too easy + to abuse in malicious image files. Make the test ready for the change by + passing the data file explicitly in command line options. + + Signed-off-by: Kevin Wolf + Reviewed-by: Eric Blake + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Hanna Czenczek + Upstream: N/A, embargoed + Signed-off-by: Hanna Czenczek + +Signed-off-by: Jon Maloy +--- + tests/qemu-iotests/270 | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/tests/qemu-iotests/270 b/tests/qemu-iotests/270 +index 74352342db..c37b674aa2 100755 +--- a/tests/qemu-iotests/270 ++++ b/tests/qemu-iotests/270 +@@ -60,8 +60,16 @@ _make_test_img -o cluster_size=2M,data_file="$TEST_IMG.orig" \ + # "write" 2G of data without using any space. + # (qemu-img create does not like it, though, because null-co does not + # support image creation.) +-$QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ +- "$TEST_IMG" ++test_img_with_null_data="json:{ ++ 'driver': '$IMGFMT', ++ 'file': { ++ 'filename': '$TEST_IMG' ++ }, ++ 'data-file': { ++ 'driver': 'null-co', ++ 'size':'4294967296' ++ } ++}" + + # This gives us a range of: + # 2^31 - 512 + 768 - 1 = 2^31 + 255 > 2^31 +@@ -74,7 +82,7 @@ $QEMU_IMG amend -o data_file="json:{'driver':'null-co',,'size':'4294967296'}" \ + # on L2 boundaries, we need large L2 tables; hence the cluster size of + # 2 MB. (Anything from 256 kB should work, though, because then one L2 + # table covers 8 GB.) 
+-$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$TEST_IMG" | _filter_qemu_io ++$QEMU_IO -c "write 768 $((2 ** 31 - 512))" "$test_img_with_null_data" | _filter_qemu_io + + _check_test_img + +-- +2.39.3 + diff --git a/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch new file mode 100644 index 0000000..a92c7f5 --- /dev/null +++ b/SOURCES/kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch @@ -0,0 +1,125 @@ +From 6e39b4c13c0eacb35e81874b09e6b6411266c631 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 5 Jun 2024 19:56:51 -0400 +Subject: [PATCH 1/4] qcow2: Don't open data_file with BDRV_O_NO_IO + +RH-Author: Jon Maloy +RH-MergeRequest: 2: EMBARGOED CVE-2024-4467 for rhel-9.4.z (PRDSC) +RH-Jira: https://issues.redhat.com/browse/RHEL-35610 +RH-CVE: CVE-2024-4467 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Czenczek +RH-Commit: [1/4] 1000359b05c706f3c5155a9481692352be333129 + +commit f9843ce5c519901654a7d8ba43ee95ce25ca13c2 +Author: Kevin Wolf +Date: Thu Apr 11 15:06:01 2024 +0200 + + qcow2: Don't open data_file with BDRV_O_NO_IO + + One use case for 'qemu-img info' is verifying that untrusted images + don't reference an unwanted external file, be it as a backing file or an + external data file. To make sure that calling 'qemu-img info' can't + already have undesired side effects with a malicious image, just don't + open the data file at all with BDRV_O_NO_IO. If nothing ever tries to do + I/O, we don't need to have it open. + + This changes the output of iotests case 061, which used 'qemu-img info' + to show that opening an image with an invalid data file fails. After + this patch, it succeeds. Replace this part of the test with a qemu-io + call, but keep the final 'qemu-img info' to show that the invalid data + file is correctly displayed in the output. 
+ + Signed-off-by: Kevin Wolf + Reviewed-by: Eric Blake + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Hanna Czenczek + Upstream: N/A, embargoed + Signed-off-by: Hanna Czenczek + +Signed-off-by: Jon Maloy +--- + block/qcow2.c | 17 ++++++++++++++++- + tests/qemu-iotests/061 | 6 ++++-- + tests/qemu-iotests/061.out | 8 ++++++-- + 3 files changed, 26 insertions(+), 5 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index d91b7b91d3..b269cfc78f 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1642,7 +1642,22 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + goto fail; + } + +- if (open_data_file) { ++ if (open_data_file && (flags & BDRV_O_NO_IO)) { ++ /* ++ * Don't open the data file for 'qemu-img info' so that it can be used ++ * to verify that an untrusted qcow2 image doesn't refer to external ++ * files. ++ * ++ * Note: This still makes has_data_file() return true. ++ */ ++ if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { ++ s->data_file = NULL; ++ } else { ++ s->data_file = bs->file; ++ } ++ qdict_extract_subqdict(options, NULL, "data-file."); ++ qdict_del(options, "data-file"); ++ } else if (open_data_file) { + /* Open external data file */ + bdrv_graph_co_rdunlock(); + s->data_file = bdrv_co_open_child(NULL, options, "data-file", bs, +diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061 +index 53c7d428e3..b71ac097d1 100755 +--- a/tests/qemu-iotests/061 ++++ b/tests/qemu-iotests/061 +@@ -326,12 +326,14 @@ $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" + echo + _make_test_img -o "compat=1.1,data_file=$TEST_IMG.data" 64M + $QEMU_IMG amend -o "data_file=foo" "$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo + 
$QEMU_IMG amend -o "data_file=" --image-opts "data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" +-_img_info --format-specific ++$QEMU_IO -c "read 0 4k" "$TEST_IMG" 2>&1 | _filter_testdir | _filter_imgfmt ++$QEMU_IO -c "open -o data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" -c "read 0 4k" | _filter_qemu_io + TEST_IMG="data-file.filename=$TEST_IMG.data,file.filename=$TEST_IMG" _img_info --format-specific --image-opts + + echo +diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out +index 139fc68177..24c33add7c 100644 +--- a/tests/qemu-iotests/061.out ++++ b/tests/qemu-iotests/061.out +@@ -545,7 +545,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 + qemu-img: data-file can only be set for images that use an external data file + + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data +-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'foo': No such file or directory ++qemu-io: can't open device TEST_DIR/t.IMGFMT: Could not open 'foo': No such file or directory ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +@@ -560,7 +562,9 @@ Format specific information: + corrupt: false + extended l2: false + +-qemu-img: Could not open 'TEST_DIR/t.IMGFMT': 'data-file' is required for this image ++qemu-io: can't open device TEST_DIR/t.IMGFMT: 'data-file' is required for this image ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + image: TEST_DIR/t.IMGFMT + file format: IMGFMT + virtual size: 64 MiB (67108864 bytes) +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index c17ee63..80f9432 100644 --- a/SPECS/qemu-kvm.spec +++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 
8.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.3 +Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.4 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -622,6 +622,14 @@ Patch181: kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch Patch182: kvm-qio-Inherit-follow_coroutine_ctx-across-TLS.patch # For RHEL-33754 - Qemu hang when quit dst vm after storage migration(nbd+tls) [rhel-9.4.z] Patch183: kvm-iotests-test-NBD-TLS-iothread.patch +# For RHEL-35610 +Patch184: kvm-qcow2-Don-t-open-data_file-with-BDRV_O_NO_IO.patch +# For RHEL-35610 +Patch185: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch +# For RHEL-35610 +Patch186: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch +# For RHEL-35610 +Patch187: kvm-block-Parse-filenames-only-when-explicitly-requested.patch %if %{have_clang} BuildRequires: clang @@ -1683,6 +1691,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Jun 19 2024 Miroslav Rezanina - 8.2.0-11.el9_4.4 +- Fixing CVE-2024-4467 +- Resolves: RHEL-35610 + * Fri Jun 07 2024 Miroslav Rezanina - 8.2.0-11.el9_4.3 - kvm-nbd-server-do-not-poll-within-a-coroutine-context.patch [RHEL-33754] - kvm-nbd-server-Mark-negotiation-functions-as-coroutine_f.patch [RHEL-33754] From b8753d7e41520928b3bf125367efce2d37fac82a Mon Sep 17 00:00:00 2001 From: eabdullin Date: Wed, 4 Sep 2024 05:31:51 +0000 Subject: [PATCH 4/4] import OL qemu-kvm-8.2.0-11.el9_4.6 --- ...canout-version-with-pc-q35-rhel9.4.0.patch | 128 ++++++++++++ ...io-add-IO_CMD_FDSYNC-command-support.patch | 126 +++++++++++ ...024-7409-Cap-default-max-connections.patch | 184 ++++++++++++++++ ...024-7409-Close-stray-clients-at-serv.patch | 173 ++++++++++++++++ ...024-7409-Drop-non-negotiating-client.patch | 134 ++++++++++++ ...-Plumb-in-new-args-to-nbd_client_add.patch | 175 ++++++++++++++++ 
...ne-type-compat-for-virtio-gpu-migrat.patch | 38 ++++ ...-gpu-fix-scanout-migration-post-load.patch | 196 ++++++++++++++++++ SOURCES/kvm-virtio-gpu-fix-v2-migration.patch | 122 +++++++++++ SPECS/qemu-kvm.spec | 41 +++- 10 files changed, 1316 insertions(+), 1 deletion(-) create mode 100644 SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch create mode 100644 SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch create mode 100644 SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch create mode 100644 SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch create mode 100644 SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch create mode 100644 SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch create mode 100644 SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch create mode 100644 SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch create mode 100644 SOURCES/kvm-virtio-gpu-fix-v2-migration.patch diff --git a/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch b/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch new file mode 100644 index 0000000..fdac70c --- /dev/null +++ b/SOURCES/kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch @@ -0,0 +1,128 @@ +From ef212eb3026a2460f6502a76afa3baedeb74d975 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Tue, 6 Aug 2024 14:14:27 +0400 +Subject: [PATCH 1/5] Fix scanout version with pc-q35-rhel9.4.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 383: Fix scanout version with pc-q35-rhel9.4.0 +RH-Jira: RHEL-53565 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] ed2860cfb933654b0046c1b59f78d2e50146bd6c + +Resolves: https://issues.redhat.com/browse/RHEL-52940 + +Introduce hw_compat_rhel_9_4_extra so that pc-q35-rhel9.4.0 has +x-scanout-vmstate-version=1 + +Signed-off-by: 
Marc-André Lureau +--- + hw/arm/virt.c | 1 + + hw/core/machine.c | 13 +++++++++++-- + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 3 +++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 6 files changed, 21 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e4a66affcb..851eff6b28 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3670,6 +3670,7 @@ type_init(rhel_machine_init); + + static void rhel940_virt_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len); + } + DEFINE_RHEL_MACHINE_AS_LATEST(9, 4, 0) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index c8c460c916..fe0a28ef3b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -64,6 +64,17 @@ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++/* ++ * rhel_9_4_extra is used for <= 9.4 machines. 
++ * ++ * (for compatibility and historical reasons, rhel_9_4 is used for <= 9.2 too) ++ */ ++GlobalProperty hw_compat_rhel_9_4_extra[] = { ++ /* hw_compat_rhel_9_4_extra from hw_compat_8_2 */ ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, ++}; ++const size_t hw_compat_rhel_9_4_extra_len = G_N_ELEMENTS(hw_compat_rhel_9_4_extra); ++ + GlobalProperty hw_compat_rhel_9_4[] = { + /* hw_compat_rhel_9_4 from hw_compat_8_0 */ + { TYPE_VIRTIO_NET, "host_uso", "off"}, +@@ -81,8 +92,6 @@ GlobalProperty hw_compat_rhel_9_4[] = { + { "igb", "x-pcie-flr-init", "off" }, + /* hw_compat_rhel_9_4 jira RHEL-24045 */ + { "virtio-mem", "dynamic-memslots", "off" }, +- /* hw_compat_rhel_9_4 from hw_compat_8_1 */ +- { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 54d1c58bce..c846673d87 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1031,6 +1031,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + "Use a different south bridge than PIIX3"); + + ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra, ++ hw_compat_rhel_9_4_extra_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, + hw_compat_rhel_9_4_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_3, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index cd5fb7380e..02bc3d515f 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -731,6 +731,9 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ ++ compat_props_add(m->compat_props, hw_compat_rhel_9_4_extra, ++ hw_compat_rhel_9_4_extra_len); + } + + DEFINE_PC_MACHINE(q35_rhel940, "pc-q35-rhel9.4.0", pc_q35_init_rhel940, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 24f4773179..cc6087c37a 100644 +--- 
a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1277,6 +1277,7 @@ static void ccw_machine_rhel940_instance_options(MachineState *machine) + + static void ccw_machine_rhel940_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_4_extra, hw_compat_rhel_9_4_extra_len); + } + DEFINE_CCW_MACHINE(rhel940, "rhel9.4.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 4edfdb0ddb..e6ae0e61cf 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -505,6 +505,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_4_extra[]; ++extern const size_t hw_compat_rhel_9_4_extra_len; ++ + extern GlobalProperty hw_compat_rhel_9_4[]; + extern const size_t hw_compat_rhel_9_4_len; + +-- +2.39.3 + diff --git a/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch new file mode 100644 index 0000000..391e618 --- /dev/null +++ b/SOURCES/kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch @@ -0,0 +1,126 @@ +From 9d00965910d5a7818ffe55b18d3e9dd4c7210e1b Mon Sep 17 00:00:00 2001 +From: Prasad Pandit +Date: Thu, 25 Apr 2024 12:34:12 +0530 +Subject: [PATCH 4/4] linux-aio: add IO_CMD_FDSYNC command support + +RH-Author: Prasad Pandit +RH-MergeRequest: 378: linux-aio: add IO_CMD_FDSYNC command support +RH-Jira: RHEL-43261 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 3b80d4a162aad1a87322078a7d7b060a9496035b + +Libaio defines IO_CMD_FDSYNC command to sync all outstanding +asynchronous I/O operations, by flushing out file data to the +disk storage. Enable linux-aio to submit such aio request. + +When using aio=native without fdsync() support, QEMU creates +pthreads, and destroying these pthreads results in TLB flushes. +In a real-time guest environment, TLB flushes cause a latency +spike. 
This patch helps to avoid such spikes. + +Jira: https://issues.redhat.com/browse/RHEL-43261 +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Prasad Pandit +Message-ID: <20240425070412.37248-1-ppandit@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 24687abf237e3c15816d689a8e4b08d7c3190dcb) +Signed-off-by: Prasad Pandit +--- + block/file-posix.c | 9 +++++++++ + block/linux-aio.c | 21 ++++++++++++++++++++- + include/block/raw-aio.h | 1 + + 3 files changed, 30 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 35684f7e21..9831b08fb6 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -159,6 +159,7 @@ typedef struct BDRVRawState { + bool has_discard:1; + bool has_write_zeroes:1; + bool use_linux_aio:1; ++ bool has_laio_fdsync:1; + bool use_linux_io_uring:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ + bool has_fallocate; +@@ -718,6 +719,9 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + ret = -EINVAL; + goto fail; + } ++ if (s->use_linux_aio) { ++ s->has_laio_fdsync = laio_has_fdsync(s->fd); ++ } + #else + if (s->use_linux_aio) { + error_setg(errp, "aio=native was specified, but is not supported " +@@ -2599,6 +2603,11 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs) + if (raw_check_linux_io_uring(s)) { + return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH); + } ++#endif ++#ifdef CONFIG_LINUX_AIO ++ if (s->has_laio_fdsync && raw_check_linux_aio(s)) { ++ return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0); ++ } + #endif + return raw_thread_pool_submit(handle_aiocb_flush, &acb); + } +diff --git a/block/linux-aio.c b/block/linux-aio.c +index ec05d946f3..e3b5ec9aba 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -384,6 +384,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset, + case QEMU_AIO_READ: + io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset); + break; ++ 
case QEMU_AIO_FLUSH: ++ io_prep_fdsync(iocbs, fd); ++ break; + /* Currently Linux kernel does not support other operations */ + default: + fprintf(stderr, "%s: invalid AIO request type 0x%x.\n", +@@ -412,7 +415,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + AioContext *ctx = qemu_get_current_aio_context(); + struct qemu_laiocb laiocb = { + .co = qemu_coroutine_self(), +- .nbytes = qiov->size, ++ .nbytes = qiov ? qiov->size : 0, + .ctx = aio_get_linux_aio(ctx), + .ret = -EINPROGRESS, + .is_read = (type == QEMU_AIO_READ), +@@ -486,3 +489,19 @@ void laio_cleanup(LinuxAioState *s) + } + g_free(s); + } ++ ++bool laio_has_fdsync(int fd) ++{ ++ struct iocb cb; ++ struct iocb *cbs[] = {&cb, NULL}; ++ ++ io_context_t ctx = 0; ++ io_setup(1, &ctx); ++ ++ /* check if host kernel supports IO_CMD_FDSYNC */ ++ io_prep_fdsync(&cb, fd); ++ int ret = io_submit(ctx, 1, cbs); ++ ++ io_destroy(ctx); ++ return (ret == -EINVAL) ? false : true; ++} +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 0f63c2800c..3166903a56 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -60,6 +60,7 @@ void laio_cleanup(LinuxAioState *s); + int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov, + int type, uint64_t dev_max_batch); + ++bool laio_has_fdsync(int); + void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context); + void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); + #endif +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch new file mode 100644 index 0000000..7098ffc --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -0,0 +1,184 @@ +From 3311ddaffa78ad8d2c40c86cd69461ebeabe1052 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 6 Aug 2024 13:53:00 -0500 +Subject: [PATCH 3/5] nbd/server: 
CVE-2024-7409: Cap default max-connections to + 100 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] +RH-Jira: RHEL-52616 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 17c9c81282e180485e2f2d584a3e0c73a6fbd66a (ebblake/qemu-kvm) + +Allowing an unlimited number of clients to any web service is a recipe +for a rudimentary denial of service attack: the client merely needs to +open lots of sockets without closing them, until qemu no longer has +any more fds available to allocate. + +For qemu-nbd, we default to allowing only 1 connection unless more are +explicitly asked for (-e or --shared); this was historically picked as +a nice default (without an explicit -t, a non-persistent qemu-nbd goes +away after a client disconnects, without needing any additional +follow-up commands), and we are not going to change that interface now +(besides, someday we want to point people towards qemu-storage-daemon +instead of qemu-nbd). + +But for qemu proper, and the newer qemu-storage-daemon, the QMP +nbd-server-start command has historically had a default of unlimited +number of connections, in part because unlike qemu-nbd it is +inherently persistent until nbd-server-stop. Allowing multiple client +sockets is particularly useful for clients that can take advantage of +MULTI_CONN (creating parallel sockets to increase throughput), +although known clients that do so (such as libnbd's nbdcopy) typically +use only 8 or 16 connections (the benefits of scaling diminish once +more sockets are competing for kernel attention). Picking a number +large enough for typical use cases, but not unlimited, makes it +slightly harder for a malicious client to perform a denial of service +merely by opening lots of connections without progressing through the +handshake.
+ +This change does not eliminate CVE-2024-7409 on its own, but reduces +the chance for fd exhaustion or unlimited memory usage as an attack +surface. On the other hand, by itself, it makes it more obvious that +with a finite limit, we have the problem of an unauthenticated client +holding 100 fds opened as a way to block out a legitimate client from +being able to connect; thus, later patches will further add timeouts +to reject clients that are not making progress. + +This is an INTENTIONAL change in behavior, and will break any client +of nbd-server-start that was not passing an explicit max-connections +parameter, yet expects more than 100 simultaneous connections. We are +not aware of any such client (as stated above, most clients aware of +MULTI_CONN get by just fine on 8 or 16 connections, and probably cope +with later connections failing by relying on the earlier connections; +libvirt has not yet been passing max-connections, but generally +creates NBD servers with the intent for a single client for the sake +of live storage migration; meanwhile, the KubeSAN project anticipates +a large cluster sharing multiple clients [up to 8 per node, and up to +100 nodes in a cluster], but it currently uses qemu-nbd with an +explicit --shared=0 rather than qemu-storage-daemon with +nbd-server-start). + +We considered using a deprecation period (declare that omitting +max-connections is deprecated, and make it mandatory in 3 releases - +then we don't need to pick an arbitrary default); that has zero risk +of breaking any apps that accidentally depended on more than 100 +connections, and where such breakage might not be noticed under unit +testing but only under the larger loads of production usage. But it +does not close the denial-of-service hole until far into the future, +and requires all apps to change to add the parameter even if 100 was +good enough.
It also has a drawback that any app (like libvirt) that +is accidentally relying on an unlimited default should seriously +consider their own CVE now, at which point they are going to change to +pass explicit max-connections sooner than waiting for 3 qemu releases. +Finally, if our changed default breaks an app, that app can always +pass in an explicit max-parameters with a larger value. + +It is also intentional that the HMP interface to nbd-server-start is +not changed to expose max-connections (any client needing to fine-tune +things should be using QMP). + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-12-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[ericb: Expand commit message to summarize Dan's argument for why we +break corner-case back-compat behavior without a deprecation period] +Signed-off-by: Eric Blake + +(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623) +Jira: https://issues.redhat.com/browse/RHEL-52616 +Signed-off-by: Eric Blake +--- + block/monitor/block-hmp-cmds.c | 3 ++- + blockdev-nbd.c | 8 ++++++++ + include/block/nbd.h | 7 +++++++ + qapi/block-export.json | 4 ++-- + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index bdbb5cb141..2d7b4d80c8 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) + goto exit; + } + +- nbd_server_start(addr, NULL, NULL, 0, &local_err); ++ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS, ++ &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 267a1de903..24ba5382db 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + + void 
nbd_server_start_options(NbdServerOptions *arg, Error **errp) + { ++ if (!arg->has_max_connections) { ++ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); + } +@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr, + { + SocketAddress *addr_flat = socket_address_flatten(addr); + ++ if (!has_max_connections) { ++ max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); + } +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 1d4d65922d..d4f8b21aec 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd; + */ + #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 + ++/* ++ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at ++ * once; must be large enough to allow a MULTI_CONN-aware client like ++ * nbdcopy to create its typical number of 8-16 sockets. ++ */ ++#define NBD_DEFAULT_MAX_CONNECTIONS 100 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 7874a49ba7..1d255d77e3 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -28,7 +28,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0) ++# default: 100) + # + # Since: 4.2 + ## +@@ -63,7 +63,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0). ++# default: 100). + # + # Returns: error if the server is already running. 
+ # +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch new file mode 100644 index 0000000..834eae3 --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -0,0 +1,173 @@ +From 1c2fe81349a99cf0c28549426c7ad0c06d942ae3 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 12:23:13 -0500 +Subject: [PATCH 5/5] nbd/server: CVE-2024-7409: Close stray clients at + server-stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] +RH-Jira: RHEL-52616 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] 54d56c0d92d92b2b6a0f81a01853881286beae0e (ebblake/qemu-kvm) + +A malicious client can attempt to connect to an NBD server, and then +intentionally delay progress in the handshake, including if it does +not know the TLS secrets. Although the previous two patches reduce +this behavior by capping the default max-connections parameter and +killing slow clients, they did not eliminate the possibility of a +client waiting to close the socket until after the QMP nbd-server-stop +command is executed, at which point qemu would SEGV when trying to +dereference the NULL nbd_server global which is no longer present. +This amounts to a denial of service attack. Worse, if another NBD +server is started before the malicious client disconnects, I cannot +rule out additional adverse effects when the old client interferes +with the connection count of the new server (although the most likely +is a crash due to an assertion failure when checking +nbd_server->connections > 0). + +For environments without this patch, the CVE can be mitigated by +ensuring (such as via a firewall) that only trusted clients can +connect to an NBD server. 
Note that using frameworks like libvirt +that ensure that TLS is used and that nbd-server-stop is not executed +while any trusted clients are still connected will only help if there +is also no possibility for an untrusted client to open a connection +but then stall on the NBD handshake. + +Given the previous patches, it would be possible to guarantee that no +clients remain connected by having nbd-server-stop sleep for longer +than the default handshake deadline before finally freeing the global +nbd_server object, but that could make QMP non-responsive for a long +time. So instead, this patch fixes the problem by tracking all client +sockets opened while the server is running, and forcefully closing any +such sockets remaining without a completed handshake at the time of +nbd-server-stop, then waiting until the coroutines servicing those +sockets notice the state change. nbd-server-stop now has a second +AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the +blk_exp_close_all_type() that disconnects all clients that completed +handshakes), but forced socket shutdown is enough to progress the +coroutines and quickly tear down all clients before the server is +freed, thus finally fixing the CVE. + +This patch relies heavily on the fact that nbd/server.c guarantees +that it only calls nbd_blockdev_client_closed() from the main loop +(see the assertion in nbd_client_put() and the hoops used in +nbd_client_put_nonzero() to achieve that); if we did not have that +guarantee, we would also need a mutex protecting our accesses of the +list of connections to survive re-entrancy from independent iothreads.
+ +Although I did not actually try to test old builds, it looks like this +problem has existed since at least commit 862172f45c (v2.12.0, 2017) - +even back when that patch started using a QIONetListener to handle +listening on multiple sockets, nbd_server_free() was already unaware +that the nbd_blockdev_client_closed callback can be reached later by a +client thread that has not completed handshakes (and therefore the +client's socket never got added to the list closed in +nbd_export_close_all), despite that patch intentionally tearing down +the QIONetListener to prevent new clients. + +Reported-by: Alexander Ivanov +Fixes: CVE-2024-7409 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-14-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé + +(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3) +Jira: https://issues.redhat.com/browse/RHEL-52616 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 24ba5382db..f73409ae49 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -21,12 +21,18 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + ++typedef struct NBDConn { ++ QIOChannelSocket *cioc; ++ QLIST_ENTRY(NBDConn) next; ++} NBDConn; ++ + typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; ++ QLIST_HEAD(, NBDConn) conns; + } NBDServerData; + + static NBDServerData *nbd_server; +@@ -51,6 +57,14 @@ int nbd_server_max_connections(void) + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { ++ NBDConn *conn = nbd_client_owner(client); ++ ++ assert(qemu_in_main_thread() && nbd_server); ++ ++ object_unref(OBJECT(conn->cioc)); ++ QLIST_REMOVE(conn, next); ++ g_free(conn); ++ + nbd_client_put(client); + 
assert(nbd_server->connections > 0); + nbd_server->connections--; +@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) + { ++ NBDConn *conn = g_new0(NBDConn, 1); ++ ++ assert(qemu_in_main_thread() && nbd_server); + nbd_server->connections++; ++ object_ref(OBJECT(cioc)); ++ conn->cioc = cioc; ++ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next); + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + /* TODO - expose handshake timeout as QMP option */ + nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, + nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed, NULL); ++ nbd_blockdev_client_closed, conn); + } + + static void nbd_update_server_watch(NBDServerData *s) +@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s) + + static void nbd_server_free(NBDServerData *server) + { ++ NBDConn *conn, *tmp; ++ + if (!server) { + return; + } + ++ /* ++ * Forcefully close the listener socket, and any clients that have ++ * not yet disconnected on their own. 
++ */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { ++ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, ++ NULL); ++ } ++ ++ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0); ++ + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch new file mode 100644 index 0000000..0b2261e --- /dev/null +++ b/SOURCES/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -0,0 +1,134 @@ +From 71c525e45f3f2a421ad651bc50eff94be925b36c Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 8 Aug 2024 16:05:08 -0500 +Subject: [PATCH 4/5] nbd/server: CVE-2024-7409: Drop non-negotiating clients +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] +RH-Jira: RHEL-52616 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 4394e60a8733cd57ee6c8cb140171c7f61cc13a3 (ebblake/qemu-kvm) + +A client that opens a socket but does not negotiate is merely hogging +qemu's resources (an open fd and a small amount of memory); and a +malicious client that can access the port where NBD is listening can +attempt a denial of service attack by intentionally opening and +abandoning lots of unfinished connections. The previous patch put a +default bound on the number of such ongoing connections, but once that +limit is hit, no more clients can connect (including legitimate ones). +The solution is to insist that clients complete handshake within a +reasonable time limit, defaulting to 10 seconds. 
A client that has +not successfully completed NBD_OPT_GO by then (including the case of +where the client didn't know TLS credentials to even reach the point +of NBD_OPT_GO) is wasting our time and does not deserve to stay +connected. Later patches will allow fine-tuning the limit away from +the default value (including disabling it for doing integration +testing of the handshake process itself). + +Note that this patch in isolation actually makes it more likely to see +qemu SEGV after nbd-server-stop, as any client socket still connected +when the server shuts down will now be closed after 10 seconds rather +than at the client's whims. That will be addressed in the next patch. + +For a demo of this patch in action: +$ qemu-nbd -f raw -r -t -e 10 file & +$ nbdsh --opt-mode -c ' +H = list() +for i in range(20): + print(i) + H.insert(i, nbd.NBD()) + H[i].set_opt_mode(True) + H[i].connect_uri("nbd://localhost") +' +$ kill $! + +where later connections get to start progressing once earlier ones are +forcefully dropped for taking too long, rather than hanging. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-13-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: rebase to changes earlier in series, reduce scope of timer] +Signed-off-by: Eric Blake + +(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0) +Jira: https://issues.redhat.com/browse/RHEL-52616 +Signed-off-by: Eric Blake +--- + nbd/server.c | 28 +++++++++++++++++++++++++++- + nbd/trace-events | 1 + + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e50012499f..39285cc971 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client) + } + } + ++static void nbd_handshake_timer_cb(void *opaque) ++{ ++ QIOChannel *ioc = opaque; ++ ++ trace_nbd_handshake_timer_cb(); ++ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++} ++ + static coroutine_fn void nbd_co_client_start(void *opaque) + { + NBDClient *client = opaque; + Error *local_err = NULL; ++ QEMUTimer *handshake_timer = NULL; + + qemu_co_mutex_init(&client->send_lock); + +- /* TODO - utilize client->handshake_max_secs */ ++ /* ++ * Create a timer to bound the time spent in negotiation. If the ++ * timer expires, it is likely nbd_negotiate will fail because the ++ * socket was shutdown. 
++ */ ++ if (client->handshake_max_secs > 0) { ++ handshake_timer = aio_timer_new(qemu_get_aio_context(), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ nbd_handshake_timer_cb, ++ client->sioc); ++ timer_mod(handshake_timer, ++ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ client->handshake_max_secs * NANOSECONDS_PER_SECOND); ++ } ++ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); + } ++ timer_free(handshake_timer); + client_close(client, false); + return; + } + ++ timer_free(handshake_timer); + WITH_QEMU_LOCK_GUARD(&client->lock) { + nbd_client_receive_next_request(client); + } +diff --git a/nbd/trace-events b/nbd/trace-events +index 00ae3216a1..cbd0a4ab7e 100644 +--- a/nbd/trace-events ++++ b/nbd/trace-events +@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload + nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 + nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 + nbd_trip(void) "Reading request" ++nbd_handshake_timer_cb(void) "client took too long to negotiate" + + # client-connection.c + nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 +-- +2.39.3 + diff --git a/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch new file mode 100644 index 0000000..85a9986 --- /dev/null +++ b/SOURCES/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -0,0 +1,175 @@ +From c8c7dc7a445892a820223be2e7617a790e0400f5 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 08:50:01 -0500 +Subject: [PATCH 2/5] nbd/server: Plumb in new args to nbd_client_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 386: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) [rhel-9.4.z] +RH-Jira: RHEL-52616 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 658d80c80a1163fcad456a34ec20d7828273a36c (ebblake/qemu-kvm) + +Upcoming patches to fix a CVE need to track an opaque pointer passed +in by the owner of a client object, as well as request for a time +limit on how fast negotiation must complete. Prepare for that by +changing the signature of nbd_client_new() and adding an accessor to +get at the opaque pointer, although for now the two servers +(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though +they pass in a new default timeout value. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-11-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan] +Signed-off-by: Eric Blake + +(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac) +Jira: https://issues.redhat.com/browse/RHEL-52616 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 6 ++++-- + include/block/nbd.h | 11 ++++++++++- + nbd/server.c | 20 +++++++++++++++++--- + qemu-nbd.c | 4 +++- + 4 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 213012435f..267a1de903 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); +- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed); ++ /* TODO - expose handshake timeout as QMP option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ nbd_server->tlscreds, nbd_server->tlsauthz, ++ nbd_blockdev_client_closed, NULL); + } + + static void nbd_update_server_watch(NBDServerData *s) +diff 
--git a/include/block/nbd.h b/include/block/nbd.h +index 4e7bd6342f..1d4d65922d 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts; + + extern const BlockExportDriver blk_exp_nbd; + ++/* ++ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must ++ * succeed at NBD_OPT_GO before being forcefully dropped as too slow. ++ */ ++#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)); ++ void (*close_fn)(NBDClient *, bool), ++ void *owner); ++void *nbd_client_owner(NBDClient *client); + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +diff --git a/nbd/server.c b/nbd/server.c +index 892797bb11..e50012499f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -124,12 +124,14 @@ struct NBDMetaContexts { + struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); ++ void *owner; + + QemuMutex lock; + + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; ++ uint32_t handshake_max_secs; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + + qemu_co_mutex_init(&client->send_lock); + ++ /* TODO - utilize client->handshake_max_secs */ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); +@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + } + + /* +- * Create a new client listener using 
the given channel @sioc. ++ * Create a new client listener using the given channel @sioc and @owner. + * Begin servicing it in a coroutine. When the connection closes, call +- * @close_fn with an indication of whether the client completed negotiation. ++ * @close_fn with an indication of whether the client completed negotiation ++ * within @handshake_max_secs seconds (0 for unbounded). + */ + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)) ++ void (*close_fn)(NBDClient *, bool), ++ void *owner) + { + NBDClient *client; + Coroutine *co; +@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc, + object_ref(OBJECT(client->tlscreds)); + } + client->tlsauthz = g_strdup(tlsauthz); ++ client->handshake_max_secs = handshake_max_secs; + client->sioc = sioc; + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + object_ref(OBJECT(client->sioc)); + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + client->close_fn = close_fn; ++ client->owner = owner; + + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); + } ++ ++void * ++nbd_client_owner(NBDClient *client) ++{ ++ return client->owner; ++} +diff --git a/qemu-nbd.c b/qemu-nbd.c +index bac0b5e3ec..01b4a63c31 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -389,7 +389,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + nb_fds++; + nbd_update_server_watch(); +- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed); ++ /* TODO - expose handshake timeout as command line option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ tlscreds, tlsauthz, nbd_client_closed, NULL); + } + + static void nbd_update_server_watch(void) +-- +2.39.3 + diff --git a/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch new file mode 
100644 index 0000000..667e80b --- /dev/null +++ b/SOURCES/kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch @@ -0,0 +1,38 @@ +From aa0ad36a56cbc0528306e47112081b0db124e512 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 5 Jun 2024 10:28:20 +0400 +Subject: [PATCH 3/4] rhel 9.4.0 machine type compat for virtio-gpu migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load +RH-Jira: RHEL-36181 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 270e932e04eb2d730550f3a56b53cbb17f5e66f8 + +Jira: https://issues.redhat.com/browse/RHEL-36181 + +Signed-off-by: Marc-André Lureau +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index a0843ab93e..c8c460c916 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -81,6 +81,8 @@ GlobalProperty hw_compat_rhel_9_4[] = { + { "igb", "x-pcie-flr-init", "off" }, + /* hw_compat_rhel_9_4 jira RHEL-24045 */ + { "virtio-mem", "dynamic-memslots", "off" }, ++ /* hw_compat_rhel_9_4 from hw_compat_8_1 */ ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_rhel_9_4_len = G_N_ELEMENTS(hw_compat_rhel_9_4); + +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch b/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch new file mode 100644 index 0000000..b1581bf --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-fix-scanout-migration-post-load.patch @@ -0,0 +1,196 @@ +From 523423436e83b01a83c60bc44bd08471992aaff4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 15 Jan 2024 16:34:33 +0400 +Subject: [PATCH 1/4] virtio-gpu: fix scanout migration post-load +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Marc-André Lureau +RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load +RH-Jira: RHEL-36181 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 05e9dbf7f602d62ec2f336a855daf57e6ee8f3f3 + +The current post-loading code for scanout has a FIXME: it doesn't take +the resource region/rect into account. But there is more, when adding +blob migration support in commit f66767f75c9, I didn't realize that blob +resources could be used for scanouts. This situation leads to a crash +during post-load, as they don't have an associated res->image. + +virtio_gpu_do_set_scanout() handles all cases, but requires the +associated virtio_gpu_framebuffer, which is currently not saved during +migration. + +Add a v2 of "virtio-gpu-one-scanout" with the framebuffer fields, so we +can restore blob scanouts, as well as fixing the existing FIXME. + +Signed-off-by: Marc-André Lureau +Reviewed-by: Sebastian Ott +(cherry picked from commit dfcf74fa68c88233209aafc5f82728d0b9a1af5c) +--- + hw/display/virtio-gpu.c | 55 +++++++++++++++++++++++++++------- + include/hw/virtio/virtio-gpu.h | 1 + + 2 files changed, 46 insertions(+), 10 deletions(-) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index 1702190ead..bc485d60ea 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -598,6 +598,7 @@ static void virtio_unref_resource(pixman_image_t *image, void *data) + static void virtio_gpu_update_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_simple_resource *res, ++ struct virtio_gpu_framebuffer *fb, + struct virtio_gpu_rect *r) + { + struct virtio_gpu_simple_resource *ores; +@@ -615,9 +616,10 @@ static void virtio_gpu_update_scanout(VirtIOGPU *g, + scanout->y = r->y; + scanout->width = r->width; + scanout->height = r->height; ++ scanout->fb = *fb; + } + +-static void virtio_gpu_do_set_scanout(VirtIOGPU *g, ++static bool virtio_gpu_do_set_scanout(VirtIOGPU *g, + uint32_t scanout_id, + struct virtio_gpu_framebuffer
*fb, + struct virtio_gpu_simple_resource *res, +@@ -643,7 +645,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + r->x, r->y, r->width, r->height, + fb->width, fb->height); + *error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER; +- return; ++ return false; + } + + g->parent_obj.enable = 1; +@@ -651,11 +653,12 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + if (res->blob) { + if (console_has_gl(scanout->con)) { + if (!virtio_gpu_update_dmabuf(g, scanout_id, res, fb, r)) { +- virtio_gpu_update_scanout(g, scanout_id, res, r); ++ virtio_gpu_update_scanout(g, scanout_id, res, fb, r); + } else { + *error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY; ++ return false; + } +- return; ++ return true; + } + + data = res->blob; +@@ -684,7 +687,7 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + scanout->ds = qemu_create_displaysurface_pixman(rect); + if (!scanout->ds) { + *error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; ++ return false; + } + #ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); +@@ -695,7 +698,8 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + scanout->ds); + } + +- virtio_gpu_update_scanout(g, scanout_id, res, r); ++ virtio_gpu_update_scanout(g, scanout_id, res, fb, r); ++ return true; + } + + static void virtio_gpu_set_scanout(VirtIOGPU *g, +@@ -1166,8 +1170,9 @@ static void virtio_gpu_cursor_bh(void *opaque) + + static const VMStateDescription vmstate_virtio_gpu_scanout = { + .name = "virtio-gpu-one-scanout", +- .version_id = 1, +- .fields = (VMStateField[]) { ++ .version_id = 2, ++ .minimum_version_id = 1, ++ .fields = (const VMStateField[]) { + VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), + VMSTATE_UINT32(width, struct virtio_gpu_scanout), + VMSTATE_UINT32(height, struct virtio_gpu_scanout), +@@ -1178,6 +1183,12 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { + VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.x, struct 
virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), ++ VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), + VMSTATE_END_OF_LIST() + }, + }; +@@ -1349,6 +1360,7 @@ static int virtio_gpu_blob_save(QEMUFile *f, void *opaque, size_t size, + if (!res->blob_size) { + continue; + } ++ assert(!res->image); + qemu_put_be32(f, res->resource_id); + qemu_put_be32(f, res->blob_size); + qemu_put_be32(f, res->iov_cnt); +@@ -1411,11 +1423,11 @@ static int virtio_gpu_post_load(void *opaque, int version_id) + int i; + + for (i = 0; i < g->parent_obj.conf.max_outputs; i++) { +- /* FIXME: should take scanout.r.{x,y} into account */ + scanout = &g->parent_obj.scanout[i]; + if (!scanout->resource_id) { + continue; + } ++ + res = virtio_gpu_find_resource(g, scanout->resource_id); + if (!res) { + return -EINVAL; +@@ -1428,7 +1440,30 @@ static int virtio_gpu_post_load(void *opaque, int version_id) + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); + #endif + +- dpy_gfx_replace_surface(scanout->con, scanout->ds); ++ if (scanout->fb.format != 0) { ++ uint32_t error = 0; ++ struct virtio_gpu_rect r = { ++ .x = scanout->x, ++ .y = scanout->y, ++ .width = scanout->width, ++ .height = scanout->height ++ }; ++ ++ if (!virtio_gpu_do_set_scanout(g, i, &scanout->fb, res, &r, &error)) { ++ return -EINVAL; ++ } ++ } else { ++ /* legacy v1 migration support */ ++ if (!res->image) { ++ return -EINVAL; ++ } ++ scanout->ds = qemu_create_displaysurface_pixman(res->image); ++#ifdef WIN32 ++ qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); ++#endif ++ dpy_gfx_replace_surface(scanout->con, scanout->ds); ++ } ++ + 
dpy_gfx_update_full(scanout->con); + if (scanout->cursor.resource_id) { + update_cursor(g, &scanout->cursor); +diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h +index 584ba2ed73..9a13afb34e 100644 +--- a/include/hw/virtio/virtio-gpu.h ++++ b/include/hw/virtio/virtio-gpu.h +@@ -81,6 +81,7 @@ struct virtio_gpu_scanout { + uint32_t resource_id; + struct virtio_gpu_update_cursor cursor; + QEMUCursor *current_cursor; ++ struct virtio_gpu_framebuffer fb; + }; + + struct virtio_gpu_requested_state { +-- +2.39.3 + diff --git a/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch new file mode 100644 index 0000000..028b23b --- /dev/null +++ b/SOURCES/kvm-virtio-gpu-fix-v2-migration.patch @@ -0,0 +1,122 @@ +From 7741a7d1c1d719ff681f73a023f27d5951b98882 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 16 May 2024 12:40:22 +0400 +Subject: [PATCH 2/4] virtio-gpu: fix v2 migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 377: virtio-gpu: fix scanout migration post-load +RH-Jira: RHEL-36181 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 482c37c10ac7e5e8a6c36ac2eb806ac02beeefdc + +Commit dfcf74fa ("virtio-gpu: fix scanout migration post-load") broke +forward/backward version migration. Versioning of nested VMSD structures +is not straightforward, as the wire format doesn't have nested +structures versions. Introduce x-scanout-vmstate-version and a field +test to save/load appropriately according to the machine version. 
+ +Fixes: dfcf74fa ("virtio-gpu: fix scanout migration post-load") +Signed-off-by: Marc-André Lureau +Signed-off-by: Peter Xu +Reviewed-by: Fiona Ebner +Tested-by: Fiona Ebner +[fixed long lines] +Signed-off-by: Fabiano Rosas + +[ Move the hw-compat from 8.2 to 8.1 ] +Signed-off-by: Marc-André Lureau +(cherry picked from commit 40a23ef643664b5c1021a9789f9d680b6294fb50) +--- + hw/core/machine.c | 1 + + hw/display/virtio-gpu.c | 30 ++++++++++++++++++++++-------- + include/hw/virtio/virtio-gpu.h | 1 + + 3 files changed, 24 insertions(+), 8 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 309f6ba685..a0843ab93e 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,7 @@ GlobalProperty hw_compat_8_1[] = { + { "ramfb", "x-migrate", "off" }, + { "vfio-pci-nohotplug", "x-ramfb-migrate", "off" }, + { "igb", "x-pcie-flr-init", "off" }, ++ { "virtio-gpu-device", "x-scanout-vmstate-version", "1" }, + }; + const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index bc485d60ea..9d35a016ae 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1168,10 +1168,17 @@ static void virtio_gpu_cursor_bh(void *opaque) + virtio_gpu_handle_cursor(&g->parent_obj.parent_obj, g->cursor_vq); + } + ++static bool scanout_vmstate_after_v2(void *opaque, int version) ++{ ++ struct VirtIOGPUBase *base = container_of(opaque, VirtIOGPUBase, scanout); ++ struct VirtIOGPU *gpu = container_of(base, VirtIOGPU, parent_obj); ++ ++ return gpu->scanout_vmstate_version >= 2; ++} ++ + static const VMStateDescription vmstate_virtio_gpu_scanout = { + .name = "virtio-gpu-one-scanout", +- .version_id = 2, +- .minimum_version_id = 1, ++ .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(resource_id, struct virtio_gpu_scanout), + VMSTATE_UINT32(width, struct virtio_gpu_scanout), +@@ -1183,12 +1190,18 @@ static const VMStateDescription vmstate_virtio_gpu_scanout = { 
+ VMSTATE_UINT32(cursor.hot_y, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.x, struct virtio_gpu_scanout), + VMSTATE_UINT32(cursor.pos.y, struct virtio_gpu_scanout), +- VMSTATE_UINT32_V(fb.format, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.bytes_pp, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.width, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.height, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.stride, struct virtio_gpu_scanout, 2), +- VMSTATE_UINT32_V(fb.offset, struct virtio_gpu_scanout, 2), ++ VMSTATE_UINT32_TEST(fb.format, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.bytes_pp, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.width, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.height, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.stride, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), ++ VMSTATE_UINT32_TEST(fb.offset, struct virtio_gpu_scanout, ++ scanout_vmstate_after_v2), + VMSTATE_END_OF_LIST() + }, + }; +@@ -1668,6 +1681,7 @@ static Property virtio_gpu_properties[] = { + DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags, + VIRTIO_GPU_FLAG_BLOB_ENABLED, false), + DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0), ++ DEFINE_PROP_UINT8("x-scanout-vmstate-version", VirtIOGPU, scanout_vmstate_version, 2), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h +index 9a13afb34e..18500432c6 100644 +--- a/include/hw/virtio/virtio-gpu.h ++++ b/include/hw/virtio/virtio-gpu.h +@@ -177,6 +177,7 @@ typedef struct VGPUDMABuf { + struct VirtIOGPU { + VirtIOGPUBase parent_obj; + ++ uint8_t scanout_vmstate_version; + uint64_t conf_max_hostmem; + + VirtQueue *ctrl_vq; +-- +2.39.3 + diff --git a/SPECS/qemu-kvm.spec b/SPECS/qemu-kvm.spec index 80f9432..eeff4c6 100644 --- a/SPECS/qemu-kvm.spec 
+++ b/SPECS/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.2.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.4 +Release: 11%{?rcrel}%{?dist}%{?cc_suffix}.6 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -630,6 +630,24 @@ Patch185: kvm-iotests-244-Don-t-store-data-file-with-protocol-in-i.patch Patch186: kvm-iotests-270-Don-t-store-data-file-with-json-prefix-i.patch # For RHEL-35610 Patch187: kvm-block-Parse-filenames-only-when-explicitly-requested.patch +# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] +Patch188: kvm-virtio-gpu-fix-scanout-migration-post-load.patch +# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] +Patch189: kvm-virtio-gpu-fix-v2-migration.patch +# For RHEL-36181 - [RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z] +Patch190: kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch +# For RHEL-43261 - qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command [rhel-9.4.z] +Patch191: kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch +# For RHEL-53565 - [RHEL9.4_to_RHEL9.5]When the VM is with only 9.4.0 (q35) machine type, still hit error of virtio-gpu issue [rhel-9.4.z] +Patch192: kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch +# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server 
During Socket Closure [rhel-9.4.z] +Patch193: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] +Patch194: kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] +Patch195: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +# For RHEL-52616 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z] +Patch196: kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch %if %{have_clang} BuildRequires: clang @@ -1691,6 +1709,27 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Aug 15 2024 Miroslav Rezanina - 8.2.0-11.el9_4.6 +- kvm-Fix-scanout-version-with-pc-q35-rhel9.4.0.patch [RHEL-53565] +- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52616] +- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52616] +- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52616] +- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52616] +- Resolves: RHEL-53565 + ([RHEL9.4_to_RHEL9.5]When the VM is with only 9.4.0 (q35) machine type, still hit error of virtio-gpu issue [rhel-9.4.z]) +- Resolves: RHEL-52616 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.4.z]) + +* Mon Jul 15 2024 Miroslav Rezanina - 8.2.0-11.el9_4.5 +- kvm-virtio-gpu-fix-scanout-migration-post-load.patch [RHEL-36181] +- kvm-virtio-gpu-fix-v2-migration.patch [RHEL-36181] +- kvm-rhel-9.4.0-machine-type-compat-for-virtio-gpu-migrat.patch [RHEL-36181] +- kvm-linux-aio-add-IO_CMD_FDSYNC-command-support.patch [RHEL-43261] +- Resolves: RHEL-36181 + 
([RHEL9.5.0][stable_guest_abi]Failed to migrate VM with (qemu) qemu-kvm: Missing section footer for 0000:00:01.0/virtio-gpu qemu-kvm: load of migration failed: Invalid argument [rhel-9.4.0.z]) +- Resolves: RHEL-43261 + (qemu-kvm: linux-aio: add support for IO_CMD_FDSYNC command [rhel-9.4.z]) + * Wed Jun 19 2024 Miroslav Rezanina - 8.2.0-11.el9_4.4 - Fixing CVE-2024-4467 - Resolves: RHEL-35610