From 775870a7baedc48f8c3817582e9ae01ae2e4075f Mon Sep 17 00:00:00 2001 From: James Antill Date: Mon, 8 Aug 2022 14:11:01 -0400 Subject: [PATCH] Import rpm: 432cb78217266d84b03f8eeefce139f91808411d --- .gitignore | 1 + ...at-Adding-slirp-to-the-exploded-tree.patch | 17931 ++++++++++++++++ 0004-Initial-redhat-build.patch | 313 + 0005-Enable-disable-devices-for-RHEL.patch | 642 + 0005-Initial-redhat-build.patch | 351 + 0006-Enable-disable-devices-for-RHEL.patch | 795 + ...Machine-type-related-general-changes.patch | 619 + 0007-Add-aarch64-machine-types.patch | 352 + ...Machine-type-related-general-changes.patch | 1071 + 0008-Add-aarch64-machine-types.patch | 405 + 0008-Add-ppc64-machine-types.patch | 528 + 0009-Add-ppc64-machine-types.patch | 714 + 0009-Add-s390x-machine-types.patch | 186 + 0010-Add-s390x-machine-types.patch | 165 + 0010-Add-x86_64-machine-types.patch | 714 + 0011-Add-x86_64-machine-types.patch | 1276 ++ 0011-Enable-make-check.patch | 186 + 0012-Enable-make-check.patch | 407 + ...mber-of-devices-that-can-be-assigned.patch | 104 + ...Add-support-statement-to-help-output.patch | 55 + ...mber-of-devices-that-can-be-assigned.patch | 110 + ...Add-support-statement-to-help-output.patch | 55 + ...lly-limit-the-maximum-number-of-CPUs.patch | 45 + ...documentation-instead-of-qemu-system.patch | 61 + ...lly-limit-the-maximum-number-of-CPUs.patch | 65 + ...documentation-instead-of-qemu-system.patch | 126 + ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 66 + ...e-at-least-64kiB-pages-for-downstrea.patch | 60 + ...ct-scsi-cd-if-data-plane-enabled-RHE.patch | 66 + ...e-at-least-64kiB-pages-for-downstrea.patch | 60 + ...on-warning-when-opening-v2-images-rw.patch | 77 + ...oduce-RHEL-9.0.0-hw-compat-structure.patch | 135 + 0019-compat-Update-hw_compat_rhel_8_5.patch | 53 + ...pdate-pseries-rhel8.5.0-machine-type.patch | 43 + ...90x-machine-type-compatibility-for-r.patch | 38 + ...ve-s3-s4-suspend-disabling-to-compat.patch | 70 + 
...8.5.0-Update-machine-type-compatibil.patch | 51 + 0022-Fix-virtio-net-pci-vectors-compat.patch | 45 + ...machine-types-Add-pc_rhel_8_5_compat.patch | 73 + ...-types-Wire-compat-into-q35-and-i440.patch | 54 + ...-machine-type-compatibility-handling.patch | 58 + 81-kvm-rhel.rules | 1 + 85-kvm.preset | 5 + 95-kvm-memlock.conf | 10 + 99-qemu-guest-agent.rules | 2 + README.rst | 19 + README.tests | 39 + bridge.conf | 1 + gating.yaml | 9 + ksm.service | 13 + ksm.sysconfig | 4 + ksmctl.c | 77 + ksmtuned | 139 + ksmtuned.conf | 21 + ksmtuned.service | 12 + kvm-Enable-SGX-RH-Only.patch | 28 + kvm-Enable-virtio-iommu-pci-on-aarch64.patch | 41 + kvm-Enable-virtio-iommu-pci-on-x86_64.patch | 41 + ...oduce-event-loop-base-abstract-class.patch | 503 + ...lags-on-io_writev-and-introduce-io_f.patch | 420 + ...-Fix-zero-copy-send-so-socket-flush-.patch | 58 + ...-Implement-io_writev-zero-copy-flag-.patch | 249 + ...-Introduce-assert-and-reduce-ifdefs-.patch | 82 + ...packet-for-vhost-vsock-device-in-rhe.patch | 107 + ...y-AArch64-Drop-unsupported-CPU-types.patch | 237 + ...avocado-Switch-aarch64-tests-from-a5.patch | 95 + ...lly-limit-the-maximum-number-of-CPUs.patch | 58 + ...vert-migration-Simplify-unqueue_page.patch | 134 + ...d-hw_compat_4_2_extra-and-apply-to-u.patch | 93 + ...able-FDC-device-for-upstream-machine.patch | 53 + ...pose-upstream-machines-pc-4.2-and-pc.patch | 191 + ...si-Reject-scsi-cd-if-data-plane-enab.patch | 51 + ...acpi-fix-OEM-ID-OEM-Table-ID-padding.patch | 78 + ...U-crash-when-started-with-SLIC-table.patch | 108 + ...pcie-set-power-on-cap-on-parent-slot.patch | 140 + ...-validate-hotplug-selector-on-access.patch | 51 + ...ntext-for-drain_end-in-blockdev-reop.patch | 63 + ...event-dangling-BDS-pointers-across-a.patch | 129 + ...Update-BSC-only-if-want_zero-is-true.patch | 56 + ...sert-there-are-no-timers-when-closed.patch | 52 + ...lete-reconnect-delay-timer-when-done.patch | 54 + ...-nbd-Move-s-ioc-on-AioContext-change.patch | 107 + 
...ndling-of-holes-in-.bdrv_co_block_st.patch | 59 + ...-rbd-workaround-for-ceph-issue-53784.patch | 103 + ...aarch64-softmmu-Enable-CONFIG_VIRTIO.patch | 41 + ...ame-qemu_coroutine_inc-dec_pool_size.patch | 101 + ...outine-Revert-to-constant-batch-size.patch | 138 + ...ontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 132 + ...outine-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 139 + ...-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch | 99 + ...er-fix-race-condition-in-qxl_cursor-.patch | 58 + kvm-doc-Add-the-SGX-numa-description.patch | 77 + ...d-Use-existing-CPU-topology-to-build.patch | 179 + kvm-hw-arm-virt-Add-8.6-machine-type.patch | 57 + ...k-no_tcg_its-and-minor-style-changes.patch | 86 + ...ider-SMP-configuration-in-CPU-topolo.patch | 74 + ...-virt-Fix-CPU-s-default-NUMA-node-ID.patch | 88 + ...missing-initialization-in-instance-c.patch | 56 + ...t-Register-iommu-as-a-class-property.patch | 78 + ...irt-Register-its-as-a-class-property.patch | 57 + ...ve-the-dtb-kaslr-seed-machine-option.patch | 76 + ...virt-Rename-default_bus_bypass_iommu.patch | 46 + ...vent-end-of-track-overrun-CVE-2021-3.patch | 96 + ...3-Check-for-MEMTX_OK-instead-of-MEMT.patch | 75 + ...virtio-Replace-g_memdup-by-g_memdup2.patch | 95 + ...ix-leak-of-host-notifier-memory-regi.patch | 66 + ...-Server-v6-CPU-model-with-5-level-EP.patch | 59 + ...08-Fix-when-missing-user_allow_other.patch | 52 + ...-Test-new-refcount-rebuild-algorithm.patch | 445 + ...Let-NBD-connection-yield-in-iothread.patch | 108 + kvm-iotests-281-Test-lingering-timers.patch | 174 + ...ckdev-reopen-with-iothreads-and-thro.patch | 106 + kvm-iotests-block-status-cache-New-test.patch | 197 + ...tests-stream-error-on-reset-New-test.patch | 198 + ...tests.py-Add-QemuStorageDaemon-class.patch | 92 + ...on-t-use-perror-without-useful-errno.patch | 62 + ...n-why-max-batch-is-checked-in-laio_i.patch | 49 + ...balanced-plugged-counter-in-laio_io_.patch | 56 + kvm-meson-create-have_vhost_-variables.patch | 154 + 
...ave_vhost_-variables-to-pick-sources.patch | 213 + ...docker-test-build-alpine-when-includ.patch | 87 + ...migration-Add-migrate_use_tls-helper.patch | 106 + ...ro-copy-send-parameter-for-QMP-HMP-f.patch | 250 + ...migrate-recover-to-run-multiple-time.patch | 98 + ...-zero_copy_send-from-migration-param.patch | 289 + kvm-migration-Fix-operator-type.patch | 47 + ...es-before-compressing-them-with-zlib.patch | 142 + ...t-zero-copy-write-in-multifd-migrati.patch | 182 + ...der-packet-without-flags-if-zero-cop.patch | 102 + ...send_sync_main-now-returns-negative-.patch | 163 + ...-MULTI_CONN-for-shared-writable-expo.patch | 381 + ...uma-Enable-numa-for-SGX-EPC-sections.patch | 287 + ...-numa-in-the-monitor-and-Libvirt-int.patch | 210 + ...-Split-virtio-scsi-code-from-virtio_.patch | 180 + ...-bootmap-Improve-the-guessing-logic-.patch | 102 + ...-netboot.mak-Ignore-Clang-s-warnings.patch | 78 + ...-virtio-Beautify-the-code-for-readin.patch | 56 + ...-virtio-Introduce-a-macro-for-the-DA.patch | 63 + ...-virtio-Read-device-config-after-fea.patch | 67 + ...-virtio-Set-missing-status-bits-whil.patch | 93 + ...-virtio-blkdev-Remove-virtio_assume_.patch | 101 + ...-virtio-blkdev-Request-the-right-fea.patch | 63 + ...-virtio-blkdev-Simplify-fix-virtio_i.patch | 124 + ...-expose-TYPE_XIO3130_DOWNSTREAM-name.patch | 83 + ...-related-comments-and-restore-sectio.patch | 214 + kvm-qapi-machine.json-Add-cluster-id.patch | 126 + ...d-errp-to-rebuild_refcount_structure.patch | 162 + ...mprove-refcount-structure-rebuilding.patch | 465 + ...ss-max-connections-to-blockdev-layer.patch | 92 + ...Correct-CPU-and-NUMA-association-in-.patch | 100 + ...Specify-CPU-topology-in-aarch64_numa.patch | 68 + ...Add-rhel8.6.0-machine-type-for-s390x.patch | 69 + ...efine-pseries-rhel8.6.0-machine-type.patch | 76 + ...nux-headers-linux-kvm.h-to-v5.18-rc6.patch | 106 + ...machine-types-x86-set-prefer_sockets.patch | 52 + kvm-s390x-css-fix-PMCW-invalid-mask.patch | 58 + kvm-s390x.conf | 19 + kvm-setup 
| 49 + kvm-setup.service | 14 + ...ce-deletion-events-with-device-JSON-.patch | 131 + ...Introduce-MemTxAttrs-memory-field-an.patch | 175 + ...Simplify-flatview_write-and-address_.patch | 80 + ...m-Add-a-stub-function-for-TPM_IS_CRB.patch | 54 + ...arget-arm-deprecate-named-CPU-models.patch | 129 + ...recate-CPUs-older-than-x86_64-v2-ABI.patch | 273 + ...odels-Fix-ppc_cpu_aliases-list-for-R.patch | 48 + ...-s390x-deprecate-CPUs-older-than-z14.patch | 194 + ...-Honor-storage-keys-during-emulation.patch | 103 + ...ests-acpi-SLIC-update-expected-blobs.patch | 47 + kvm-tests-acpi-add-SLIC-table-test.patch | 76 + ...lly-pad-OEM_ID-OEM_TABLE_ID-for-test.patch | 84 + ...short-OEM_ID-OEM_TABLE_ID-values-in-.patch | 77 + kvm-tests-acpi-update-expected-blobs.patch | 58 + ...list-expected-blobs-before-changing-.patch | 47 + ...list-nvdimm-s-SSDT-and-FACP.slic-exp.patch | 57 + ...date-aarch64_virt-test-to-exercise-c.patch | 157 + ...test-Add-a-regression-test-for-CVE-2.patch | 119 + ...base-Introduce-options-to-set-the-th.patch | 385 + ...oop-Introduce-the-main-loop-into-QOM.patch | 233 + kvm-vdpa-Add-device-migration-blocker.patch | 106 + ...-Add-x-svq-to-NetdevVhostVDPAOptions.patch | 223 + ...compiler-to-squash-reads-to-used-idx.patch | 65 + ...ffer-CVQ-support-on-shadow-virtqueue.patch | 323 + ...t-vhost_vdpa_dma_map-and-unmap-calls.patch | 84 + ...-features-part-from-vhost_vdpa_get_m.patch | 108 + kvm-vdpa-manual-forward-CVQ-buffers.patch | 166 + ...ve-spurious-tpm-crb-cmd-misalignment.patch | 114 + ...ve-spurious-warning-on-vfio_listener.patch | 78 + kvm-vhost-Add-SVQDescState.patch | 135 + ...vhost-Add-svq-avail_handler-callback.patch | 164 + ...heck-for-queue-full-at-vhost_svq_add.patch | 134 + ...-vhost_svq_add-from-VirtQueueElement.patch | 138 + kvm-vhost-Expose-vhost_svq_add.patch | 73 + ...Fix-device-s-used-descriptor-dequeue.patch | 83 + ...Fix-element-in-vhost_svq_add-failure.patch | 68 + ...vhost_svq_kick-call-to-vhost_svq_add.patch | 61 + 
kvm-vhost-Reorder-vhost_svq_kick.patch | 88 + ...k-descriptor-chain-in-private-at-SVQ.patch | 123 + ...rack-number-of-descs-in-SVQDescState.patch | 81 + kvm-vhost-add-vhost_svq_poll.patch | 92 + kvm-vhost-add-vhost_svq_push_elem.patch | 83 + ...iptor-translation-to-vhost_svq_vring.patch | 120 + ...-improper-cleanup-in-vhost_net_start.patch | 56 + ...dd-stubs-for-when-no-virtio-net-devi.patch | 87 + ...backend-feature-should-set-only-once.patch | 58 + ...e-name-and-polarity-for-vhost_vdpa_o.patch | 123 + ...mproper-cleanup-in-net_init_vhost_vd.patch | 48 + ...ch-the-virqueue-element-in-case-of-e.patch | 76 + ...-an-assert-check-in-translate-routin.patch | 46 + ...-bypass-mode-support-to-assigned-dev.patch | 250 + ...irtio-iommu-Fix-migration-regression.patch | 54 + ...ix-the-partial-copy-of-probe-request.patch | 67 + ...Use-recursive-lock-to-avoid-deadlock.patch | 141 + kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch | 69 + ...rtio-net-Expose-ctrl-virtqueue-logic.patch | 169 + ...-ctrl_vq-index-for-non-mq-guest-for-.patch | 143 + ...-handle-mq-request-in-userspace-hand.patch | 109 + ...-vhost_dev-and-notifiers-for-cvq-onl.patch | 52 + ...i-clean-up-virtio_scsi_handle_cmd_vq.patch | 77 + ...-clean-up-virtio_scsi_handle_ctrl_vq.patch | 65 + ...clean-up-virtio_scsi_handle_event_vq.patch | 62 + ...t-waste-CPU-polling-the-event-virtqu.patch | 103 + ...ctrl-and-event-handler-functions-in-.patch | 119 + ...-request-related-items-from-.h-to-.c.patch | 168 + ...embership-of-all-supplementary-group.patch | 110 + kvm-vmxcap-Add-5-level-EPT-bit.patch | 48 + kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch | 68 + kvm-x86.conf | 12 + kvm.conf | 3 + modules-load.conf | 4 + qemu-ga.sysconfig | 19 + qemu-guest-agent.service | 20 + qemu-kvm.spec | 4174 ++++ qemu-pr-helper.service | 15 + qemu-pr-helper.socket | 9 + rpminspect.yaml | 11 + sources | 1 + tests_data_acpi_pc_SSDT.dimmpxm | Bin 0 -> 734 bytes tests_data_acpi_q35_FACP.slic | Bin 0 -> 244 bytes tests_data_acpi_q35_SSDT.dimmpxm | 
Bin 0 -> 734 bytes tests_data_acpi_virt_SSDT.memhp | Bin 0 -> 736 bytes udev-kvm-check.c | 155 + vhost.conf | 3 + 243 files changed, 52953 insertions(+) create mode 100644 .gitignore create mode 100644 0001-redhat-Adding-slirp-to-the-exploded-tree.patch create mode 100644 0004-Initial-redhat-build.patch create mode 100644 0005-Enable-disable-devices-for-RHEL.patch create mode 100644 0005-Initial-redhat-build.patch create mode 100644 0006-Enable-disable-devices-for-RHEL.patch create mode 100644 0006-Machine-type-related-general-changes.patch create mode 100644 0007-Add-aarch64-machine-types.patch create mode 100644 0007-Machine-type-related-general-changes.patch create mode 100644 0008-Add-aarch64-machine-types.patch create mode 100644 0008-Add-ppc64-machine-types.patch create mode 100644 0009-Add-ppc64-machine-types.patch create mode 100644 0009-Add-s390x-machine-types.patch create mode 100644 0010-Add-s390x-machine-types.patch create mode 100644 0010-Add-x86_64-machine-types.patch create mode 100644 0011-Add-x86_64-machine-types.patch create mode 100644 0011-Enable-make-check.patch create mode 100644 0012-Enable-make-check.patch create mode 100644 0012-vfio-cap-number-of-devices-that-can-be-assigned.patch create mode 100644 0013-Add-support-statement-to-help-output.patch create mode 100644 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch create mode 100644 0014-Add-support-statement-to-help-output.patch create mode 100644 0014-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 0015-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch create mode 100644 0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch create mode 100644 0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch create mode 100644 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch 
create mode 100644 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch create mode 100644 0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch create mode 100644 0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch create mode 100644 0019-compat-Update-hw_compat_rhel_8_5.patch create mode 100644 0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch create mode 100644 0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch create mode 100644 0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch create mode 100644 0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch create mode 100644 0022-Fix-virtio-net-pci-vectors-compat.patch create mode 100644 0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch create mode 100644 0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch create mode 100644 0025-redhat-Add-s390x-machine-type-compatibility-handling.patch create mode 100644 81-kvm-rhel.rules create mode 100644 85-kvm.preset create mode 100644 95-kvm-memlock.conf create mode 100644 99-qemu-guest-agent.rules create mode 100644 README.rst create mode 100644 README.tests create mode 100644 bridge.conf create mode 100644 gating.yaml create mode 100644 ksm.service create mode 100644 ksm.sysconfig create mode 100644 ksmctl.c create mode 100644 ksmtuned create mode 100644 ksmtuned.conf create mode 100644 ksmtuned.service create mode 100644 kvm-Enable-SGX-RH-Only.patch create mode 100644 kvm-Enable-virtio-iommu-pci-on-aarch64.patch create mode 100644 kvm-Enable-virtio-iommu-pci-on-x86_64.patch create mode 100644 kvm-Introduce-event-loop-base-abstract-class.patch create mode 100644 kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch create mode 100644 kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch create mode 100644 kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch create mode 100644 kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch create mode 100644 
kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch create mode 100644 kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch create mode 100644 kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch create mode 100644 kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch create mode 100644 kvm-Revert-migration-Simplify-unqueue_page.patch create mode 100644 kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch create mode 100644 kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch create mode 100644 kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch create mode 100644 kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch create mode 100644 kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch create mode 100644 kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch create mode 100644 kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch create mode 100644 kvm-acpi-validate-hotplug-selector-on-access.patch create mode 100644 kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch create mode 100644 kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch create mode 100644 kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch create mode 100644 kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch create mode 100644 kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch create mode 100644 kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch create mode 100644 kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch create mode 100644 kvm-block-rbd-workaround-for-ceph-issue-53784.patch create mode 100644 kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch create mode 100644 kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch create mode 100644 kvm-coroutine-Revert-to-constant-batch-size.patch create mode 100644 kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 
kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch create mode 100644 kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch create mode 100644 kvm-doc-Add-the-SGX-numa-description.patch create mode 100644 kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch create mode 100644 kvm-hw-arm-virt-Add-8.6-machine-type.patch create mode 100644 kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch create mode 100644 kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch create mode 100644 kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch create mode 100644 kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch create mode 100644 kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch create mode 100644 kvm-hw-arm-virt-Register-its-as-a-class-property.patch create mode 100644 kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch create mode 100644 kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch create mode 100644 kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch create mode 100644 kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch create mode 100644 kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch create mode 100644 kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch create mode 100644 kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch create mode 100644 kvm-iotests-108-Fix-when-missing-user_allow_other.patch create mode 100644 kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch create mode 100644 kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch create mode 100644 kvm-iotests-281-Test-lingering-timers.patch create mode 100644 kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch create mode 100644 kvm-iotests-block-status-cache-New-test.patch create mode 100644 kvm-iotests-stream-error-on-reset-New-test.patch create mode 100644 kvm-iotests.py-Add-QemuStorageDaemon-class.patch create mode 100644 
kvm-kvm-don-t-use-perror-without-useful-errno.patch create mode 100644 kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch create mode 100644 kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch create mode 100644 kvm-meson-create-have_vhost_-variables.patch create mode 100644 kvm-meson-use-have_vhost_-variables-to-pick-sources.patch create mode 100644 kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch create mode 100644 kvm-migration-Add-migrate_use_tls-helper.patch create mode 100644 kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch create mode 100644 kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch create mode 100644 kvm-migration-Change-zero_copy_send-from-migration-param.patch create mode 100644 kvm-migration-Fix-operator-type.patch create mode 100644 kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch create mode 100644 kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch create mode 100644 kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch create mode 100644 kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch create mode 100644 kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch create mode 100644 kvm-numa-Enable-numa-for-SGX-EPC-sections.patch create mode 100644 kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch create mode 100644 kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch create mode 100644 kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch create mode 100644 kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch create mode 100644 
kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch create mode 100644 kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch create mode 100644 kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch create mode 100644 kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch create mode 100644 kvm-qapi-machine.json-Add-cluster-id.patch create mode 100644 kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch create mode 100644 kvm-qcow2-Improve-refcount-structure-rebuilding.patch create mode 100644 kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch create mode 100644 kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch create mode 100644 kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch create mode 100644 kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch create mode 100644 kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch create mode 100644 kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch create mode 100644 kvm-rhel-machine-types-x86-set-prefer_sockets.patch create mode 100644 kvm-s390x-css-fix-PMCW-invalid-mask.patch create mode 100644 kvm-s390x.conf create mode 100644 kvm-setup create mode 100644 kvm-setup.service create mode 100644 kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch create mode 100644 kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch create mode 100644 kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch create mode 100644 kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch create mode 100644 kvm-target-arm-deprecate-named-CPU-models.patch create mode 100644 kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch create mode 100644 kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch create mode 100644 kvm-target-s390x-deprecate-CPUs-older-than-z14.patch create mode 100644 kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch 
create mode 100644 kvm-tests-acpi-SLIC-update-expected-blobs.patch create mode 100644 kvm-tests-acpi-add-SLIC-table-test.patch create mode 100644 kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch create mode 100644 kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch create mode 100644 kvm-tests-acpi-update-expected-blobs.patch create mode 100644 kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch create mode 100644 kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch create mode 100644 kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch create mode 100644 kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch create mode 100644 kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch create mode 100644 kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch create mode 100644 kvm-vdpa-Add-device-migration-blocker.patch create mode 100644 kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch create mode 100644 kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch create mode 100644 kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch create mode 100644 kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch create mode 100644 kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch create mode 100644 kvm-vdpa-manual-forward-CVQ-buffers.patch create mode 100644 kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch create mode 100644 kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch create mode 100644 kvm-vhost-Add-SVQDescState.patch create mode 100644 kvm-vhost-Add-svq-avail_handler-callback.patch create mode 100644 kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch create mode 100644 kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch create mode 100644 kvm-vhost-Expose-vhost_svq_add.patch create mode 100644 kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch create mode 100644 kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch create mode 100644 
kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch create mode 100644 kvm-vhost-Reorder-vhost_svq_kick.patch create mode 100644 kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch create mode 100644 kvm-vhost-Track-number-of-descs-in-SVQDescState.patch create mode 100644 kvm-vhost-add-vhost_svq_poll.patch create mode 100644 kvm-vhost-add-vhost_svq_push_elem.patch create mode 100644 kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch create mode 100644 kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch create mode 100644 kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch create mode 100644 kvm-vhost-vdpa-backend-feature-should-set-only-once.patch create mode 100644 kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch create mode 100644 kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch create mode 100644 kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch create mode 100644 kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch create mode 100644 kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch create mode 100644 kvm-virtio-iommu-Fix-migration-regression.patch create mode 100644 kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch create mode 100644 kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch create mode 100644 kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch create mode 100644 kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch create mode 100644 kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch create mode 100644 kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch create mode 100644 kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch create mode 100644 kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch create mode 100644 
kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch create mode 100644 kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch create mode 100644 kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch create mode 100644 kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch create mode 100644 kvm-vmxcap-Add-5-level-EPT-bit.patch create mode 100644 kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch create mode 100644 kvm-x86.conf create mode 100644 kvm.conf create mode 100644 modules-load.conf create mode 100644 qemu-ga.sysconfig create mode 100644 qemu-guest-agent.service create mode 100644 qemu-kvm.spec create mode 100644 qemu-pr-helper.service create mode 100644 qemu-pr-helper.socket create mode 100644 rpminspect.yaml create mode 100644 sources create mode 100644 tests_data_acpi_pc_SSDT.dimmpxm create mode 100644 tests_data_acpi_q35_FACP.slic create mode 100644 tests_data_acpi_q35_SSDT.dimmpxm create mode 100644 tests_data_acpi_virt_SSDT.memhp create mode 100644 udev-kvm-check.c create mode 100644 vhost.conf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f5dae2e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/qemu-6.2.0.tar.xz diff --git a/0001-redhat-Adding-slirp-to-the-exploded-tree.patch b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch new file mode 100644 index 0000000..43fbac3 --- /dev/null +++ b/0001-redhat-Adding-slirp-to-the-exploded-tree.patch @@ -0,0 +1,17931 @@ +From 0a17d5f6abf800e88069738904e3fcd8427ab28a Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 5 Aug 2021 01:07:55 -0400 +Subject: redhat: Adding slirp to the exploded tree + +RH-Author: Danilo de Paula +Message-id: <20190907020756.8619-1-ddepaula@redhat.com> +Patchwork-id: 90309 +O-Subject: [RHEL-AV-8.1.0 qemu-kvm PATCH 1/1] redhat: Adding slirp to the exploded tree +Bugzilla: +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Wainer dos Santos Moschetta + +Until qemu-kvm-3.1 slirp used to live as a regular folder in 
qemu-kvm. +After that it got moved into its own submodule. Which means it's not +part of the qemu-kvm git tree anymore. + +This passed unnoticed for RHEL-AV-8.0.1 and 8.1.0 because qemu still ships +the code in the tarball. That's why scratch builds still work (they're based on +the tarball content). + +As we're receiving some CVEs against slirp, we need a way to patch +slirp in RHEL-8.1.0 without handling it as a separate package (as we do for +firmware). + +The simplest solution is to copy the slirp folder from the tarball into the +exploded tree. + +To be able to do that, I had to make some changes: + +slirp needs to be removed from .gitmodules, otherwise git complains +about files in it. + +Since "make -C redhat rh-brew" uses the tarball and applies all the +patches on top of it, we need to remove the folder from the tarball before applying +the patch (because we are actually re-applying them). + +We also need to use --ignore-submodule while generating the patches for +scratch-build, otherwise it will include some weird definition of the +slirp folder in the patch, something that /usr/bin/patch gets mad with. + +After that I compared the patch list, after and before this change, and +saw no major differences. + +This is an exploded-tree-only change and shouldn't be applied to dist-git. + +Signed-off-by: Danilo C. L.
de Paula + +Rebase notes (weekly-210217): + - Upstream slirp updated to 8f43a99191afb47ca3f3c6972f6306209f367ece + +Rebase notes (6.1.0-rc2): +- Upstream slirp updated to a88d9ace234a24ce1c17189642ef9104799425e0 + +Merged commits (weekly-210203): + - a3f5f082f Drop bogus IPv6 messagesa + +Merged commits (weekly-210714): +- ce9ddeef04 Add mtod_check() +- 0609398e76 bootp: limit vendor-specific area to input packet memory buffer +- 377f755273 bootp: check bootp_input buffer size +- 4101e41f0d upd6: check udp6_input buffer size +- 7a663c9667 tftp: check tftp_input buffer size +- 76f81fc22c tftp: introduce a header structure +- 6903e9ba25 udp: check upd_input buffer size +- 8aa4fe0b6d Fix "DHCP broken in libslirp v4.6.0" +--- + .gitmodules | 3 - + slirp/.clang-format | 58 ++ + slirp/.gitignore | 11 + + slirp/.gitlab-ci.yml | 43 + + slirp/.gitpublish | 3 + + slirp/CHANGELOG.md | 184 ++++ + slirp/COPYRIGHT | 62 ++ + slirp/README.md | 60 ++ + slirp/build-aux/git-version-gen | 158 ++++ + slirp/meson.build | 162 ++++ + slirp/meson_options.txt | 2 + + slirp/src/arp_table.c | 94 ++ + slirp/src/bootp.c | 375 ++++++++ + slirp/src/bootp.h | 129 +++ + slirp/src/cksum.c | 179 ++++ + slirp/src/debug.h | 59 ++ + slirp/src/dhcpv6.c | 224 +++++ + slirp/src/dhcpv6.h | 68 ++ + slirp/src/dnssearch.c | 306 ++++++ + slirp/src/if.c | 215 +++++ + slirp/src/if.h | 25 + + slirp/src/ip.h | 242 +++++ + slirp/src/ip6.h | 214 +++++ + slirp/src/ip6_icmp.c | 444 +++++++++ + slirp/src/ip6_icmp.h | 220 +++++ + slirp/src/ip6_input.c | 88 ++ + slirp/src/ip6_output.c | 45 + + slirp/src/ip_icmp.c | 524 +++++++++++ + slirp/src/ip_icmp.h | 168 ++++ + slirp/src/ip_input.c | 463 +++++++++ + slirp/src/ip_output.c | 171 ++++ + slirp/src/libslirp-version.h.in | 24 + + slirp/src/libslirp.h | 236 +++++ + slirp/src/libslirp.map | 36 + + slirp/src/main.h | 16 + + slirp/src/mbuf.c | 281 ++++++ + slirp/src/mbuf.h | 192 ++++ + slirp/src/misc.c | 440 +++++++++ + slirp/src/misc.h | 72 ++ + slirp/src/ncsi-pkt.h | 445 
+++++++++ + slirp/src/ncsi.c | 197 ++++ + slirp/src/ndp_table.c | 98 ++ + slirp/src/sbuf.c | 168 ++++ + slirp/src/sbuf.h | 27 + + slirp/src/slirp.c | 1387 +++++++++++++++++++++++++++ + slirp/src/slirp.h | 289 ++++++ + slirp/src/socket.c | 1104 ++++++++++++++++++++++ + slirp/src/socket.h | 186 ++++ + slirp/src/state.c | 379 ++++++++ + slirp/src/stream.c | 120 +++ + slirp/src/stream.h | 35 + + slirp/src/tcp.h | 169 ++++ + slirp/src/tcp_input.c | 1552 +++++++++++++++++++++++++++++++ + slirp/src/tcp_output.c | 516 ++++++++++ + slirp/src/tcp_subr.c | 1011 ++++++++++++++++++++ + slirp/src/tcp_timer.c | 286 ++++++ + slirp/src/tcp_timer.h | 130 +++ + slirp/src/tcp_var.h | 161 ++++ + slirp/src/tcpip.h | 104 +++ + slirp/src/tftp.c | 470 ++++++++++ + slirp/src/tftp.h | 58 ++ + slirp/src/udp.c | 425 +++++++++ + slirp/src/udp.h | 96 ++ + slirp/src/udp6.c | 196 ++++ + slirp/src/util.c | 441 +++++++++ + slirp/src/util.h | 203 ++++ + slirp/src/version.c | 8 + + slirp/src/vmstate.c | 444 +++++++++ + slirp/src/vmstate.h | 391 ++++++++ + 69 files changed, 17389 insertions(+), 3 deletions(-) + create mode 100644 slirp/.clang-format + create mode 100644 slirp/.gitignore + create mode 100644 slirp/.gitlab-ci.yml + create mode 100644 slirp/.gitpublish + create mode 100644 slirp/CHANGELOG.md + create mode 100644 slirp/COPYRIGHT + create mode 100644 slirp/README.md + create mode 100755 slirp/build-aux/git-version-gen + create mode 100644 slirp/meson.build + create mode 100644 slirp/meson_options.txt + create mode 100644 slirp/src/arp_table.c + create mode 100644 slirp/src/bootp.c + create mode 100644 slirp/src/bootp.h + create mode 100644 slirp/src/cksum.c + create mode 100644 slirp/src/debug.h + create mode 100644 slirp/src/dhcpv6.c + create mode 100644 slirp/src/dhcpv6.h + create mode 100644 slirp/src/dnssearch.c + create mode 100644 slirp/src/if.c + create mode 100644 slirp/src/if.h + create mode 100644 slirp/src/ip.h + create mode 100644 slirp/src/ip6.h + create mode 100644 
slirp/src/ip6_icmp.c + create mode 100644 slirp/src/ip6_icmp.h + create mode 100644 slirp/src/ip6_input.c + create mode 100644 slirp/src/ip6_output.c + create mode 100644 slirp/src/ip_icmp.c + create mode 100644 slirp/src/ip_icmp.h + create mode 100644 slirp/src/ip_input.c + create mode 100644 slirp/src/ip_output.c + create mode 100644 slirp/src/libslirp-version.h.in + create mode 100644 slirp/src/libslirp.h + create mode 100644 slirp/src/libslirp.map + create mode 100644 slirp/src/main.h + create mode 100644 slirp/src/mbuf.c + create mode 100644 slirp/src/mbuf.h + create mode 100644 slirp/src/misc.c + create mode 100644 slirp/src/misc.h + create mode 100644 slirp/src/ncsi-pkt.h + create mode 100644 slirp/src/ncsi.c + create mode 100644 slirp/src/ndp_table.c + create mode 100644 slirp/src/sbuf.c + create mode 100644 slirp/src/sbuf.h + create mode 100644 slirp/src/slirp.c + create mode 100644 slirp/src/slirp.h + create mode 100644 slirp/src/socket.c + create mode 100644 slirp/src/socket.h + create mode 100644 slirp/src/state.c + create mode 100644 slirp/src/stream.c + create mode 100644 slirp/src/stream.h + create mode 100644 slirp/src/tcp.h + create mode 100644 slirp/src/tcp_input.c + create mode 100644 slirp/src/tcp_output.c + create mode 100644 slirp/src/tcp_subr.c + create mode 100644 slirp/src/tcp_timer.c + create mode 100644 slirp/src/tcp_timer.h + create mode 100644 slirp/src/tcp_var.h + create mode 100644 slirp/src/tcpip.h + create mode 100644 slirp/src/tftp.c + create mode 100644 slirp/src/tftp.h + create mode 100644 slirp/src/udp.c + create mode 100644 slirp/src/udp.h + create mode 100644 slirp/src/udp6.c + create mode 100644 slirp/src/util.c + create mode 100644 slirp/src/util.h + create mode 100644 slirp/src/version.c + create mode 100644 slirp/src/vmstate.c + create mode 100644 slirp/src/vmstate.h + +diff --git a/slirp/.clang-format b/slirp/.clang-format +new file mode 100644 +index 0000000000..17fb49fe65 +--- /dev/null ++++ b/slirp/.clang-format +@@ 
-0,0 +1,58 @@ ++# https://clang.llvm.org/docs/ClangFormat.html ++# https://clang.llvm.org/docs/ClangFormatStyleOptions.html ++--- ++Language: Cpp ++AlignAfterOpenBracket: Align ++AlignConsecutiveAssignments: false # although we like it, it creates churn ++AlignConsecutiveDeclarations: false ++AlignEscapedNewlinesLeft: true ++AlignOperands: true ++AlignTrailingComments: false # churn ++AllowAllParametersOfDeclarationOnNextLine: true ++AllowShortBlocksOnASingleLine: false ++AllowShortCaseLabelsOnASingleLine: false ++AllowShortFunctionsOnASingleLine: None ++AllowShortIfStatementsOnASingleLine: false ++AllowShortLoopsOnASingleLine: false ++AlwaysBreakAfterReturnType: None # AlwaysBreakAfterDefinitionReturnType is taken into account ++AlwaysBreakBeforeMultilineStrings: false ++BinPackArguments: true ++BinPackParameters: true ++BraceWrapping: ++ AfterControlStatement: false ++ AfterEnum: false ++ AfterFunction: true ++ AfterStruct: false ++ AfterUnion: false ++ BeforeElse: false ++ IndentBraces: false ++BreakBeforeBinaryOperators: None ++BreakBeforeBraces: Custom ++BreakBeforeTernaryOperators: false ++BreakStringLiterals: true ++ColumnLimit: 80 ++ContinuationIndentWidth: 4 ++Cpp11BracedListStyle: false ++DerivePointerAlignment: false ++DisableFormat: false ++IndentCaseLabels: false ++IndentWidth: 4 ++IndentWrappedFunctionNames: false ++KeepEmptyLinesAtTheStartOfBlocks: false ++MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? ++MacroBlockEnd: '.*_END$' ++MaxEmptyLinesToKeep: 2 ++PointerAlignment: Right ++ReflowComments: true ++SortIncludes: false ++SpaceAfterCStyleCast: false ++SpaceBeforeAssignmentOperators: true ++SpaceBeforeParens: ControlStatements ++SpaceInEmptyParentheses: false ++SpacesBeforeTrailingComments: 1 ++SpacesInContainerLiterals: true ++SpacesInParentheses: false ++SpacesInSquareBrackets: false ++Standard: Auto ++UseTab: Never ++... 
+diff --git a/slirp/CHANGELOG.md b/slirp/CHANGELOG.md +new file mode 100644 +index 0000000000..bd4845ca29 +--- /dev/null ++++ b/slirp/CHANGELOG.md +@@ -0,0 +1,184 @@ ++# Changelog ++ ++All notable changes to this project will be documented in this file. ++ ++The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ++and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ++ ++## [4.6.1] - 2021-06-18 ++ ++### Fixed ++ ++ - Fix DHCP regression introduced in 4.6.0. !95 ++ ++## [4.6.0] - 2021-06-14 ++ ++### Added ++ ++ - mbuf: Add debugging helpers for allocation. !90 ++ ++### Changed ++ ++ - Revert "Set macOS deployment target to macOS 10.4". !93 ++ ++### Fixed ++ ++ - mtod()-related buffer overflows (CVE-2021-3592 #44, CVE-2021-3593 #45, ++ CVE-2021-3594 #47, CVE-2021-3595 #46). ++ - poll_fd: add missing fd registration for UDP and ICMP ++ - ncsi: make ncsi_calculate_checksum work with unaligned data. !89 ++ - Various typos and doc fixes. !88 ++ ++## [4.5.0] - 2021-05-18 ++ ++### Added ++ ++ - IPv6 forwarding. !62 !75 !77 ++ - slirp_neighbor_info() to dump the ARP/NDP tables. !71 ++ ++### Changed ++ ++ - Lazy guest address resolution for IPv6. !81 ++ - Improve signal handling when spawning a child. !61 ++ - Set macOS deployment target to macOS 10.4. !72 ++ - slirp_add_hostfwd: Ensure all error paths set errno. !80 ++ - More API documentation. ++ ++### Fixed ++ ++ - Assertion failure on unspecified IPv6 address. !86 ++ - Disable polling for PRI on MacOS, fixing some closing streams issues. !73 ++ - Various memory leak fixes on fastq/batchq. !68 ++ - Memory leak on IPv6 fast-send. !67 ++ - Slow socket response on Windows. !64 ++ - Misc build and code cleanups. !60 !63 !76 !79 !84 ++ ++## [4.4.0] - 2020-12-02 ++ ++### Added ++ ++ - udp, udp6, icmp: handle TTL value. !48 ++ - Enable forwarding ICMP errors. !49 ++ - Add DNS resolving for iOS. !54 ++ ++### Changed ++ ++ - Improve meson subproject() support. 
!53 ++ - Removed Makefile-based build system. !56 ++ ++### Fixed ++ ++ - socket: consume empty packets. !55 ++ - check pkt_len before reading protocol header (CVE-2020-29129). !57 ++ - ip_stripoptions use memmove (fixes undefined behaviour). !47 ++ - various Coverity-related changes/fixes. ++ ++## [4.3.1] - 2020-07-08 ++ ++### Changed ++ ++ - A silent truncation could occur in `slirp_fmt()`, which will now print a ++ critical message. See also #22. ++ ++### Fixed ++ ++ - CVE-2020-10756 - Drop bogus IPv6 messages that could lead to data leakage. ++ See !44 and !42. ++ - Fix win32 builds by using the SLIRP_PACKED definition. ++ - Various coverity scan errors fixed. !41 ++ - Fix new GCC warnings. !43 ++ ++## [4.3.0] - 2020-04-22 ++ ++### Added ++ ++ - `SLIRP_VERSION_STRING` macro, with the git sha suffix when building from git ++ - `SlirpConfig.disable_dns`, to disable DNS redirection #16 ++ ++### Changed ++ ++ - `slirp_version_string()` now has the git sha suffix when building form git ++ - Limit DNS redirection to port 53 #16 ++ ++### Fixed ++ ++ - Fix build regression with mingw & NetBSD ++ - Fix use-afte-free in `ip_reass()` (CVE-2020-1983) ++ ++## [4.2.0] - 2020-03-17 ++ ++### Added ++ ++ - New API function `slirp_add_unix`: add a forward rule to a Unix socket. ++ - New API function `slirp_remove_guestfwd`: remove a forward rule previously ++ added by `slirp_add_exec`, `slirp_add_unix` or `slirp_add_guestfwd` ++ - New `SlirpConfig.outbound_addr{,6}` fields to bind output socket to a ++ specific address ++ ++### Changed ++ ++ - socket: do not fallback on host loopback if `get_dns_addr()` failed ++ or the address is in slirp network ++ ++### Fixed ++ ++ - ncsi: fix checksum OOB memory access ++ - `tcp_emu()`: fix OOB accesses ++ - tftp: restrict relative path access ++ - state: fix loading of guestfwd state ++ ++## [4.1.0] - 2019-12-02 ++ ++### Added ++ ++ - The `slirp_new()` API, simpler and more extensible than `slirp_init()`. ++ - Allow custom MTU configuration. 
++ - Option to disable host loopback connections. ++ - CI now runs scan-build too. ++ ++### Changed ++ ++ - Disable `tcp_emu()` by default. `tcp_emu()` is known to have caused ++ several CVEs, and not useful today in most cases. The feature can ++ be still enabled by setting `SlirpConfig.enable_emu` to true. ++ - meson build system is now `subproject()` friendly. ++ - Replace remaining `malloc()`/`free()` with glib (which aborts on OOM) ++ - Various code cleanups. ++ ++### Deprecated ++ ++ - The `slirp_init()` API. ++ ++### Fixed ++ ++ - `getpeername()` error after `shutdown(SHUT_WR)`. ++ - Exec forward: correctly parse command lines that contain spaces. ++ - Allow 0.0.0.0 destination address. ++ - Make host receive broadcast packets. ++ - Various memory related fixes (heap overflow, leaks, NULL ++ dereference). ++ - Compilation warnings, dead code. ++ ++## [4.0.0] - 2019-05-24 ++ ++### Added ++ ++ - Installable as a shared library. ++ - meson build system ++ (& make build system for in-tree QEMU integration) ++ ++### Changed ++ ++ - Standalone project, removing any QEMU dependency. ++ - License clarifications. 
++ ++[Unreleased]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.1...master ++[4.6.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.6.0...v4.6.1 ++[4.6.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.5.0...v4.6.0 ++[4.5.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.4.0...v4.5.0 ++[4.4.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.1...v4.4.0 ++[4.3.1]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.3.0...v4.3.1 ++[4.3.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.2.0...v4.3.0 ++[4.2.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.1.0...v4.2.0 ++[4.1.0]: https://gitlab.freedesktop.org/slirp/libslirp/compare/v4.0.0...v4.1.0 ++[4.0.0]: https://gitlab.freedesktop.org/slirp/libslirp/commits/v4.0.0 +diff --git a/slirp/COPYRIGHT b/slirp/COPYRIGHT +new file mode 100644 +index 0000000000..ed49512dbc +--- /dev/null ++++ b/slirp/COPYRIGHT +@@ -0,0 +1,62 @@ ++Slirp was written by Danny Gasparovski. ++Copyright (c), 1995,1996 All Rights Reserved. ++ ++Slirp is free software; "free" as in you don't have to pay for it, and you ++are free to do whatever you want with it. I do not accept any donations, ++monetary or otherwise, for Slirp. Instead, I would ask you to pass this ++potential donation to your favorite charity. In fact, I encourage ++*everyone* who finds Slirp useful to make a small donation to their ++favorite charity (for example, GreenPeace). This is not a requirement, but ++a suggestion from someone who highly values the service they provide. ++ ++The copyright terms and conditions: ++ ++---BEGIN--- ++ ++ Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. ++ ++ Redistribution and use in source and binary forms, with or without ++ modification, are permitted provided that the following conditions ++ are met: ++ 1. Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ 2. 
Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ 3. Neither the name of the copyright holder nor the names of its ++ contributors may be used to endorse or promote products derived ++ from this software without specific prior written permission. ++ ++ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, ++ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY ++ AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ++ DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, ++ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF ++ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++---END--- ++ ++This basically means you can do anything you want with the software, except ++1) call it your own, and 2) claim warranty on it. There is no warranty for ++this software. None. Nada. If you lose a million dollars while using ++Slirp, that's your loss not mine. So, ***USE AT YOUR OWN RISK!***. ++ ++If these conditions cannot be met due to legal restrictions (E.g. where it ++is against the law to give out Software without warranty), you must cease ++using the software and delete all copies you have. ++ ++Slirp uses code that is copyrighted by the following people/organizations: ++ ++Juha Pirkola. ++Gregory M. Christy. ++The Regents of the University of California. ++Carnegie Mellon University. ++The Australian National University. ++RSA Data Security, Inc. 
++ ++Please read the top of each source file for the details on the various ++copyrights. +diff --git a/slirp/README.md b/slirp/README.md +new file mode 100644 +index 0000000000..9f9c1b14f6 +--- /dev/null ++++ b/slirp/README.md +@@ -0,0 +1,60 @@ ++# libslirp ++ ++libslirp is a user-mode networking library used by virtual machines, ++containers or various tools. ++ ++## Getting Started ++ ++### Prerequisites ++ ++A C compiler, meson and glib2 development libraries. ++ ++(see also [.gitlab-ci.yml](.gitlab-ci.yml) DEPS variable for the list ++of dependencies on Fedora) ++ ++### Building ++ ++You may build and install the shared library with meson: ++ ++``` sh ++meson build ++ninja -C build install ++``` ++And configure QEMU with --enable-slirp=system to link against it. ++ ++(QEMU may build with the submodule static library using --enable-slirp=git) ++ ++### Testing ++ ++Unfortunately, there are no automated tests available. ++ ++You may run QEMU ``-net user`` linked with your development version. ++ ++## Contributing ++ ++Feel free to open issues on the [project ++issues](https://gitlab.freedesktop.org/slirp/libslirp/issues) page. ++ ++You may clone the [gitlab ++project](https://gitlab.freedesktop.org/slirp/libslirp) and create a ++merge request. ++ ++Contributing with gitlab allows gitlab workflow, tracking issues, ++running CI etc. ++ ++Alternatively, you may send patches to slirp@lists.freedesktop.org ++mailing list. ++ ++## Versioning ++ ++We intend to use [libtool's ++versioning](https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html) ++for the shared libraries and use [SemVer](http://semver.org/) for ++project versions. ++ ++For the versions available, see the [tags on this ++repository](https://gitlab.freedesktop.org/slirp/libslirp/releases). ++ ++## License ++ ++See the [COPYRIGHT](COPYRIGHT) file for details. 
+diff --git a/slirp/build-aux/git-version-gen b/slirp/build-aux/git-version-gen +new file mode 100755 +index 0000000000..5617eb8d4e +--- /dev/null ++++ b/slirp/build-aux/git-version-gen +@@ -0,0 +1,158 @@ ++#!/bin/sh ++# Print a version string. ++scriptversion=2010-06-14.19; # UTC ++ ++# Copyright (C) 2007-2010 Free Software Foundation, Inc. ++# ++# This program is free software: you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++ ++# This script is derived from GIT-VERSION-GEN from GIT: http://git.or.cz/. ++# It may be run two ways: ++# - from a git repository in which the "git describe" command below ++# produces useful output (thus requiring at least one signed tag) ++# - from a non-git-repo directory containing a .tarball-version file, which ++# presumes this script is invoked like "./git-version-gen .tarball-version". ++ ++# In order to use intra-version strings in your project, you will need two ++# separate generated version string files: ++# ++# .tarball-version - present only in a distribution tarball, and not in ++# a checked-out repository. Created with contents that were learned at ++# the last time autoconf was run, and used by git-version-gen. Must not ++# be present in either $(srcdir) or $(builddir) for git-version-gen to ++# give accurate answers during normal development with a checked out tree, ++# but must be present in a tarball when there is no version control system. 
++# Therefore, it cannot be used in any dependencies. GNUmakefile has ++# hooks to force a reconfigure at distribution time to get the value ++# correct, without penalizing normal development with extra reconfigures. ++# ++# .version - present in a checked-out repository and in a distribution ++# tarball. Usable in dependencies, particularly for files that don't ++# want to depend on config.h but do want to track version changes. ++# Delete this file prior to any autoconf run where you want to rebuild ++# files to pick up a version string change; and leave it stale to ++# minimize rebuild time after unrelated changes to configure sources. ++# ++# It is probably wise to add these two files to .gitignore, so that you ++# don't accidentally commit either generated file. ++# ++# Use the following line in your configure.ac, so that $(VERSION) will ++# automatically be up-to-date each time configure is run (and note that ++# since configure.ac no longer includes a version string, Makefile rules ++# should not depend on configure.ac for version updates). ++# ++# AC_INIT([GNU project], ++# m4_esyscmd([build-aux/git-version-gen .tarball-version]), ++# [bug-project@example]) ++# ++# Then use the following lines in your Makefile.am, so that .version ++# will be present for dependencies, and so that .tarball-version will ++# exist in distribution tarballs. ++# ++# BUILT_SOURCES = $(top_srcdir)/.version ++# $(top_srcdir)/.version: ++# echo $(VERSION) > $@-t && mv $@-t $@ ++# dist-hook: ++# echo $(VERSION) > $(distdir)/.tarball-version ++ ++case $# in ++ 1|2) ;; ++ *) echo 1>&2 "Usage: $0 \$srcdir/.tarball-version" \ ++ '[TAG-NORMALIZATION-SED-SCRIPT]' ++ exit 1;; ++esac ++ ++tarball_version_file=$1 ++tag_sed_script="${2:-s/x/x/}" ++nl=' ++' ++ ++# Avoid meddling by environment variable of the same name. ++v= ++ ++# First see if there is a tarball-only version file. ++# then try "git describe", then default. 
++if test -f $tarball_version_file ++then ++ v=`cat $tarball_version_file` || exit 1 ++ case $v in ++ *$nl*) v= ;; # reject multi-line output ++ [0-9]*) ;; ++ *) v= ;; ++ esac ++ test -z "$v" \ ++ && echo "$0: WARNING: $tarball_version_file seems to be damaged" 1>&2 ++fi ++ ++if test -n "$v" ++then ++ : # use $v ++elif test -d .git \ ++ && v=`git describe --abbrev=4 --match='v*' HEAD 2>/dev/null \ ++ || git describe --abbrev=4 HEAD 2>/dev/null` \ ++ && v=`printf '%s\n' "$v" | sed "$tag_sed_script"` \ ++ && case $v in ++ v[0-9]*) ;; ++ *) (exit 1) ;; ++ esac ++then ++ # Is this a new git that lists number of commits since the last ++ # tag or the previous older version that did not? ++ # Newer: v6.10-77-g0f8faeb ++ # Older: v6.10-g0f8faeb ++ case $v in ++ *-*-*) : git describe is okay three part flavor ;; ++ *-*) ++ : git describe is older two part flavor ++ # Recreate the number of commits and rewrite such that the ++ # result is the same as if we were using the newer version ++ # of git describe. ++ vtag=`echo "$v" | sed 's/-.*//'` ++ numcommits=`git rev-list "$vtag"..HEAD | wc -l` ++ v=`echo "$v" | sed "s/\(.*\)-\(.*\)/\1-$numcommits-\2/"`; ++ ;; ++ esac ++ ++ # Change the first '-' to a '.', so version-comparing tools work properly. ++ # Remove the "g" in git describe's output string, to save a byte. ++ v=`echo "$v" | sed 's/-/./;s/\(.*\)-g/\1-/'`; ++else ++ v=UNKNOWN ++fi ++ ++v=`echo "$v" |sed 's/^v//'` ++ ++# Don't declare a version "dirty" merely because a time stamp has changed. ++git update-index --refresh > /dev/null 2>&1 ++ ++dirty=`sh -c 'git diff-index --name-only HEAD' 2>/dev/null` || dirty= ++case "$dirty" in ++ '') ;; ++ *) # Append the suffix only if there isn't one already. ++ case $v in ++ *-dirty) ;; ++ *) v="$v-dirty" ;; ++ esac ;; ++esac ++ ++# Omit the trailing newline, so that m4_esyscmd can use the result directly. 
++echo "$v" | tr -d "$nl" ++ ++# Local variables: ++# eval: (add-hook 'write-file-hooks 'time-stamp) ++# time-stamp-start: "scriptversion=" ++# time-stamp-format: "%:y-%02m-%02d.%02H" ++# time-stamp-time-zone: "UTC" ++# time-stamp-end: "; # UTC" ++# End: +diff --git a/slirp/meson.build b/slirp/meson.build +new file mode 100644 +index 0000000000..cb1396ad59 +--- /dev/null ++++ b/slirp/meson.build +@@ -0,0 +1,162 @@ ++project('libslirp', 'c', ++ version : '4.6.1', ++ license : 'BSD-3-Clause', ++ default_options : ['warning_level=1', 'c_std=gnu99'], ++ meson_version : '>= 0.50', ++) ++ ++version = meson.project_version() ++varr = version.split('.') ++major_version = varr[0] ++minor_version = varr[1] ++micro_version = varr[2] ++ ++conf = configuration_data() ++conf.set('SLIRP_MAJOR_VERSION', major_version) ++conf.set('SLIRP_MINOR_VERSION', minor_version) ++conf.set('SLIRP_MICRO_VERSION', micro_version) ++ ++full_version = run_command('build-aux/git-version-gen', ++ '@0@/.tarball-version'.format(meson.current_source_dir()), ++ check : true).stdout().strip() ++if full_version.startswith('UNKNOWN') ++ full_version = meson.project_version() ++elif not full_version.startswith(meson.project_version()) ++ error('meson.build project version @0@ does not match git-describe output @1@' ++ .format(meson.project_version(), full_version)) ++endif ++conf.set_quoted('SLIRP_VERSION_STRING', full_version + get_option('version_suffix')) ++ ++# libtool versioning - this applies to libslirp ++# ++# See http://sources.redhat.com/autobook/autobook/autobook_91.html#SEC91 for details ++# ++# - If interfaces have been changed or added, but binary compatibility ++# has been preserved, change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE += 1 ++# - If binary compatibility has been broken (eg removed or changed ++# interfaces), change: ++# CURRENT += 1 ++# REVISION = 0 ++# AGE = 0 ++# - If the interface is the same as the previous version, but bugs are ++# fixed, change: ++# REVISION += 1 
++lt_current = 3 ++lt_revision = 1 ++lt_age = 3 ++lt_version = '@0@.@1@.@2@'.format(lt_current - lt_age, lt_age, lt_revision) ++ ++host_system = host_machine.system() ++ ++glib_dep = dependency('glib-2.0') ++ ++cc = meson.get_compiler('c') ++ ++platform_deps = [] ++ ++if host_system == 'windows' ++ platform_deps += [ ++ cc.find_library('ws2_32'), ++ cc.find_library('iphlpapi') ++ ] ++elif host_system == 'darwin' ++ platform_deps += [ ++ cc.find_library('resolv') ++ ] ++endif ++ ++cargs = [ ++ '-DG_LOG_DOMAIN="Slirp"', ++] ++ ++if cc.check_header('valgrind/valgrind.h') ++ cargs += [ '-DHAVE_VALGRIND=1' ] ++endif ++ ++sources = [ ++ 'src/arp_table.c', ++ 'src/bootp.c', ++ 'src/cksum.c', ++ 'src/dhcpv6.c', ++ 'src/dnssearch.c', ++ 'src/if.c', ++ 'src/ip6_icmp.c', ++ 'src/ip6_input.c', ++ 'src/ip6_output.c', ++ 'src/ip_icmp.c', ++ 'src/ip_input.c', ++ 'src/ip_output.c', ++ 'src/mbuf.c', ++ 'src/misc.c', ++ 'src/ncsi.c', ++ 'src/ndp_table.c', ++ 'src/sbuf.c', ++ 'src/slirp.c', ++ 'src/socket.c', ++ 'src/state.c', ++ 'src/stream.c', ++ 'src/tcp_input.c', ++ 'src/tcp_output.c', ++ 'src/tcp_subr.c', ++ 'src/tcp_timer.c', ++ 'src/tftp.c', ++ 'src/udp.c', ++ 'src/udp6.c', ++ 'src/util.c', ++ 'src/version.c', ++ 'src/vmstate.c', ++] ++ ++mapfile = 'src/libslirp.map' ++vflag = [] ++vflag_test = '-Wl,--version-script,@0@/@1@'.format(meson.current_source_dir(), mapfile) ++if cc.has_link_argument(vflag_test) ++ vflag += vflag_test ++endif ++ ++install_devel = not meson.is_subproject() ++ ++configure_file( ++ input : 'src/libslirp-version.h.in', ++ output : 'libslirp-version.h', ++ install : install_devel, ++ install_dir : join_paths(get_option('includedir'), 'slirp'), ++ configuration : conf ++) ++ ++lib = library('slirp', sources, ++ version : lt_version, ++ c_args : cargs, ++ link_args : vflag, ++ link_depends : mapfile, ++ dependencies : [glib_dep, platform_deps], ++ install : install_devel or get_option('default_library') == 'shared', ++) ++ ++if install_devel ++ 
install_headers(['src/libslirp.h'], subdir : 'slirp') ++ ++ pkg = import('pkgconfig') ++ ++ pkg.generate( ++ version : version, ++ libraries : lib, ++ requires : [ ++ 'glib-2.0', ++ ], ++ name : 'slirp', ++ description : 'User-space network stack', ++ filebase : 'slirp', ++ subdirs : 'slirp', ++ ) ++else ++ if get_option('default_library') == 'both' ++ lib = lib.get_static_lib() ++ endif ++ libslirp_dep = declare_dependency( ++ include_directories: include_directories('.', 'src'), ++ link_with: lib) ++endif +diff --git a/slirp/meson_options.txt b/slirp/meson_options.txt +new file mode 100644 +index 0000000000..27e7c8059c +--- /dev/null ++++ b/slirp/meson_options.txt +@@ -0,0 +1,2 @@ ++option('version_suffix', type: 'string', value: '', ++ description: 'Suffix to append to SLIRP_VERSION_STRING') +diff --git a/slirp/src/arp_table.c b/slirp/src/arp_table.c +new file mode 100644 +index 0000000000..ba8c8a4eee +--- /dev/null ++++ b/slirp/src/arp_table.c +@@ -0,0 +1,94 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * ARP table ++ * ++ * Copyright (c) 2011 AdaCore ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++#include ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_add"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* Do not register broadcast addresses */ ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ /* Update the entry */ ++ memcpy(arptbl->table[i].ar_sha, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ arptbl->table[arptbl->next_victim].ar_sip = ip_addr; ++ memcpy(arptbl->table[arptbl->next_victim].ar_sha, ethaddr, ETH_ALEN); ++ arptbl->next_victim = (arptbl->next_victim + 1) % ARP_TABLE_SIZE; ++} ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ const uint32_t broadcast_addr = ++ ~slirp->vnetwork_mask.s_addr | slirp->vnetwork_addr.s_addr; ++ ArpTable *arptbl = &slirp->arp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ DEBUG_CALL("arp_table_search"); ++ DEBUG_ARG("ip = %s", inet_ntoa((struct in_addr){ .s_addr = ip_addr })); ++ ++ /* If broadcast address */ ++ if (ip_addr == 0 || ip_addr == 0xffffffff || ip_addr == broadcast_addr) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, 
ETH_ALEN); ++ return 1; ++ } ++ ++ for (i = 0; i < ARP_TABLE_SIZE; i++) { ++ if (arptbl->table[i].ar_sip == ip_addr) { ++ memcpy(out_ethaddr, arptbl->table[i].ar_sha, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/bootp.c b/slirp/src/bootp.c +new file mode 100644 +index 0000000000..d78d61b44c +--- /dev/null ++++ b/slirp/src/bootp.c +@@ -0,0 +1,375 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * QEMU BOOTP/DHCP server ++ * ++ * Copyright (c) 2004 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#include "slirp.h" ++ ++#if defined(_WIN32) ++/* Windows ntohl() returns an u_long value. ++ * Add a type cast to match the format strings. 
*/ ++#define ntohl(n) ((uint32_t)ntohl(n)) ++#endif ++ ++/* XXX: only DHCP is supported */ ++ ++#define LEASE_TIME (24 * 3600) ++ ++static const uint8_t rfc1533_cookie[] = { RFC1533_COOKIE }; ++ ++#define DPRINTF(fmt, ...) DEBUG_CALL(fmt, ##__VA_ARGS__) ++ ++static BOOTPClient *get_new_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ bc = &slirp->bootp_clients[i]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static BOOTPClient *request_addr(Slirp *slirp, const struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ uint32_t req_addr = ntohl(paddr->s_addr); ++ uint32_t dhcp_addr = ntohl(slirp->vdhcp_startaddr.s_addr); ++ BOOTPClient *bc; ++ ++ if (req_addr >= dhcp_addr && req_addr < (dhcp_addr + NB_BOOTP_CLIENTS)) { ++ bc = &slirp->bootp_clients[req_addr - dhcp_addr]; ++ if (!bc->allocated || !memcmp(macaddr, bc->macaddr, 6)) { ++ bc->allocated = 1; ++ return bc; ++ } ++ } ++ return NULL; ++} ++ ++static BOOTPClient *find_addr(Slirp *slirp, struct in_addr *paddr, ++ const uint8_t *macaddr) ++{ ++ BOOTPClient *bc; ++ int i; ++ ++ for (i = 0; i < NB_BOOTP_CLIENTS; i++) { ++ if (!memcmp(macaddr, slirp->bootp_clients[i].macaddr, 6)) ++ goto found; ++ } ++ return NULL; ++found: ++ bc = &slirp->bootp_clients[i]; ++ bc->allocated = 1; ++ paddr->s_addr = slirp->vdhcp_startaddr.s_addr + htonl(i); ++ return bc; ++} ++ ++static void dhcp_decode(const struct bootp_t *bp, ++ const uint8_t *bp_end, ++ int *pmsg_type, ++ struct in_addr *preq_addr) ++{ ++ const uint8_t *p; ++ int len, tag; ++ ++ *pmsg_type = 0; ++ preq_addr->s_addr = htonl(0L); ++ ++ p = bp->bp_vend; ++ if (memcmp(p, rfc1533_cookie, 4) != 0) ++ return; ++ p += 4; ++ while (p < bp_end) { ++ tag = p[0]; ++ if (tag == 
RFC1533_PAD) { ++ p++; ++ } else if (tag == RFC1533_END) { ++ break; ++ } else { ++ p++; ++ if (p >= bp_end) ++ break; ++ len = *p++; ++ if (p + len > bp_end) { ++ break; ++ } ++ DPRINTF("dhcp: tag=%d len=%d\n", tag, len); ++ ++ switch (tag) { ++ case RFC2132_MSG_TYPE: ++ if (len >= 1) ++ *pmsg_type = p[0]; ++ break; ++ case RFC2132_REQ_ADDR: ++ if (len >= 4) { ++ memcpy(&(preq_addr->s_addr), p, 4); ++ } ++ break; ++ default: ++ break; ++ } ++ p += len; ++ } ++ } ++ if (*pmsg_type == DHCPREQUEST && preq_addr->s_addr == htonl(0L) && ++ bp->bp_ciaddr.s_addr) { ++ memcpy(&(preq_addr->s_addr), &bp->bp_ciaddr, 4); ++ } ++} ++ ++static void bootp_reply(Slirp *slirp, ++ const struct bootp_t *bp, ++ const uint8_t *bp_end) ++{ ++ BOOTPClient *bc = NULL; ++ struct mbuf *m; ++ struct bootp_t *rbp; ++ struct sockaddr_in saddr, daddr; ++ struct in_addr preq_addr; ++ int dhcp_msg_type, val; ++ uint8_t *q; ++ uint8_t *end; ++ uint8_t client_ethaddr[ETH_ALEN]; ++ ++ /* extract exact DHCP msg type */ ++ dhcp_decode(bp, bp_end, &dhcp_msg_type, &preq_addr); ++ DPRINTF("bootp packet op=%d msgtype=%d", bp->bp_op, dhcp_msg_type); ++ if (preq_addr.s_addr != htonl(0L)) ++ DPRINTF(" req_addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ else { ++ DPRINTF("\n"); ++ } ++ ++ if (dhcp_msg_type == 0) ++ dhcp_msg_type = DHCPREQUEST; /* Force reply for old BOOTP clients */ ++ ++ if (dhcp_msg_type != DHCPDISCOVER && dhcp_msg_type != DHCPREQUEST) ++ return; ++ ++ /* Get client's hardware address from bootp request */ ++ memcpy(client_ethaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m_inc(m, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ rbp = (struct bootp_t *)m->m_data; ++ m->m_data += sizeof(struct udpiphdr); ++ memset(rbp, 0, sizeof(struct bootp_t) + DHCP_OPT_LEN); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ 
daddr.sin_addr = preq_addr; ++ } ++ } ++ if (!bc) { ++ new_addr: ++ bc = get_new_addr(slirp, &daddr.sin_addr, client_ethaddr); ++ if (!bc) { ++ DPRINTF("no address left\n"); ++ return; ++ } ++ } ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else if (preq_addr.s_addr != htonl(0L)) { ++ bc = request_addr(slirp, &preq_addr, client_ethaddr); ++ if (bc) { ++ daddr.sin_addr = preq_addr; ++ memcpy(bc->macaddr, client_ethaddr, ETH_ALEN); ++ } else { ++ /* DHCPNAKs should be sent to broadcast */ ++ daddr.sin_addr.s_addr = 0xffffffff; ++ } ++ } else { ++ bc = find_addr(slirp, &daddr.sin_addr, bp->bp_hwaddr); ++ if (!bc) { ++ /* if never assigned, behaves as if it was already ++ assigned (windows fix because it remembers its address) */ ++ goto new_addr; ++ } ++ } ++ ++ /* Update ARP table for this IP address */ ++ arp_table_add(slirp, daddr.sin_addr.s_addr, client_ethaddr); ++ ++ saddr.sin_addr = slirp->vhost_addr; ++ saddr.sin_port = htons(BOOTP_SERVER); ++ ++ daddr.sin_port = htons(BOOTP_CLIENT); ++ ++ rbp->bp_op = BOOTP_REPLY; ++ rbp->bp_xid = bp->bp_xid; ++ rbp->bp_htype = 1; ++ rbp->bp_hlen = 6; ++ memcpy(rbp->bp_hwaddr, bp->bp_hwaddr, ETH_ALEN); ++ ++ rbp->bp_yiaddr = daddr.sin_addr; /* Client IP address */ ++ rbp->bp_siaddr = saddr.sin_addr; /* Server IP address */ ++ ++ q = rbp->bp_vend; ++ end = rbp->bp_vend + DHCP_OPT_LEN; ++ memcpy(q, rfc1533_cookie, 4); ++ q += 4; ++ ++ if (bc) { ++ DPRINTF("%s addr=%08" PRIx32 "\n", ++ (dhcp_msg_type == DHCPDISCOVER) ? 
"offered" : "ack'ed", ++ ntohl(daddr.sin_addr.s_addr)); ++ ++ if (dhcp_msg_type == DHCPDISCOVER) { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPOFFER; ++ } else /* DHCPREQUEST */ { ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPACK; ++ } ++ ++ if (slirp->bootp_filename) { ++ g_assert(strlen(slirp->bootp_filename) < sizeof(rbp->bp_file)); ++ strcpy(rbp->bp_file, slirp->bootp_filename); ++ } ++ ++ *q++ = RFC2132_SRV_ID; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_NETMASK; ++ *q++ = 4; ++ memcpy(q, &slirp->vnetwork_mask, 4); ++ q += 4; ++ ++ if (!slirp->restricted) { ++ *q++ = RFC1533_GATEWAY; ++ *q++ = 4; ++ memcpy(q, &saddr.sin_addr, 4); ++ q += 4; ++ ++ *q++ = RFC1533_DNS; ++ *q++ = 4; ++ memcpy(q, &slirp->vnameserver_addr, 4); ++ q += 4; ++ } ++ ++ *q++ = RFC2132_LEASE_TIME; ++ *q++ = 4; ++ val = htonl(LEASE_TIME); ++ memcpy(q, &val, 4); ++ q += 4; ++ ++ if (*slirp->client_hostname) { ++ val = strlen(slirp->client_hostname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting host name option."); ++ } else { ++ *q++ = RFC1533_HOSTNAME; ++ *q++ = val; ++ memcpy(q, slirp->client_hostname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdomainname) { ++ val = strlen(slirp->vdomainname); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain name option."); ++ } else { ++ *q++ = RFC1533_DOMAINNAME; ++ *q++ = val; ++ memcpy(q, slirp->vdomainname, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->tftp_server_name) { ++ val = strlen(slirp->tftp_server_name); ++ if (q + val + 2 >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting tftp-server-name option."); ++ } else { ++ *q++ = RFC2132_TFTP_SERVER_NAME; ++ *q++ = val; ++ memcpy(q, slirp->tftp_server_name, val); ++ q += val; ++ } ++ } ++ ++ if (slirp->vdnssearch) { ++ val = slirp->vdnssearch_len; ++ if (q + val >= end) { ++ g_warning("DHCP packet size exceeded, " ++ "omitting domain-search 
option."); ++ } else { ++ memcpy(q, slirp->vdnssearch, val); ++ q += val; ++ } ++ } ++ } else { ++ static const char nak_msg[] = "requested address not available"; ++ ++ DPRINTF("nak'ed addr=%08" PRIx32 "\n", ntohl(preq_addr.s_addr)); ++ ++ *q++ = RFC2132_MSG_TYPE; ++ *q++ = 1; ++ *q++ = DHCPNAK; ++ ++ *q++ = RFC2132_MESSAGE; ++ *q++ = sizeof(nak_msg) - 1; ++ memcpy(q, nak_msg, sizeof(nak_msg) - 1); ++ q += sizeof(nak_msg) - 1; ++ } ++ assert(q < end); ++ *q++ = RFC1533_END; ++ ++ daddr.sin_addr.s_addr = 0xffffffffu; ++ ++ assert(q <= end); ++ ++ m->m_len = sizeof(struct bootp_t) + (end - rbp->bp_vend) - sizeof(struct ip) - sizeof(struct udphdr); ++ udp_output(NULL, m, &saddr, &daddr, IPTOS_LOWDELAY); ++} ++ ++void bootp_input(struct mbuf *m) ++{ ++ struct bootp_t *bp = mtod_check(m, sizeof(struct bootp_t)); ++ ++ if (bp && bp->bp_op == BOOTP_REQUEST) { ++ bootp_reply(m->slirp, bp, m_end(m)); ++ } ++} +diff --git a/slirp/src/bootp.h b/slirp/src/bootp.h +new file mode 100644 +index 0000000000..31ce5fd33f +--- /dev/null ++++ b/slirp/src/bootp.h +@@ -0,0 +1,129 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* bootp/dhcp defines */ ++ ++#ifndef SLIRP_BOOTP_H ++#define SLIRP_BOOTP_H ++ ++#define BOOTP_SERVER 67 ++#define BOOTP_CLIENT 68 ++ ++#define BOOTP_REQUEST 1 ++#define BOOTP_REPLY 2 ++ ++#define RFC1533_COOKIE 99, 130, 83, 99 ++#define RFC1533_PAD 0 ++#define RFC1533_NETMASK 1 ++#define RFC1533_TIMEOFFSET 2 ++#define RFC1533_GATEWAY 3 ++#define RFC1533_TIMESERVER 4 ++#define RFC1533_IEN116NS 5 ++#define RFC1533_DNS 6 ++#define RFC1533_LOGSERVER 7 ++#define RFC1533_COOKIESERVER 8 ++#define RFC1533_LPRSERVER 9 ++#define RFC1533_IMPRESSSERVER 10 ++#define RFC1533_RESOURCESERVER 11 ++#define RFC1533_HOSTNAME 12 ++#define RFC1533_BOOTFILESIZE 13 ++#define RFC1533_MERITDUMPFILE 14 ++#define RFC1533_DOMAINNAME 15 ++#define RFC1533_SWAPSERVER 16 ++#define RFC1533_ROOTPATH 17 ++#define RFC1533_EXTENSIONPATH 18 ++#define RFC1533_IPFORWARDING 19 ++#define 
RFC1533_IPSOURCEROUTING 20 ++#define RFC1533_IPPOLICYFILTER 21 ++#define RFC1533_IPMAXREASSEMBLY 22 ++#define RFC1533_IPTTL 23 ++#define RFC1533_IPMTU 24 ++#define RFC1533_IPMTUPLATEAU 25 ++#define RFC1533_INTMTU 26 ++#define RFC1533_INTLOCALSUBNETS 27 ++#define RFC1533_INTBROADCAST 28 ++#define RFC1533_INTICMPDISCOVER 29 ++#define RFC1533_INTICMPRESPOND 30 ++#define RFC1533_INTROUTEDISCOVER 31 ++#define RFC1533_INTROUTESOLICIT 32 ++#define RFC1533_INTSTATICROUTES 33 ++#define RFC1533_LLTRAILERENCAP 34 ++#define RFC1533_LLARPCACHETMO 35 ++#define RFC1533_LLETHERNETENCAP 36 ++#define RFC1533_TCPTTL 37 ++#define RFC1533_TCPKEEPALIVETMO 38 ++#define RFC1533_TCPKEEPALIVEGB 39 ++#define RFC1533_NISDOMAIN 40 ++#define RFC1533_NISSERVER 41 ++#define RFC1533_NTPSERVER 42 ++#define RFC1533_VENDOR 43 ++#define RFC1533_NBNS 44 ++#define RFC1533_NBDD 45 ++#define RFC1533_NBNT 46 ++#define RFC1533_NBSCOPE 47 ++#define RFC1533_XFS 48 ++#define RFC1533_XDM 49 ++ ++#define RFC2132_REQ_ADDR 50 ++#define RFC2132_LEASE_TIME 51 ++#define RFC2132_MSG_TYPE 53 ++#define RFC2132_SRV_ID 54 ++#define RFC2132_PARAM_LIST 55 ++#define RFC2132_MESSAGE 56 ++#define RFC2132_MAX_SIZE 57 ++#define RFC2132_RENEWAL_TIME 58 ++#define RFC2132_REBIND_TIME 59 ++#define RFC2132_TFTP_SERVER_NAME 66 ++ ++#define DHCPDISCOVER 1 ++#define DHCPOFFER 2 ++#define DHCPREQUEST 3 ++#define DHCPACK 5 ++#define DHCPNAK 6 ++ ++#define RFC1533_VENDOR_MAJOR 0 ++#define RFC1533_VENDOR_MINOR 0 ++ ++#define RFC1533_VENDOR_MAGIC 128 ++#define RFC1533_VENDOR_ADDPARM 129 ++#define RFC1533_VENDOR_ETHDEV 130 ++#define RFC1533_VENDOR_HOWTO 132 ++#define RFC1533_VENDOR_MNUOPTS 160 ++#define RFC1533_VENDOR_SELECTION 176 ++#define RFC1533_VENDOR_MOTD 184 ++#define RFC1533_VENDOR_NUMOFMOTD 8 ++#define RFC1533_VENDOR_IMG 192 ++#define RFC1533_VENDOR_NUMOFIMG 16 ++ ++#define RFC1533_END 255 ++#define BOOTP_VENDOR_LEN 64 ++#define DHCP_OPT_LEN 312 ++ ++struct bootp_t { ++ struct ip ip; ++ struct udphdr udp; ++ uint8_t bp_op; ++ uint8_t 
bp_htype; ++ uint8_t bp_hlen; ++ uint8_t bp_hops; ++ uint32_t bp_xid; ++ uint16_t bp_secs; ++ uint16_t unused; ++ struct in_addr bp_ciaddr; ++ struct in_addr bp_yiaddr; ++ struct in_addr bp_siaddr; ++ struct in_addr bp_giaddr; ++ uint8_t bp_hwaddr[16]; ++ uint8_t bp_sname[64]; ++ char bp_file[128]; ++ uint8_t bp_vend[]; ++}; ++ ++typedef struct { ++ uint16_t allocated; ++ uint8_t macaddr[6]; ++} BOOTPClient; ++ ++#define NB_BOOTP_CLIENTS 16 ++ ++void bootp_input(struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/cksum.c b/slirp/src/cksum.c +new file mode 100644 +index 0000000000..b1cb97b7e1 +--- /dev/null ++++ b/slirp/src/cksum.c +@@ -0,0 +1,179 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1988, 1992, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 ++ * in_cksum.c,v 1.2 1994/08/02 07:48:16 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++/* ++ * Checksum routine for Internet Protocol family headers (Portable Version). ++ * ++ * This routine is very heavily used in the network ++ * code and should be modified for each CPU to be as fast as possible. ++ * ++ * XXX Since we will never span more than 1 mbuf, we can optimise this ++ */ ++ ++#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) ++#define REDUCE \ ++ { \ ++ l_util.l = sum; \ ++ sum = l_util.s[0] + l_util.s[1]; \ ++ ADDCARRY(sum); \ ++ } ++ ++int cksum(struct mbuf *m, int len) ++{ ++ register uint16_t *w; ++ register int sum = 0; ++ register int mlen = 0; ++ int byte_swapped = 0; ++ ++ union { ++ uint8_t c[2]; ++ uint16_t s; ++ } s_util; ++ union { ++ uint16_t s[2]; ++ uint32_t l; ++ } l_util; ++ ++ if (m->m_len == 0) ++ goto cont; ++ w = mtod(m, uint16_t *); ++ ++ mlen = m->m_len; ++ ++ if (len < mlen) ++ mlen = len; ++ len -= mlen; ++ /* ++ * Force to even boundary. ++ */ ++ if ((1 & (uintptr_t)w) && (mlen > 0)) { ++ REDUCE; ++ sum <<= 8; ++ s_util.c[0] = *(uint8_t *)w; ++ w = (uint16_t *)((int8_t *)w + 1); ++ mlen--; ++ byte_swapped = 1; ++ } ++ /* ++ * Unroll the loop to make overhead from ++ * branches &c small. 
++ */ ++ while ((mlen -= 32) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ sum += w[4]; ++ sum += w[5]; ++ sum += w[6]; ++ sum += w[7]; ++ sum += w[8]; ++ sum += w[9]; ++ sum += w[10]; ++ sum += w[11]; ++ sum += w[12]; ++ sum += w[13]; ++ sum += w[14]; ++ sum += w[15]; ++ w += 16; ++ } ++ mlen += 32; ++ while ((mlen -= 8) >= 0) { ++ sum += w[0]; ++ sum += w[1]; ++ sum += w[2]; ++ sum += w[3]; ++ w += 4; ++ } ++ mlen += 8; ++ if (mlen == 0 && byte_swapped == 0) ++ goto cont; ++ REDUCE; ++ while ((mlen -= 2) >= 0) { ++ sum += *w++; ++ } ++ ++ if (byte_swapped) { ++ REDUCE; ++ sum <<= 8; ++ if (mlen == -1) { ++ s_util.c[1] = *(uint8_t *)w; ++ sum += s_util.s; ++ mlen = 0; ++ } else ++ ++ mlen = -1; ++ } else if (mlen == -1) ++ s_util.c[0] = *(uint8_t *)w; ++ ++cont: ++ if (len) { ++ DEBUG_ERROR("cksum: out of data"); ++ DEBUG_ERROR(" len = %d", len); ++ } ++ if (mlen == -1) { ++ /* The last mbuf has odd # of bytes. Follow the ++ standard (the odd byte may be shifted left by 8 bits ++ or not as determined by endian-ness of the machine) */ ++ s_util.c[1] = 0; ++ sum += s_util.s; ++ } ++ REDUCE; ++ return (~sum & 0xffff); ++} ++ ++int ip6_cksum(struct mbuf *m) ++{ ++ /* TODO: Optimize this by being able to pass the ip6_pseudohdr to cksum ++ * separately from the mbuf */ ++ struct ip6 save_ip, *ip = mtod(m, struct ip6 *); ++ struct ip6_pseudohdr *ih = mtod(m, struct ip6_pseudohdr *); ++ int sum; ++ ++ save_ip = *ip; ++ ++ ih->ih_src = save_ip.ip_src; ++ ih->ih_dst = save_ip.ip_dst; ++ ih->ih_pl = htonl((uint32_t)ntohs(save_ip.ip_pl)); ++ ih->ih_zero_hi = 0; ++ ih->ih_zero_lo = 0; ++ ih->ih_nh = save_ip.ip_nh; ++ ++ sum = cksum(m, ((int)sizeof(struct ip6_pseudohdr)) + ntohl(ih->ih_pl)); ++ ++ *ip = save_ip; ++ ++ return sum; ++} +diff --git a/slirp/src/debug.h b/slirp/src/debug.h +new file mode 100644 +index 0000000000..0f9f3eff3f +--- /dev/null ++++ b/slirp/src/debug.h +@@ -0,0 +1,59 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * 
Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef DEBUG_H_ ++#define DEBUG_H_ ++ ++#define DBG_CALL (1 << 0) ++#define DBG_MISC (1 << 1) ++#define DBG_ERROR (1 << 2) ++#define DBG_TFTP (1 << 3) ++#define DBG_VERBOSE_CALL (1 << 4) ++ ++extern int slirp_debug; ++ ++#define DEBUG_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_VERBOSE_CALL(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_VERBOSE_CALL)) { \ ++ g_debug(fmt "...", ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ARG(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_CALL)) { \ ++ g_debug(" " fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_MISC(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_MISC)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_ERROR(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_ERROR)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#define DEBUG_TFTP(fmt, ...) \ ++ do { \ ++ if (G_UNLIKELY(slirp_debug & DBG_TFTP)) { \ ++ g_debug(fmt, ##__VA_ARGS__); \ ++ } \ ++ } while (0) ++ ++#endif /* DEBUG_H_ */ +diff --git a/slirp/src/dhcpv6.c b/slirp/src/dhcpv6.c +new file mode 100644 +index 0000000000..77b451b910 +--- /dev/null ++++ b/slirp/src/dhcpv6.c +@@ -0,0 +1,224 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * SLIRP stateless DHCPv6 ++ * ++ * We only support stateless DHCPv6, e.g. for network booting. ++ * See RFC 3315, RFC 3736, RFC 3646 and RFC 5970 for details. ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. 
Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include "slirp.h" ++#include "dhcpv6.h" ++ ++/* DHCPv6 message types */ ++#define MSGTYPE_REPLY 7 ++#define MSGTYPE_INFO_REQUEST 11 ++ ++/* DHCPv6 option types */ ++#define OPTION_CLIENTID 1 ++#define OPTION_IAADDR 5 ++#define OPTION_ORO 6 ++#define OPTION_DNS_SERVERS 23 ++#define OPTION_BOOTFILE_URL 59 ++ ++struct requested_infos { ++ uint8_t *client_id; ++ int client_id_len; ++ bool want_dns; ++ bool want_boot_url; ++}; ++ ++/** ++ * Analyze the info request message sent by the client to see what data it ++ * provided and what it wants to have. The information is gathered in the ++ * "requested_infos" struct. 
Note that client_id (if provided) points into ++ * the odata region, thus the caller must keep odata valid as long as it ++ * needs to access the requested_infos struct. ++ */ ++static int dhcpv6_parse_info_request(Slirp *slirp, uint8_t *odata, int olen, ++ struct requested_infos *ri) ++{ ++ int i, req_opt; ++ ++ while (olen > 4) { ++ /* Parse one option */ ++ int option = odata[0] << 8 | odata[1]; ++ int len = odata[2] << 8 | odata[3]; ++ ++ if (len + 4 > olen) { ++ slirp->cb->guest_error("Guest sent bad DHCPv6 packet!", ++ slirp->opaque); ++ return -E2BIG; ++ } ++ ++ switch (option) { ++ case OPTION_IAADDR: ++ /* According to RFC3315, we must discard requests with IA option */ ++ return -EINVAL; ++ case OPTION_CLIENTID: ++ if (len > 256) { ++ /* Avoid very long IDs which could cause problems later */ ++ return -E2BIG; ++ } ++ ri->client_id = odata + 4; ++ ri->client_id_len = len; ++ break; ++ case OPTION_ORO: /* Option request option */ ++ if (len & 1) { ++ return -EINVAL; ++ } ++ /* Check which options the client wants to have */ ++ for (i = 0; i < len; i += 2) { ++ req_opt = odata[4 + i] << 8 | odata[4 + i + 1]; ++ switch (req_opt) { ++ case OPTION_DNS_SERVERS: ++ ri->want_dns = true; ++ break; ++ case OPTION_BOOTFILE_URL: ++ ri->want_boot_url = true; ++ break; ++ default: ++ DEBUG_MISC("dhcpv6: Unsupported option request %d", ++ req_opt); ++ } ++ } ++ break; ++ default: ++ DEBUG_MISC("dhcpv6 info req: Unsupported option %d, len=%d", option, ++ len); ++ } ++ ++ odata += len + 4; ++ olen -= len + 4; ++ } ++ ++ return 0; ++} ++ ++ ++/** ++ * Handle information request messages ++ */ ++static void dhcpv6_info_request(Slirp *slirp, struct sockaddr_in6 *srcsas, ++ uint32_t xid, uint8_t *odata, int olen) ++{ ++ struct requested_infos ri = { NULL }; ++ struct sockaddr_in6 sa6, da6; ++ struct mbuf *m; ++ uint8_t *resp; ++ ++ if (dhcpv6_parse_info_request(slirp, odata, olen, &ri) < 0) { ++ return; ++ } ++ ++ m = m_get(slirp); ++ if (!m) { ++ return; ++ } ++ 
memset(m->m_data, 0, m->m_size); ++ m->m_data += IF_MAXLINKHDR; ++ resp = (uint8_t *)m->m_data + sizeof(struct ip6) + sizeof(struct udphdr); ++ ++ /* Fill in response */ ++ *resp++ = MSGTYPE_REPLY; ++ *resp++ = (uint8_t)(xid >> 16); ++ *resp++ = (uint8_t)(xid >> 8); ++ *resp++ = (uint8_t)xid; ++ ++ if (ri.client_id) { ++ *resp++ = OPTION_CLIENTID >> 8; /* option-code high byte */ ++ *resp++ = OPTION_CLIENTID; /* option-code low byte */ ++ *resp++ = ri.client_id_len >> 8; /* option-len high byte */ ++ *resp++ = ri.client_id_len; /* option-len low byte */ ++ memcpy(resp, ri.client_id, ri.client_id_len); ++ resp += ri.client_id_len; ++ } ++ if (ri.want_dns) { ++ *resp++ = OPTION_DNS_SERVERS >> 8; /* option-code high byte */ ++ *resp++ = OPTION_DNS_SERVERS; /* option-code low byte */ ++ *resp++ = 0; /* option-len high byte */ ++ *resp++ = 16; /* option-len low byte */ ++ memcpy(resp, &slirp->vnameserver_addr6, 16); ++ resp += 16; ++ } ++ if (ri.want_boot_url) { ++ uint8_t *sa = slirp->vhost_addr6.s6_addr; ++ int slen, smaxlen; ++ ++ *resp++ = OPTION_BOOTFILE_URL >> 8; /* option-code high byte */ ++ *resp++ = OPTION_BOOTFILE_URL; /* option-code low byte */ ++ smaxlen = (uint8_t *)m->m_data + slirp->if_mtu - (resp + 2); ++ slen = slirp_fmt((char *)resp + 2, smaxlen, ++ "tftp://[%02x%02x:%02x%02x:%02x%02x:%02x%02x:" ++ "%02x%02x:%02x%02x:%02x%02x:%02x%02x]/%s", ++ sa[0], sa[1], sa[2], sa[3], sa[4], sa[5], sa[6], sa[7], ++ sa[8], sa[9], sa[10], sa[11], sa[12], sa[13], sa[14], ++ sa[15], slirp->bootp_filename); ++ *resp++ = slen >> 8; /* option-len high byte */ ++ *resp++ = slen; /* option-len low byte */ ++ resp += slen; ++ } ++ ++ sa6.sin6_addr = slirp->vhost_addr6; ++ sa6.sin6_port = DHCPV6_SERVER_PORT; ++ da6.sin6_addr = srcsas->sin6_addr; ++ da6.sin6_port = srcsas->sin6_port; ++ m->m_data += sizeof(struct ip6) + sizeof(struct udphdr); ++ m->m_len = resp - (uint8_t *)m->m_data; ++ udp6_output(NULL, m, &sa6, &da6); ++} ++ ++/** ++ * Handle DHCPv6 messages sent by the 
client ++ */ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m) ++{ ++ uint8_t *data = (uint8_t *)m->m_data + sizeof(struct udphdr); ++ int data_len = m->m_len - sizeof(struct udphdr); ++ uint32_t xid; ++ ++ if (data_len < 4) { ++ return; ++ } ++ ++ xid = ntohl(*(uint32_t *)data) & 0xffffff; ++ ++ switch (data[0]) { ++ case MSGTYPE_INFO_REQUEST: ++ dhcpv6_info_request(m->slirp, srcsas, xid, &data[4], data_len - 4); ++ break; ++ default: ++ DEBUG_MISC("dhcpv6_input: Unsupported message type 0x%x", data[0]); ++ } ++} +diff --git a/slirp/src/dhcpv6.h b/slirp/src/dhcpv6.h +new file mode 100644 +index 0000000000..d12c49b36c +--- /dev/null ++++ b/slirp/src/dhcpv6.h +@@ -0,0 +1,68 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Definitions and prototypes for SLIRP stateless DHCPv6 ++ * ++ * Copyright 2016 Thomas Huth, Red Hat Inc. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef SLIRP_DHCPV6_H ++#define SLIRP_DHCPV6_H ++ ++#define DHCPV6_SERVER_PORT 547 ++ ++#define ALLDHCP_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define in6_dhcp_multicast(a) in6_equal(a, &(struct in6_addr)ALLDHCP_MULTICAST) ++ ++void dhcpv6_input(struct sockaddr_in6 *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/dnssearch.c b/slirp/src/dnssearch.c +new file mode 100644 +index 0000000000..55497e860e +--- /dev/null ++++ b/slirp/src/dnssearch.c +@@ -0,0 +1,306 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Domain search option for DHCP (RFC 3397) ++ * ++ * Copyright (c) 2012 Klaus Stengel ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t RFC3397_OPT_DOMAIN_SEARCH = 119; ++static const uint8_t MAX_OPT_LEN = 255; ++static const uint8_t OPT_HEADER_LEN = 2; ++static const uint8_t REFERENCE_LEN = 2; ++ ++struct compact_domain; ++ ++typedef struct compact_domain { ++ struct compact_domain *self; ++ struct compact_domain *refdom; ++ uint8_t *labels; ++ size_t len; ++ size_t common_octets; ++} CompactDomain; ++ ++static size_t domain_suffix_diffoff(const CompactDomain *a, ++ const CompactDomain *b) ++{ ++ size_t la = a->len, lb = b->len; ++ uint8_t *da = a->labels + la, *db = b->labels + lb; ++ size_t i, lm = (la < lb) ? 
la : lb; ++ ++ for (i = 0; i < lm; i++) { ++ da--; ++ db--; ++ if (*da != *db) { ++ break; ++ } ++ } ++ return i; ++} ++ ++static int domain_suffix_ord(const void *cva, const void *cvb) ++{ ++ const CompactDomain *a = cva, *b = cvb; ++ size_t la = a->len, lb = b->len; ++ size_t doff = domain_suffix_diffoff(a, b); ++ uint8_t ca = a->labels[la - doff]; ++ uint8_t cb = b->labels[lb - doff]; ++ ++ if (ca < cb) { ++ return -1; ++ } ++ if (ca > cb) { ++ return 1; ++ } ++ if (la < lb) { ++ return -1; ++ } ++ if (la > lb) { ++ return 1; ++ } ++ return 0; ++} ++ ++static size_t domain_common_label(CompactDomain *a, CompactDomain *b) ++{ ++ size_t res, doff = domain_suffix_diffoff(a, b); ++ uint8_t *first_eq_pos = a->labels + (a->len - doff); ++ uint8_t *label = a->labels; ++ ++ while (*label && label < first_eq_pos) { ++ label += *label + 1; ++ } ++ res = a->len - (label - a->labels); ++ /* only report if it can help to reduce the packet size */ ++ return (res > REFERENCE_LEN) ? res : 0; ++} ++ ++static void domain_fixup_order(CompactDomain *cd, size_t n) ++{ ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cur = cd + i, *next = cd[i].self; ++ ++ while (!cur->common_octets) { ++ CompactDomain *tmp = next->self; /* backup target value */ ++ ++ next->self = cur; ++ cur->common_octets++; ++ ++ cur = next; ++ next = tmp; ++ } ++ } ++} ++ ++static void domain_mklabels(CompactDomain *cd, const char *input) ++{ ++ uint8_t *len_marker = cd->labels; ++ uint8_t *output = len_marker; /* pre-incremented */ ++ const char *in = input; ++ char cur_chr; ++ size_t len = 0; ++ ++ if (cd->len == 0) { ++ goto fail; ++ } ++ cd->len++; ++ ++ do { ++ cur_chr = *in++; ++ if (cur_chr == '.' 
|| cur_chr == '\0') { ++ len = output - len_marker; ++ if ((len == 0 && cur_chr == '.') || len >= 64) { ++ goto fail; ++ } ++ *len_marker = len; ++ ++ output++; ++ len_marker = output; ++ } else { ++ output++; ++ *output = cur_chr; ++ } ++ } while (cur_chr != '\0'); ++ ++ /* ensure proper zero-termination */ ++ if (len != 0) { ++ *len_marker = 0; ++ cd->len++; ++ } ++ return; ++ ++fail: ++ g_warning("failed to parse domain name '%s'\n", input); ++ cd->len = 0; ++} ++ ++static void domain_mkxrefs(CompactDomain *doms, CompactDomain *last, ++ size_t depth) ++{ ++ CompactDomain *i = doms, *target = doms; ++ ++ do { ++ if (i->labels < target->labels) { ++ target = i; ++ } ++ } while (i++ != last); ++ ++ for (i = doms; i != last; i++) { ++ CompactDomain *group_last; ++ size_t next_depth; ++ ++ if (i->common_octets == depth) { ++ continue; ++ } ++ ++ next_depth = -1; ++ for (group_last = i; group_last != last; group_last++) { ++ size_t co = group_last->common_octets; ++ if (co <= depth) { ++ break; ++ } ++ if (co < next_depth) { ++ next_depth = co; ++ } ++ } ++ domain_mkxrefs(i, group_last, next_depth); ++ ++ i = group_last; ++ if (i == last) { ++ break; ++ } ++ } ++ ++ if (depth == 0) { ++ return; ++ } ++ ++ i = doms; ++ do { ++ if (i != target && i->refdom == NULL) { ++ i->refdom = target; ++ i->common_octets = depth; ++ } ++ } while (i++ != last); ++} ++ ++static size_t domain_compactify(CompactDomain *domains, size_t n) ++{ ++ uint8_t *start = domains->self->labels, *outptr = start; ++ size_t i; ++ ++ for (i = 0; i < n; i++) { ++ CompactDomain *cd = domains[i].self; ++ CompactDomain *rd = cd->refdom; ++ ++ if (rd != NULL) { ++ size_t moff = (rd->labels - start) + (rd->len - cd->common_octets); ++ if (moff < 0x3FFFu) { ++ cd->len -= cd->common_octets - 2; ++ cd->labels[cd->len - 1] = moff & 0xFFu; ++ cd->labels[cd->len - 2] = 0xC0u | (moff >> 8); ++ } ++ } ++ ++ if (cd->labels != outptr) { ++ memmove(outptr, cd->labels, cd->len); ++ cd->labels = outptr; ++ } ++ outptr 
+= cd->len; ++ } ++ return outptr - start; ++} ++ ++int translate_dnssearch(Slirp *s, const char **names) ++{ ++ size_t blocks, bsrc_start, bsrc_end, bdst_start; ++ size_t i, num_domains, memreq = 0; ++ uint8_t *result = NULL, *outptr; ++ CompactDomain *domains = NULL; ++ ++ num_domains = g_strv_length((GStrv)(void *)names); ++ if (num_domains == 0) { ++ return -2; ++ } ++ ++ domains = g_malloc(num_domains * sizeof(*domains)); ++ ++ for (i = 0; i < num_domains; i++) { ++ size_t nlen = strlen(names[i]); ++ memreq += nlen + 2; /* 1 zero octet + 1 label length octet */ ++ domains[i].self = domains + i; ++ domains[i].len = nlen; ++ domains[i].common_octets = 0; ++ domains[i].refdom = NULL; ++ } ++ ++ /* reserve extra 2 header bytes for each 255 bytes of output */ ++ memreq += DIV_ROUND_UP(memreq, MAX_OPT_LEN) * OPT_HEADER_LEN; ++ result = g_malloc(memreq * sizeof(*result)); ++ ++ outptr = result; ++ for (i = 0; i < num_domains; i++) { ++ domains[i].labels = outptr; ++ domain_mklabels(domains + i, names[i]); ++ outptr += domains[i].len; ++ } ++ ++ if (outptr == result) { ++ g_free(domains); ++ g_free(result); ++ return -1; ++ } ++ ++ qsort(domains, num_domains, sizeof(*domains), domain_suffix_ord); ++ domain_fixup_order(domains, num_domains); ++ ++ for (i = 1; i < num_domains; i++) { ++ size_t cl = domain_common_label(domains + i - 1, domains + i); ++ domains[i - 1].common_octets = cl; ++ } ++ ++ domain_mkxrefs(domains, domains + num_domains - 1, 0); ++ memreq = domain_compactify(domains, num_domains); ++ ++ blocks = DIV_ROUND_UP(memreq, MAX_OPT_LEN); ++ bsrc_end = memreq; ++ bsrc_start = (blocks - 1) * MAX_OPT_LEN; ++ bdst_start = bsrc_start + blocks * OPT_HEADER_LEN; ++ memreq += blocks * OPT_HEADER_LEN; ++ ++ while (blocks--) { ++ size_t len = bsrc_end - bsrc_start; ++ memmove(result + bdst_start, result + bsrc_start, len); ++ result[bdst_start - 2] = RFC3397_OPT_DOMAIN_SEARCH; ++ result[bdst_start - 1] = len; ++ bsrc_end = bsrc_start; ++ bsrc_start -= MAX_OPT_LEN; 
++ bdst_start -= MAX_OPT_LEN + OPT_HEADER_LEN; ++ } ++ ++ g_free(domains); ++ s->vdnssearch = result; ++ s->vdnssearch_len = memreq; ++ return 0; ++} +diff --git a/slirp/src/if.c b/slirp/src/if.c +new file mode 100644 +index 0000000000..9a1eec97b8 +--- /dev/null ++++ b/slirp/src/if.c +@@ -0,0 +1,215 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++static void ifs_insque(struct mbuf *ifm, struct mbuf *ifmhead) ++{ ++ ifm->ifs_next = ifmhead->ifs_next; ++ ifmhead->ifs_next = ifm; ++ ifm->ifs_prev = ifmhead; ++ ifm->ifs_next->ifs_prev = ifm; ++} ++ ++static void ifs_remque(struct mbuf *ifm) ++{ ++ ifm->ifs_prev->ifs_next = ifm->ifs_next; ++ ifm->ifs_next->ifs_prev = ifm->ifs_prev; ++} ++ ++void if_init(Slirp *slirp) ++{ ++ slirp->if_fastq.qh_link = slirp->if_fastq.qh_rlink = &slirp->if_fastq; ++ slirp->if_batchq.qh_link = slirp->if_batchq.qh_rlink = &slirp->if_batchq; ++} ++ ++/* ++ * if_output: Queue packet into an output queue. ++ * There are 2 output queues, if_fastq and if_batchq. ++ * Each output queue is a doubly linked list of doubly linked lists ++ * of mbufs, each list belonging to one "session" (socket). This ++ * way, we can output packets fairly by sending one packet from each ++ * session, instead of all the packets from one session, then all packets ++ * from the next session, etc. Packets on the if_fastq get absolute ++ * priority, but if one session hogs the link, it gets "downgraded" ++ * to the batchq until it runs out of packets, then it'll return ++ * to the fastq (eg.
if the user does an ls -alR in a telnet session, ++ * it'll temporarily get downgraded to the batchq) ++ */ ++void if_output(struct socket *so, struct mbuf *ifm) ++{ ++ Slirp *slirp = ifm->slirp; ++ M_DUP_DEBUG(slirp, ifm, 0, 0); ++ ++ struct mbuf *ifq; ++ int on_fastq = 1; ++ ++ DEBUG_CALL("if_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ifm = %p", ifm); ++ ++ /* ++ * First remove the mbuf from m_usedlist, ++ * since we're gonna use m_next and m_prev ourselves ++ * XXX Shouldn't need this, gotta change dtom() etc. ++ */ ++ if (ifm->m_flags & M_USEDLIST) { ++ remque(ifm); ++ ifm->m_flags &= ~M_USEDLIST; ++ } ++ ++ /* ++ * See if there's already a batchq list for this session. ++ * This can include an interactive session, which should go on fastq, ++ * but gets too greedy... hence it'll be downgraded from fastq to batchq. ++ * We mustn't put this packet back on the fastq (or we'll send it out of ++ * order) ++ * XXX add cache here? ++ */ ++ if (so) { ++ for (ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ (struct quehead *)ifq != &slirp->if_batchq; ifq = ifq->ifq_prev) { ++ if (so == ifq->ifq_so) { ++ /* A match! */ ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } ++ } ++ ++ /* No match, check which queue to put it on */ ++ if (so && (so->so_iptos & IPTOS_LOWDELAY)) { ++ ifq = (struct mbuf *)slirp->if_fastq.qh_rlink; ++ on_fastq = 1; ++ /* ++ * Check if this packet is a part of the last ++ * packet's session ++ */ ++ if (ifq->ifq_so == so) { ++ ifm->ifq_so = so; ++ ifs_insque(ifm, ifq->ifs_prev); ++ goto diddit; ++ } ++ } else { ++ ifq = (struct mbuf *)slirp->if_batchq.qh_rlink; ++ } ++ ++ /* Create a new doubly linked list for this session */ ++ ifm->ifq_so = so; ++ ifs_init(ifm); ++ insque(ifm, ifq); ++ ++diddit: ++ if (so) { ++ /* Update *_queued */ ++ so->so_queued++; ++ so->so_nqueued++; ++ /* ++ * Check if the interactive session should be downgraded to ++ * the batchq. 
A session is downgraded if it has queued 6 ++ * packets without pausing, and at least 3 of those packets ++ * have been sent over the link ++ * (XXX These are arbitrary numbers, probably not optimal..) ++ */ ++ if (on_fastq && ++ ((so->so_nqueued >= 6) && (so->so_nqueued - so->so_queued) >= 3)) { ++ /* Remove from current queue... */ ++ remque(ifm->ifs_next); ++ ++ /* ...And insert in the new. That'll teach ya! */ ++ insque(ifm->ifs_next, &slirp->if_batchq); ++ } ++ } ++ ++ /* ++ * This prevents us from malloc()ing too many mbufs ++ */ ++ if_start(ifm->slirp); ++} ++ ++/* ++ * Send one packet from each session. ++ * If there are packets on the fastq, they are sent FIFO, before ++ * everything else. Then we choose the first packet from each ++ * batchq session (socket) and send it. ++ * For example, if there are 3 ftp sessions fighting for bandwidth, ++ * one packet will be sent from the first session, then one packet ++ * from the second session, then one packet from the third. ++ */ ++void if_start(Slirp *slirp) ++{ ++ uint64_t now = slirp->cb->clock_get_ns(slirp->opaque); ++ bool from_batchq = false; ++ struct mbuf *ifm, *ifm_next, *ifqt; ++ ++ DEBUG_VERBOSE_CALL("if_start"); ++ ++ if (slirp->if_start_busy) { ++ return; ++ } ++ slirp->if_start_busy = true; ++ ++ struct mbuf *batch_head = NULL; ++ if (slirp->if_batchq.qh_link != &slirp->if_batchq) { ++ batch_head = (struct mbuf *)slirp->if_batchq.qh_link; ++ } ++ ++ if (slirp->if_fastq.qh_link != &slirp->if_fastq) { ++ ifm_next = (struct mbuf *)slirp->if_fastq.qh_link; ++ } else if (batch_head) { ++ /* Nothing on fastq, pick up from batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } else { ++ ifm_next = NULL; ++ } ++ ++ while (ifm_next) { ++ ifm = ifm_next; ++ ++ ifm_next = ifm->ifq_next; ++ if ((struct quehead *)ifm_next == &slirp->if_fastq) { ++ /* No more packets in fastq, switch to batchq */ ++ ifm_next = batch_head; ++ from_batchq = true; ++ } ++ if ((struct quehead *)ifm_next == 
&slirp->if_batchq) { ++ /* end of batchq */ ++ ifm_next = NULL; ++ } ++ ++ /* Try to send packet unless it already expired */ ++ if (ifm->expiration_date >= now && !if_encap(slirp, ifm)) { ++ /* Packet is delayed due to pending ARP or NDP resolution */ ++ continue; ++ } ++ ++ /* Remove it from the queue */ ++ ifqt = ifm->ifq_prev; ++ remque(ifm); ++ ++ /* If there are more packets for this session, re-queue them */ ++ if (ifm->ifs_next != ifm) { ++ struct mbuf *next = ifm->ifs_next; ++ ++ insque(next, ifqt); ++ ifs_remque(ifm); ++ if (!from_batchq) { ++ ifm_next = next; ++ } ++ } ++ ++ /* Update so_queued */ ++ if (ifm->ifq_so && --ifm->ifq_so->so_queued == 0) { ++ /* If there's no more queued, reset nqueued */ ++ ifm->ifq_so->so_nqueued = 0; ++ } ++ ++ m_free(ifm); ++ } ++ ++ slirp->if_start_busy = false; ++} +diff --git a/slirp/src/if.h b/slirp/src/if.h +new file mode 100644 +index 0000000000..7cf9d2750e +--- /dev/null ++++ b/slirp/src/if.h +@@ -0,0 +1,25 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef IF_H ++#define IF_H ++ ++#define IF_COMPRESS 0x01 /* We want compression */ ++#define IF_NOCOMPRESS 0x02 /* Do not do compression */ ++#define IF_AUTOCOMP 0x04 /* Autodetect (default) */ ++#define IF_NOCIDCOMP 0x08 /* CID compression */ ++ ++#define IF_MTU_DEFAULT 1500 ++#define IF_MTU_MIN 68 ++#define IF_MTU_MAX 65521 ++#define IF_MRU_DEFAULT 1500 ++#define IF_MRU_MIN 68 ++#define IF_MRU_MAX 65521 ++#define IF_COMP IF_AUTOCOMP /* Flags for compression */ ++ ++/* 2 for alignment, 14 for ethernet */ ++#define IF_MAXLINKHDR (2 + ETH_HLEN) ++ ++#endif +diff --git a/slirp/src/ip.h b/slirp/src/ip.h +new file mode 100644 +index 0000000000..e5d4aa8a6d +--- /dev/null ++++ b/slirp/src/ip.h +@@ -0,0 +1,242 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * @(#)ip.h 8.1 (Berkeley) 6/10/93 ++ * ip.h,v 1.3 1994/08/21 05:27:30 paul Exp ++ */ ++ ++#ifndef IP_H ++#define IP_H ++ ++#include <glib.h> ++ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++#undef NTOHL ++#undef NTOHS ++#undef HTONL ++#undef HTONS ++#define NTOHL(d) ++#define NTOHS(d) ++#define HTONL(d) ++#define HTONS(d) ++#else ++#ifndef NTOHL ++#define NTOHL(d) ((d) = ntohl((d))) ++#endif ++#ifndef NTOHS ++#define NTOHS(d) ((d) = ntohs((uint16_t)(d))) ++#endif ++#ifndef HTONL ++#define HTONL(d) ((d) = htonl((d))) ++#endif ++#ifndef HTONS ++#define HTONS(d) ((d) = htons((uint16_t)(d))) ++#endif ++#endif ++ ++typedef uint32_t n_long; /* long as received from the net */ ++ ++/* ++ * Definitions for internet protocol version 4. ++ * Per RFC 791, September 1981. ++ */ ++#define IPVERSION 4 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ip_v : 4, /* version */ ++ ip_hl : 4; /* header length */ ++#else ++ uint8_t ip_hl : 4, /* header length */ ++ ip_v : 4; /* version */ ++#endif ++ uint8_t ip_tos; /* type of service */ ++ uint16_t ip_len; /* total length */ ++ uint16_t ip_id; /* identification */ ++ uint16_t ip_off; /* fragment offset field */ ++#define IP_DF 0x4000 /* don't fragment flag */ ++#define IP_MF 0x2000 /* more fragments flag */ ++#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ ++ uint8_t ip_ttl; /* time to live */ ++ uint8_t ip_p; /* protocol */ ++ uint16_t ip_sum; /* checksum */ ++ struct in_addr ip_src, ip_dst; /* source and dest address */ ++} SLIRP_PACKED; ++ ++#define IP_MAXPACKET 65535 /* maximum packet size */ ++ ++/* ++ * Definitions for IP type of service (ip_tos) ++ */ ++#define IPTOS_LOWDELAY 0x10 ++#define IPTOS_THROUGHPUT 0x08 ++#define IPTOS_RELIABILITY 0x04 ++ ++/* ++ * Definitions for options.
++ */ ++#define IPOPT_COPIED(o) ((o)&0x80) ++#define IPOPT_CLASS(o) ((o)&0x60) ++#define IPOPT_NUMBER(o) ((o)&0x1f) ++ ++#define IPOPT_CONTROL 0x00 ++#define IPOPT_RESERVED1 0x20 ++#define IPOPT_DEBMEAS 0x40 ++#define IPOPT_RESERVED2 0x60 ++ ++#define IPOPT_EOL 0 /* end of option list */ ++#define IPOPT_NOP 1 /* no operation */ ++ ++#define IPOPT_RR 7 /* record packet route */ ++#define IPOPT_TS 68 /* timestamp */ ++#define IPOPT_SECURITY 130 /* provide s,c,h,tcc */ ++#define IPOPT_LSRR 131 /* loose source route */ ++#define IPOPT_SATID 136 /* satnet id */ ++#define IPOPT_SSRR 137 /* strict source route */ ++ ++/* ++ * Offsets to fields in options other than EOL and NOP. ++ */ ++#define IPOPT_OPTVAL 0 /* option ID */ ++#define IPOPT_OLEN 1 /* option length */ ++#define IPOPT_OFFSET 2 /* offset within option */ ++#define IPOPT_MINOFF 4 /* min value of above */ ++ ++/* ++ * Time stamp option structure. ++ */ ++struct ip_timestamp { ++ uint8_t ipt_code; /* IPOPT_TS */ ++ uint8_t ipt_len; /* size of structure (variable) */ ++ uint8_t ipt_ptr; /* index of current entry */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t ipt_oflw : 4, /* overflow counter */ ++ ipt_flg : 4; /* flags, see below */ ++#else ++ uint8_t ipt_flg : 4, /* flags, see below */ ++ ipt_oflw : 4; /* overflow counter */ ++#endif ++ union ipt_timestamp { ++ n_long ipt_time[1]; ++ struct ipt_ta { ++ struct in_addr ipt_addr; ++ n_long ipt_time; ++ } ipt_ta[1]; ++ } ipt_timestamp; ++} SLIRP_PACKED; ++ ++/* flag bits for ipt_flg */ ++#define IPOPT_TS_TSONLY 0 /* timestamps only */ ++#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */ ++#define IPOPT_TS_PRESPEC 3 /* specified modules only */ ++ ++/* bits for security (not byte swapped) */ ++#define IPOPT_SECUR_UNCLASS 0x0000 ++#define IPOPT_SECUR_CONFID 0xf135 ++#define IPOPT_SECUR_EFTO 0x789a ++#define IPOPT_SECUR_MMMM 0xbc4d ++#define IPOPT_SECUR_RESTR 0xaf13 ++#define IPOPT_SECUR_SECRET 0xd788 ++#define IPOPT_SECUR_TOPSECRET 0x6bc5 ++ ++/* ++ * 
Internet implementation parameters. ++ */ ++#define MAXTTL 255 /* maximum time to live (seconds) */ ++#define IPDEFTTL 64 /* default ttl, from RFC 1340 */ ++#define IPFRAGTTL 60 /* time to live for frags, slowhz */ ++#define IPTTLDEC 1 /* subtracted when forwarding */ ++ ++#define IP_MSS 576 /* default maximum segment size */ ++ ++#if GLIB_SIZEOF_VOID_P == 4 ++struct mbuf_ptr { ++ struct mbuf *mptr; ++ uint32_t dummy; ++} SLIRP_PACKED; ++#else ++struct mbuf_ptr { ++ struct mbuf *mptr; ++} SLIRP_PACKED; ++#endif ++struct qlink { ++ void *next, *prev; ++}; ++ ++/* ++ * Overlay for ip header used by other protocols (tcp, udp). ++ */ ++struct ipovly { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ uint16_t ih_len; /* protocol length */ ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++} SLIRP_PACKED; ++ ++/* ++ * Ip reassembly queue structure. Each fragment ++ * being reassembled is attached to one of these structures. ++ * They are timed out after ipq_ttl drops to 0, and may also ++ * be reclaimed if memory becomes tight. ++ * size 28 bytes ++ */ ++struct ipq { ++ struct qlink frag_link; /* to ip headers of fragments */ ++ struct qlink ip_link; /* to other reass headers */ ++ uint8_t ipq_ttl; /* time for reass q to live */ ++ uint8_t ipq_p; /* protocol of this fragment */ ++ uint16_t ipq_id; /* sequence id for reassembly */ ++ struct in_addr ipq_src, ipq_dst; ++}; ++ ++/* ++ * Ip header, when holding a fragment. 
++ * ++ * Note: ipf_link must be at same offset as frag_link above ++ */ ++struct ipasfrag { ++ struct qlink ipf_link; ++ struct ip ipf_ip; ++}; ++ ++G_STATIC_ASSERT(offsetof(struct ipq, frag_link) == ++ offsetof(struct ipasfrag, ipf_link)); ++ ++#define ipf_off ipf_ip.ip_off ++#define ipf_tos ipf_ip.ip_tos ++#define ipf_len ipf_ip.ip_len ++#define ipf_next ipf_link.next ++#define ipf_prev ipf_link.prev ++ ++#endif +diff --git a/slirp/src/ip6.h b/slirp/src/ip6.h +new file mode 100644 +index 0000000000..0630309d29 +--- /dev/null ++++ b/slirp/src/ip6.h +@@ -0,0 +1,214 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_H ++#define SLIRP_IP6_H ++ ++#include <glib.h> ++#include <string.h> ++ ++#define ALLNODES_MULTICAST \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01 \ ++ } \ ++ } ++ ++#define SOLICITED_NODE_PREFIX \ ++ { \ ++ .s6_addr = { \ ++ 0xff, \ ++ 0x02, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x01, \ ++ 0xff, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++#define LINKLOCAL_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0xfe, \ ++ 0x80, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x02 \ ++ } \ ++ } ++ ++#define ZERO_ADDR \ ++ { \ ++ .s6_addr = { \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00, \ ++ 0x00 \ ++ } \ ++ } ++ ++static inline bool in6_equal(const struct in6_addr *a, const struct in6_addr *b) ++{ ++ return memcmp(a, b, sizeof(*a)) == 0; ++} ++ ++static inline bool in6_equal_net(const struct in6_addr *a, ++ const struct 
in6_addr *b, int
prefix_len) ++{ ++ if (memcmp(a, b, prefix_len / 8) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return a->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)) == ++ b->s6_addr[prefix_len / 8] >> (8 - (prefix_len % 8)); ++} ++ ++static inline bool in6_equal_mach(const struct in6_addr *a, ++ const struct in6_addr *b, int prefix_len) ++{ ++ if (memcmp(&(a->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ &(b->s6_addr[DIV_ROUND_UP(prefix_len, 8)]), ++ 16 - DIV_ROUND_UP(prefix_len, 8)) != 0) { ++ return 0; ++ } ++ ++ if (prefix_len % 8 == 0) { ++ return 1; ++ } ++ ++ return (a->s6_addr[prefix_len / 8] & ++ ((1U << (8 - (prefix_len % 8))) - 1)) == ++ (b->s6_addr[prefix_len / 8] & ((1U << (8 - (prefix_len % 8))) - 1)); ++} ++ ++ ++#define in6_equal_router(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vhost_addr6, 64))) ++ ++#define in6_equal_dns(a) \ ++ ((in6_equal_net(a, &slirp->vprefix_addr6, slirp->vprefix_len) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, slirp->vprefix_len)) || \ ++ (in6_equal_net(a, &(struct in6_addr)LINKLOCAL_ADDR, 64) && \ ++ in6_equal_mach(a, &slirp->vnameserver_addr6, 64))) ++ ++#define in6_equal_host(a) (in6_equal_router(a) || in6_equal_dns(a)) ++ ++#define in6_solicitednode_multicast(a) \ ++ (in6_equal_net(a, &(struct in6_addr)SOLICITED_NODE_PREFIX, 104)) ++ ++#define in6_zero(a) (in6_equal(a, &(struct in6_addr)ZERO_ADDR)) ++ ++/* Compute emulated host MAC address from its ipv6 address */ ++static inline void in6_compute_ethaddr(struct in6_addr ip, ++ uint8_t eth[ETH_ALEN]) ++{ ++ eth[0] = 0x52; ++ eth[1] = 0x56; ++ memcpy(&eth[2], &ip.s6_addr[16 - (ETH_ALEN - 2)], ETH_ALEN - 2); ++} ++ ++/* ++ * Definitions for internet protocol version 6. ++ * Per RFC 2460, December 1998.
++ */ ++#define IP6VERSION 6 ++#define IP6_HOP_LIMIT 255 ++ ++/* ++ * Structure of an internet header, naked of options. ++ */ ++struct ip6 { ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t ip_v : 4, /* version */ ++ ip_tc_hi : 4, /* traffic class */ ++ ip_tc_lo : 4, ip_fl_hi : 4, /* flow label */ ++ ip_fl_lo : 16; ++#else ++ uint32_t ip_tc_hi : 4, ip_v : 4, ip_fl_hi : 4, ip_tc_lo : 4, ip_fl_lo : 16; ++#endif ++ uint16_t ip_pl; /* payload length */ ++ uint8_t ip_nh; /* next header */ ++ uint8_t ip_hl; /* hop limit */ ++ struct in6_addr ip_src, ip_dst; /* source and dest address */ ++}; ++ ++/* ++ * IPv6 pseudo-header used by upper-layer protocols ++ */ ++struct ip6_pseudohdr { ++ struct in6_addr ih_src; /* source internet address */ ++ struct in6_addr ih_dst; /* destination internet address */ ++ uint32_t ih_pl; /* upper-layer packet length */ ++ uint16_t ih_zero_hi; /* zero */ ++ uint8_t ih_zero_lo; /* zero */ ++ uint8_t ih_nh; /* next header */ ++}; ++ ++/* ++ * We don't want to mark these ip6 structs as packed as they are naturally ++ * correctly aligned; instead assert that there is no stray padding. ++ * If we marked the struct as packed then we would be unable to take ++ * the address of any of the fields in it. ++ */ ++G_STATIC_ASSERT(sizeof(struct ip6) == 40); ++G_STATIC_ASSERT(sizeof(struct ip6_pseudohdr) == 40); ++ ++#endif +diff --git a/slirp/src/ip6_icmp.c b/slirp/src/ip6_icmp.c +new file mode 100644 +index 0000000000..738b40f725 +--- /dev/null ++++ b/slirp/src/ip6_icmp.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++#define NDP_Interval \ ++ g_rand_int_range(slirp->grand, NDP_MinRtrAdvInterval, NDP_MaxRtrAdvInterval) ++ ++static void ra_timer_handler(void *opaque) ++{ ++ Slirp *slirp = opaque; ++ ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++ ndp_send_ra(slirp); ++} ++ ++void icmp6_init(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->ra_timer = ++ slirp->cb->timer_new(ra_timer_handler, slirp, slirp->opaque); ++ slirp->cb->timer_mod(slirp->ra_timer, ++ slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS + ++ NDP_Interval, ++ slirp->opaque); ++} ++ ++void icmp6_cleanup(Slirp *slirp) ++{ ++ if (!slirp->in6_enabled) { ++ return; ++ } ++ ++ slirp->cb->timer_free(slirp->ra_timer, slirp->opaque); ++} ++ ++static void icmp6_send_echoreply(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ struct mbuf *t = m_get(slirp); ++ t->m_len = sizeof(struct ip6) + ntohs(ip->ip_pl); ++ memcpy(t->m_data, m->m_data, t->m_len); ++ ++ /* IPv6 Packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_dst = ip->ip_src; ++ rip->ip_src = ip->ip_dst; ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_ECHO_REPLY; ++ ricmp->icmp6_cksum = 0; ++ ++ /* Checksum */ ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src) ++{ ++ Slirp *slirp = m->slirp; ++ struct mbuf *t; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ DEBUG_CALL("icmp6_send_error"); ++ DEBUG_ARG("type = %d, code = %d", type, code); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip->ip_src) || in6_zero(&ip->ip_src)) { ++ /* TODO icmp error? 
*/ ++ return; ++ } ++ ++ t = m_get(slirp); ++ ++ /* IPv6 packet */ ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = *src; ++ rip->ip_dst = ip->ip_src; ++ inet_ntop(AF_INET6, &rip->ip_dst, addrstr, INET6_ADDRSTRLEN); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ rip->ip_nh = IPPROTO_ICMPV6; ++ const int error_data_len = MIN( ++ m->m_len, slirp->if_mtu - (sizeof(struct ip6) + ICMP6_ERROR_MINLEN)); ++ rip->ip_pl = htons(ICMP6_ERROR_MINLEN + error_data_len); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = type; ++ ricmp->icmp6_code = code; ++ ricmp->icmp6_cksum = 0; ++ ++ switch (type) { ++ case ICMP6_UNREACH: ++ case ICMP6_TIMXCEED: ++ ricmp->icmp6_err.unused = 0; ++ break; ++ case ICMP6_TOOBIG: ++ ricmp->icmp6_err.mtu = htonl(slirp->if_mtu); ++ break; ++ case ICMP6_PARAMPROB: ++ /* TODO: Handle this case */ ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ t->m_data += ICMP6_ERROR_MINLEN; ++ memcpy(t->m_data, m->m_data, error_data_len); ++ ++ /* Checksum */ ++ t->m_data -= ICMP6_ERROR_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code) ++{ ++ struct in6_addr src = LINKLOCAL_ADDR; ++ icmp6_forward_error(m, type, code, &src); ++} ++ ++/* ++ * Send NDP Router Advertisement ++ */ ++void ndp_send_ra(Slirp *slirp) ++{ ++ DEBUG_CALL("ndp_send_ra"); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ size_t pl_size = 0; ++ struct in6_addr addr; ++ uint32_t scope_id; ++ ++ rip->ip_src = (struct in6_addr)LINKLOCAL_ADDR; ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ rip->ip_nh = IPPROTO_ICMPV6; ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ 
ricmp->icmp6_type = ICMP6_NDP_RA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nra.chl = NDP_AdvCurHopLimit; ++ ricmp->icmp6_nra.M = NDP_AdvManagedFlag; ++ ricmp->icmp6_nra.O = NDP_AdvOtherConfigFlag; ++ ricmp->icmp6_nra.reserved = 0; ++ ricmp->icmp6_nra.lifetime = htons(NDP_AdvDefaultLifetime); ++ ricmp->icmp6_nra.reach_time = htonl(NDP_AdvReachableTime); ++ ricmp->icmp6_nra.retrans_time = htonl(NDP_AdvRetransTime); ++ t->m_data += ICMP6_NDP_RA_MINLEN; ++ pl_size += ICMP6_NDP_RA_MINLEN; ++ ++ /* Source link-layer address (NDP option) */ ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(rip->ip_src, opt->ndpopt_linklayer); ++ t->m_data += NDPOPT_LINKLAYER_LEN; ++ pl_size += NDPOPT_LINKLAYER_LEN; ++ ++ /* Prefix information (NDP option) */ ++ struct ndpopt *opt2 = mtod(t, struct ndpopt *); ++ opt2->ndpopt_type = NDPOPT_PREFIX_INFO; ++ opt2->ndpopt_len = NDPOPT_PREFIXINFO_LEN / 8; ++ opt2->ndpopt_prefixinfo.prefix_length = slirp->vprefix_len; ++ opt2->ndpopt_prefixinfo.L = 1; ++ opt2->ndpopt_prefixinfo.A = 1; ++ opt2->ndpopt_prefixinfo.reserved1 = 0; ++ opt2->ndpopt_prefixinfo.valid_lt = htonl(NDP_AdvValidLifetime); ++ opt2->ndpopt_prefixinfo.pref_lt = htonl(NDP_AdvPrefLifetime); ++ opt2->ndpopt_prefixinfo.reserved2 = 0; ++ opt2->ndpopt_prefixinfo.prefix = slirp->vprefix_addr6; ++ t->m_data += NDPOPT_PREFIXINFO_LEN; ++ pl_size += NDPOPT_PREFIXINFO_LEN; ++ ++ /* Recursive DNS Server (NDP option) */ ++ if (get_dns6_addr(&addr, &scope_id) >= 0) { ++ /* Host system does have an IPv6 DNS server, announce our proxy.
*/ ++ struct ndpopt *opt3 = mtod(t, struct ndpopt *); ++ opt3->ndpopt_type = NDPOPT_RDNSS; ++ opt3->ndpopt_len = NDPOPT_RDNSS_LEN / 8; ++ opt3->ndpopt_rdnss.reserved = 0; ++ opt3->ndpopt_rdnss.lifetime = htonl(2 * NDP_MaxRtrAdvInterval); ++ opt3->ndpopt_rdnss.addr = slirp->vnameserver_addr6; ++ t->m_data += NDPOPT_RDNSS_LEN; ++ pl_size += NDPOPT_RDNSS_LEN; ++ } ++ ++ rip->ip_pl = htons(pl_size); ++ t->m_data -= sizeof(struct ip6) + pl_size; ++ t->m_len = sizeof(struct ip6) + pl_size; ++ ++ /* ICMPv6 Checksum */ ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Send NDP Neighbor Solicitation ++ */ ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &addr, addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_send_ns"); ++ DEBUG_ARG("target = %s", addrstr); ++ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = slirp->vhost_addr6; ++ rip->ip_dst = (struct in6_addr)SOLICITED_NODE_PREFIX; ++ memcpy(&rip->ip_dst.s6_addr[13], &addr.s6_addr[13], 3); ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NS_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NS; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nns.reserved = 0; ++ ricmp->icmp6_nns.target = addr; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NS_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_SOURCE; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(slirp->vhost_addr6, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++
ip6_output(NULL, t, 1); ++} ++ ++/* ++ * Send NDP Neighbor Advertisement ++ */ ++static void ndp_send_na(Slirp *slirp, struct ip6 *ip, struct icmp6 *icmp) ++{ ++ /* Build IPv6 packet */ ++ struct mbuf *t = m_get(slirp); ++ struct ip6 *rip = mtod(t, struct ip6 *); ++ rip->ip_src = icmp->icmp6_nns.target; ++ if (in6_zero(&ip->ip_src)) { ++ rip->ip_dst = (struct in6_addr)ALLNODES_MULTICAST; ++ } else { ++ rip->ip_dst = ip->ip_src; ++ } ++ rip->ip_nh = IPPROTO_ICMPV6; ++ rip->ip_pl = htons(ICMP6_NDP_NA_MINLEN + NDPOPT_LINKLAYER_LEN); ++ t->m_len = sizeof(struct ip6) + ntohs(rip->ip_pl); ++ ++ /* Build ICMPv6 packet */ ++ t->m_data += sizeof(struct ip6); ++ struct icmp6 *ricmp = mtod(t, struct icmp6 *); ++ ricmp->icmp6_type = ICMP6_NDP_NA; ++ ricmp->icmp6_code = 0; ++ ricmp->icmp6_cksum = 0; ++ ++ /* NDP */ ++ ricmp->icmp6_nna.R = NDP_IsRouter; ++ ricmp->icmp6_nna.S = !IN6_IS_ADDR_MULTICAST(&rip->ip_dst); ++ ricmp->icmp6_nna.O = 1; ++ ricmp->icmp6_nna.reserved_hi = 0; ++ ricmp->icmp6_nna.reserved_lo = 0; ++ ricmp->icmp6_nna.target = icmp->icmp6_nns.target; ++ ++ /* Build NDP option */ ++ t->m_data += ICMP6_NDP_NA_MINLEN; ++ struct ndpopt *opt = mtod(t, struct ndpopt *); ++ opt->ndpopt_type = NDPOPT_LINKLAYER_TARGET; ++ opt->ndpopt_len = NDPOPT_LINKLAYER_LEN / 8; ++ in6_compute_ethaddr(ricmp->icmp6_nna.target, opt->ndpopt_linklayer); ++ ++ /* ICMPv6 Checksum */ ++ t->m_data -= ICMP6_NDP_NA_MINLEN; ++ t->m_data -= sizeof(struct ip6); ++ ricmp->icmp6_cksum = ip6_cksum(t); ++ ++ ip6_output(NULL, t, 0); ++} ++ ++/* ++ * Process a NDP message ++ */ ++static void ndp_input(struct mbuf *m, Slirp *slirp, struct ip6 *ip, ++ struct icmp6 *icmp) ++{ ++ g_assert(M_ROOMBEFORE(m) >= ETH_HLEN); ++ ++ m->m_len += ETH_HLEN; ++ m->m_data -= ETH_HLEN; ++ struct ethhdr *eth = mtod(m, struct ethhdr *); ++ m->m_len -= ETH_HLEN; ++ m->m_data += ETH_HLEN; ++ ++ switch (icmp->icmp6_type) { ++ case ICMP6_NDP_RS: ++ DEBUG_CALL(" type = Router Solicitation"); ++ if (ip->ip_hl == 255 && 
icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_RS_MINLEN) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ++ ndp_send_ra(slirp); ++ } ++ break; ++ ++ case ICMP6_NDP_RA: ++ DEBUG_CALL(" type = Router Advertisement"); ++ slirp->cb->guest_error("Warning: guest sent NDP RA, but shouldn't", ++ slirp->opaque); ++ break; ++ ++ case ICMP6_NDP_NS: ++ DEBUG_CALL(" type = Neighbor Solicitation"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nns.target) && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NS_MINLEN && ++ (!in6_zero(&ip->ip_src) || ++ in6_solicitednode_multicast(&ip->ip_dst))) { ++ if (in6_equal_host(&icmp->icmp6_nns.target)) { ++ /* Gratuitous NDP */ ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ ndp_send_na(slirp, ip, icmp); ++ } ++ } ++ break; ++ ++ case ICMP6_NDP_NA: ++ DEBUG_CALL(" type = Neighbor Advertisement"); ++ if (ip->ip_hl == 255 && icmp->icmp6_code == 0 && ++ ntohs(ip->ip_pl) >= ICMP6_NDP_NA_MINLEN && ++ !IN6_IS_ADDR_MULTICAST(&icmp->icmp6_nna.target) && ++ (!IN6_IS_ADDR_MULTICAST(&ip->ip_dst) || icmp->icmp6_nna.S == 0)) { ++ ndp_table_add(slirp, ip->ip_src, eth->h_source); ++ } ++ break; ++ ++ case ICMP6_NDP_REDIRECT: ++ DEBUG_CALL(" type = Redirect"); ++ slirp->cb->guest_error( ++ "Warning: guest sent NDP REDIRECT, but shouldn't", slirp->opaque); ++ break; ++ } ++} ++ ++/* ++ * Process a received ICMPv6 message. 
++ */ ++void icmp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, ETH_HLEN); ++ ++ struct icmp6 *icmp; ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ int hlen = sizeof(struct ip6); ++ ++ DEBUG_CALL("icmp6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (ntohs(ip->ip_pl) < ICMP6_MINLEN) { ++ goto end; ++ } ++ ++ if (ip6_cksum(m)) { ++ goto end; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icmp = mtod(m, struct icmp6 *); ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp6_type = %d", icmp->icmp6_type); ++ switch (icmp->icmp6_type) { ++ case ICMP6_ECHO_REQUEST: ++ if (in6_equal_host(&ip->ip_dst)) { ++ icmp6_send_echoreply(m, slirp, ip, icmp); ++ } else { ++ /* TODO */ ++ g_critical("external icmpv6 not supported yet"); ++ } ++ break; ++ ++ case ICMP6_NDP_RS: ++ case ICMP6_NDP_RA: ++ case ICMP6_NDP_NS: ++ case ICMP6_NDP_NA: ++ case ICMP6_NDP_REDIRECT: ++ ndp_input(m, slirp, ip, icmp); ++ break; ++ ++ case ICMP6_UNREACH: ++ case ICMP6_TOOBIG: ++ case ICMP6_TIMXCEED: ++ case ICMP6_PARAMPROB: ++ /* XXX? report error? close socket? */ ++ default: ++ break; ++ } ++ ++end: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_icmp.h b/slirp/src/ip6_icmp.h +new file mode 100644 +index 0000000000..9070999cfc +--- /dev/null ++++ b/slirp/src/ip6_icmp.h +@@ -0,0 +1,220 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#ifndef SLIRP_IP6_ICMP_H ++#define SLIRP_IP6_ICMP_H ++ ++/* ++ * Interface Control Message Protocol version 6 Definitions. ++ * Per RFC 4443, March 2006. ++ * ++ * Network Discover Protocol Definitions. ++ * Per RFC 4861, September 2007. 
++ */ ++ ++struct icmp6_echo { /* Echo Messages */ ++ uint16_t id; ++ uint16_t seq_num; ++}; ++ ++union icmp6_error_body { ++ uint32_t unused; ++ uint32_t pointer; ++ uint32_t mtu; ++}; ++ ++/* ++ * NDP Messages ++ */ ++struct ndp_rs { /* Router Solicitation Message */ ++ uint32_t reserved; ++}; ++ ++struct ndp_ra { /* Router Advertisement Message */ ++ uint8_t chl; /* Cur Hop Limit */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t M : 1, O : 1, reserved : 6; ++#else ++ uint8_t reserved : 6, O : 1, M : 1; ++#endif ++ uint16_t lifetime; /* Router Lifetime */ ++ uint32_t reach_time; /* Reachable Time */ ++ uint32_t retrans_time; /* Retrans Timer */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ra) == 12); ++ ++struct ndp_ns { /* Neighbor Solicitation Message */ ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_ns) == 20); ++ ++struct ndp_na { /* Neighbor Advertisement Message */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint32_t R : 1, /* Router Flag */ ++ S : 1, /* Solicited Flag */ ++ O : 1, /* Override Flag */ ++ reserved_hi : 5, reserved_lo : 24; ++#else ++ uint32_t reserved_hi : 5, O : 1, S : 1, R : 1, reserved_lo : 24; ++#endif ++ struct in6_addr target; /* Target Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_na) == 20); ++ ++struct ndp_redirect { ++ uint32_t reserved; ++ struct in6_addr target; /* Target Address */ ++ struct in6_addr dest; /* Destination Address */ ++}; ++ ++G_STATIC_ASSERT(sizeof(struct ndp_redirect) == 36); ++ ++/* ++ * Structure of an icmpv6 header. 
++ */ ++struct icmp6 { ++ uint8_t icmp6_type; /* type of message, see below */ ++ uint8_t icmp6_code; /* type sub code */ ++ uint16_t icmp6_cksum; /* ones complement cksum of struct */ ++ union { ++ union icmp6_error_body error_body; ++ struct icmp6_echo echo; ++ struct ndp_rs ndp_rs; ++ struct ndp_ra ndp_ra; ++ struct ndp_ns ndp_ns; ++ struct ndp_na ndp_na; ++ struct ndp_redirect ndp_redirect; ++ } icmp6_body; ++#define icmp6_err icmp6_body.error_body ++#define icmp6_echo icmp6_body.echo ++#define icmp6_nrs icmp6_body.ndp_rs ++#define icmp6_nra icmp6_body.ndp_ra ++#define icmp6_nns icmp6_body.ndp_ns ++#define icmp6_nna icmp6_body.ndp_na ++#define icmp6_redirect icmp6_body.ndp_redirect ++}; ++ ++G_STATIC_ASSERT(sizeof(struct icmp6) == 40); ++ ++#define ICMP6_MINLEN 4 ++#define ICMP6_ERROR_MINLEN 8 ++#define ICMP6_ECHO_MINLEN 8 ++#define ICMP6_NDP_RS_MINLEN 8 ++#define ICMP6_NDP_RA_MINLEN 16 ++#define ICMP6_NDP_NS_MINLEN 24 ++#define ICMP6_NDP_NA_MINLEN 24 ++#define ICMP6_NDP_REDIRECT_MINLEN 40 ++ ++/* ++ * NDP Options ++ */ ++struct ndpopt { ++ uint8_t ndpopt_type; /* Option type */ ++ uint8_t ndpopt_len; /* /!\ In units of 8 octets */ ++ union { ++ unsigned char linklayer_addr[6]; /* Source/Target Link-layer */ ++#define ndpopt_linklayer ndpopt_body.linklayer_addr ++ struct prefixinfo { /* Prefix Information */ ++ uint8_t prefix_length; ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t L : 1, A : 1, reserved1 : 6; ++#else ++ uint8_t reserved1 : 6, A : 1, L : 1; ++#endif ++ uint32_t valid_lt; /* Valid Lifetime */ ++ uint32_t pref_lt; /* Preferred Lifetime */ ++ uint32_t reserved2; ++ struct in6_addr prefix; ++ } SLIRP_PACKED prefixinfo; ++#define ndpopt_prefixinfo ndpopt_body.prefixinfo ++ struct rdnss { ++ uint16_t reserved; ++ uint32_t lifetime; ++ struct in6_addr addr; ++ } SLIRP_PACKED rdnss; ++#define ndpopt_rdnss ndpopt_body.rdnss ++ } ndpopt_body; ++} SLIRP_PACKED; ++ ++/* NDP options type */ ++#define NDPOPT_LINKLAYER_SOURCE 1 /* Source Link-Layer Address */ 
++#define NDPOPT_LINKLAYER_TARGET 2 /* Target Link-Layer Address */ ++#define NDPOPT_PREFIX_INFO 3 /* Prefix Information */ ++#define NDPOPT_RDNSS 25 /* Recursive DNS Server Address */ ++ ++/* NDP options size, in octets. */ ++#define NDPOPT_LINKLAYER_LEN 8 ++#define NDPOPT_PREFIXINFO_LEN 32 ++#define NDPOPT_RDNSS_LEN 24 ++ ++/* ++ * Definition of type and code field values. ++ * Per https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xml ++ * Last Updated 2012-11-12 ++ */ ++ ++/* Errors */ ++#define ICMP6_UNREACH 1 /* Destination Unreachable */ ++#define ICMP6_UNREACH_NO_ROUTE 0 /* no route to dest */ ++#define ICMP6_UNREACH_DEST_PROHIB 1 /* com with dest prohibited */ ++#define ICMP6_UNREACH_SCOPE 2 /* beyond scope of src addr */ ++#define ICMP6_UNREACH_ADDRESS 3 /* address unreachable */ ++#define ICMP6_UNREACH_PORT 4 /* port unreachable */ ++#define ICMP6_UNREACH_SRC_FAIL 5 /* src addr failed */ ++#define ICMP6_UNREACH_REJECT_ROUTE 6 /* reject route to dest */ ++#define ICMP6_UNREACH_SRC_HDR_ERROR 7 /* error in src routing header */ ++#define ICMP6_TOOBIG 2 /* Packet Too Big */ ++#define ICMP6_TIMXCEED 3 /* Time Exceeded */ ++#define ICMP6_TIMXCEED_INTRANS 0 /* hop limit exceeded in transit */ ++#define ICMP6_TIMXCEED_REASS 1 /* ttl=0 in reass */ ++#define ICMP6_PARAMPROB 4 /* Parameter Problem */ ++#define ICMP6_PARAMPROB_HDR_FIELD 0 /* err header field */ ++#define ICMP6_PARAMPROB_NXTHDR_TYPE 1 /* unrecognized Next Header type */ ++#define ICMP6_PARAMPROB_IPV6_OPT 2 /* unrecognized IPv6 option */ ++ ++/* Informational Messages */ ++#define ICMP6_ECHO_REQUEST 128 /* Echo Request */ ++#define ICMP6_ECHO_REPLY 129 /* Echo Reply */ ++#define ICMP6_NDP_RS 133 /* Router Solicitation (NDP) */ ++#define ICMP6_NDP_RA 134 /* Router Advertisement (NDP) */ ++#define ICMP6_NDP_NS 135 /* Neighbor Solicitation (NDP) */ ++#define ICMP6_NDP_NA 136 /* Neighbor Advertisement (NDP) */ ++#define ICMP6_NDP_REDIRECT 137 /* Redirect Message (NDP) */ ++ ++/* ++ * 
Router Configuration Variables (rfc4861#section-6) ++ */ ++#define NDP_IsRouter 1 ++#define NDP_AdvSendAdvertisements 1 ++#define NDP_MaxRtrAdvInterval 600000 ++#define NDP_MinRtrAdvInterval \ ++ ((NDP_MaxRtrAdvInterval >= 9) ? NDP_MaxRtrAdvInterval / 3 : \ ++ NDP_MaxRtrAdvInterval) ++#define NDP_AdvManagedFlag 0 ++#define NDP_AdvOtherConfigFlag 0 ++#define NDP_AdvLinkMTU 0 ++#define NDP_AdvReachableTime 0 ++#define NDP_AdvRetransTime 0 ++#define NDP_AdvCurHopLimit 64 ++#define NDP_AdvDefaultLifetime ((3 * NDP_MaxRtrAdvInterval) / 1000) ++#define NDP_AdvValidLifetime 86400 ++#define NDP_AdvOnLinkFlag 1 ++#define NDP_AdvPrefLifetime 14400 ++#define NDP_AdvAutonomousFlag 1 ++ ++void icmp6_init(Slirp *slirp); ++void icmp6_cleanup(Slirp *slirp); ++void icmp6_input(struct mbuf *); ++void icmp6_forward_error(struct mbuf *m, uint8_t type, uint8_t code, struct in6_addr *src); ++void icmp6_send_error(struct mbuf *m, uint8_t type, uint8_t code); ++void ndp_send_ra(Slirp *slirp); ++void ndp_send_ns(Slirp *slirp, struct in6_addr addr); ++ ++#endif +diff --git a/slirp/src/ip6_input.c b/slirp/src/ip6_input.c +new file mode 100644 +index 0000000000..b3d98653df +--- /dev/null ++++ b/slirp/src/ip6_input.c +@@ -0,0 +1,88 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. ++ */ ++ ++#include "slirp.h" ++#include "ip6_icmp.h" ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip6_init(Slirp *slirp) ++{ ++ icmp6_init(slirp); ++} ++ ++void ip6_cleanup(Slirp *slirp) ++{ ++ icmp6_cleanup(slirp); ++} ++ ++void ip6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ /* NDP reads the ethernet header for gratuitous NDP */ ++ M_DUP_DEBUG(slirp, m, 1, TCPIPHDR_DELTA + 2 + ETH_HLEN); ++ ++ struct ip6 *ip6; ++ ++ if (!slirp->in6_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip6_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ ip6 = mtod(m, struct ip6 *); ++ ++ if (ip6->ip_v != IP6VERSION) { ++ goto bad; ++ } ++ ++ if (ntohs(ip6->ip_pl) + sizeof(struct ip6) > slirp->if_mtu) { ++ icmp6_send_error(m, ICMP6_TOOBIG, 0); ++ goto bad; ++ } ++ ++ // Check if the message size is big enough to hold what's ++ // set in the payload length header. If not this is an invalid ++ // packet ++ if (m->m_len < ntohs(ip6->ip_pl) + sizeof(struct ip6)) { ++ goto bad; ++ } ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip6->ip_hl == 0) { ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip6->ip_nh) { ++ case IPPROTO_TCP: ++ NTOHS(ip6->ip_pl); ++ tcp_input(m, sizeof(struct ip6), (struct socket *)NULL, AF_INET6); ++ break; ++ case IPPROTO_UDP: ++ udp6_input(m); ++ break; ++ case IPPROTO_ICMPV6: ++ icmp6_input(m); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} +diff --git a/slirp/src/ip6_output.c b/slirp/src/ip6_output.c +new file mode 100644 +index 0000000000..834f1c0a32 +--- /dev/null ++++ b/slirp/src/ip6_output.c +@@ -0,0 +1,45 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF6_THRESH 10 ++ ++/* ++ * IPv6 output. The packet in mbuf chain m contains a IP header ++ */ ++int ip6_output(struct socket *so, struct mbuf *m, int fast) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip = mtod(m, struct ip6 *); ++ ++ DEBUG_CALL("ip6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* Fill IPv6 header */ ++ ip->ip_v = IP6VERSION; ++ ip->ip_hl = IP6_HOP_LIMIT; ++ ip->ip_tc_hi = 0; ++ ip->ip_tc_lo = 0; ++ ip->ip_fl_hi = 0; ++ ip->ip_fl_lo = 0; ++ ++ if (fast) { ++ /* We cannot fast-send non-multicast, we'd need a NDP NS */ ++ assert(IN6_IS_ADDR_MULTICAST(&ip->ip_dst)); ++ if_encap(m->slirp, m); ++ m_free(m); ++ } else { ++ if_output(so, m); ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/ip_icmp.c b/slirp/src/ip_icmp.c +new file mode 100644 +index 0000000000..9fba653a46 +--- /dev/null ++++ b/slirp/src/ip_icmp.c +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 ++ * ip_icmp.c,v 1.7 1995/05/30 08:09:42 rgrimes Exp ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#ifndef WITH_ICMP_ERROR_MSG ++#define WITH_ICMP_ERROR_MSG 0 ++#endif ++ ++/* The message sent when emulating PING */ ++/* Be nice and tell them it's just a pseudo-ping packet */ ++static const char icmp_ping_msg[] = ++ "This is a pseudo-PING packet used by Slirp to emulate ICMP ECHO-REQUEST " ++ "packets.\n"; ++ ++/* list of actions for icmp_send_error() on RX of an icmp message */ ++static const int icmp_flush[19] = { ++ /* ECHO REPLY (0) */ 0, ++ 1, ++ 1, ++ /* DEST UNREACH (3) */ 1, ++ /* SOURCE QUENCH (4)*/ 1, ++ /* REDIRECT (5) */ 1, ++ 1, ++ 1, ++ /* ECHO (8) */ 0, ++ /* ROUTERADVERT (9) */ 1, ++ /* ROUTERSOLICIT (10) */ 1, ++ /* TIME EXCEEDED (11) */ 1, ++ /* PARAMETER PROBLEM (12) */ 1, ++ /* TIMESTAMP (13) */ 0, ++ /* TIMESTAMP REPLY (14) */ 0, ++ /* INFO (15) */ 0, ++ /* INFO REPLY (16) */ 0, ++ /* ADDR MASK (17) */ 0, ++ /* ADDR MASK REPLY (18) */ 0 ++}; ++ ++void icmp_init(Slirp *slirp) ++{ ++ slirp->icmp.so_next = slirp->icmp.so_prev = &slirp->icmp; ++ slirp->icmp_last_so = &slirp->icmp; ++} ++ ++void 
icmp_cleanup(Slirp *slirp)
++{
++    struct socket *so, *so_next;
++
++    for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) {
++        so_next = so->so_next;
++        icmp_detach(so);
++    }
++}
++/* Relay the echo request in m through a host ICMP datagram socket. */
++static int icmp_send(struct socket *so, struct mbuf *m, int hlen)
++{
++    Slirp *slirp = m->slirp;
++    M_DUP_DEBUG(slirp, m, 0, 0);
++
++    struct ip *ip = mtod(m, struct ip *);
++    struct sockaddr_in addr = { 0 }; /* no stray bytes reach sendto() */
++
++    so->s = slirp_socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
++    if (so->s == -1) {
++        return -1;
++    }
++
++    if (slirp_bind_outbound(so, AF_INET) != 0) {
++        // bind failed - close socket (it was never registered for polling)
++        closesocket(so->s);
++        so->s = -1;
++        return -1;
++    }
++    so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque);
++
++    so->so_m = m;
++    so->so_faddr = ip->ip_dst;
++    so->so_laddr = ip->ip_src;
++    so->so_iptos = ip->ip_tos;
++    so->so_type = IPPROTO_ICMP;
++    so->so_state = SS_ISFCONNECTED;
++    so->so_expire = curtime + SO_EXPIRE;
++
++    addr.sin_family = AF_INET;
++    addr.sin_addr = so->so_faddr;
++
++    insque(so, &so->slirp->icmp);
++
++    if (sendto(so->s, m->m_data + hlen, m->m_len - hlen, 0,
++               (struct sockaddr *)&addr, sizeof(addr)) == -1) {
++        DEBUG_MISC("icmp_input icmp sendto tx errno = %d-%s", errno,
++                   strerror(errno));
++        icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno));
++        icmp_detach(so);
++    }
++
++    return 0;
++}
++
++void icmp_detach(struct socket *so)
++{
++    so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque);
++    closesocket(so->s);
++    sofree(so);
++}
++
++/*
++ * Process a received ICMP message.
++ */
++void icmp_input(struct mbuf *m, int hlen)
++{
++    Slirp *slirp = m->slirp;
++    M_DUP_DEBUG(slirp, m, 0, 0);
++
++    register struct icmp *icp;
++    register struct ip *ip = mtod(m, struct ip *);
++    int icmplen = ip->ip_len;
++
++    DEBUG_CALL("icmp_input");
++    DEBUG_ARG("m = %p", m);
++    DEBUG_ARG("m_len = %d", m->m_len);
++
++    /*
++     * Locate icmp structure in mbuf, and check
++     * that its not corrupted and of at least minimum length.
++ */ ++ if (icmplen < ICMP_MINLEN) { /* min 8 bytes payload */ ++ freeit: ++ m_free(m); ++ goto end_error; ++ } ++ ++ m->m_len -= hlen; ++ m->m_data += hlen; ++ icp = mtod(m, struct icmp *); ++ if (cksum(m, icmplen)) { ++ goto freeit; ++ } ++ m->m_len += hlen; ++ m->m_data -= hlen; ++ ++ DEBUG_ARG("icmp_type = %d", icp->icmp_type); ++ switch (icp->icmp_type) { ++ case ICMP_ECHO: ++ ip->ip_len += hlen; /* since ip_input subtracts this */ ++ if (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == slirp->vnameserver_addr.s_addr) { ++ icmp_reflect(m); ++ } else if (slirp->restricted) { ++ goto freeit; ++ } else { ++ struct socket *so; ++ struct sockaddr_storage addr; ++ int ttl; ++ ++ so = socreate(slirp); ++ if (icmp_send(so, m, hlen) == 0) { ++ /* We could send this as ICMP, good! */ ++ return; ++ } ++ ++ /* We could not send this as ICMP, try to send it on UDP echo ++ * service (7), wishfully hoping that it is open there. */ ++ ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC("icmp_input udp_attach errno = %d-%s", errno, ++ strerror(errno)); ++ sofree(so); ++ m_free(m); ++ goto end_error; ++ } ++ so->so_m = m; ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; ++ so->so_fport = htons(7); ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = htons(9); ++ so->so_iptos = ip->ip_tos; ++ so->so_type = IPPROTO_ICMP; ++ so->so_state = SS_ISFCONNECTED; ++ ++ /* Send the packet */ ++ addr = so->fhost.ss; ++ if (sotranslate_out(so, &addr) < 0) { ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ return; ++ } ++ ++ /* ++ * Check for TTL ++ */ ++ ttl = ip->ip_ttl-1; ++ if (ttl <= 0) { ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, ++ NULL); ++ udp_detach(so); ++ break; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); ++ ++ if (sendto(so->s, icmp_ping_msg, strlen(icmp_ping_msg), 0, ++ (struct sockaddr 
*)&addr, sockaddr_size(&addr)) == -1) { ++ DEBUG_MISC("icmp_input udp sendto tx errno = %d-%s", errno, ++ strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, ++ strerror(errno)); ++ udp_detach(so); ++ } ++ } /* if ip->ip_dst.s_addr == alias_addr.s_addr */ ++ break; ++ case ICMP_UNREACH: ++ /* XXX? report error? close socket? */ ++ case ICMP_TIMXCEED: ++ case ICMP_PARAMPROB: ++ case ICMP_SOURCEQUENCH: ++ case ICMP_TSTAMP: ++ case ICMP_MASKREQ: ++ case ICMP_REDIRECT: ++ m_free(m); ++ break; ++ ++ default: ++ m_free(m); ++ } /* switch */ ++ ++end_error: ++ /* m is m_free()'d xor put in a socket xor or given to ip_send */ ++ return; ++} ++ ++ ++/* ++ * Send an ICMP message in response to a situation ++ * ++ * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. ++ *MAY send more (we do). MUST NOT change this header information. MUST NOT reply ++ *to a multicast/broadcast IP address. MUST NOT reply to a multicast/broadcast ++ *MAC address. MUST reply to only the first fragment. ++ */ ++/* ++ * Send ICMP_UNREACH back to the source regarding msrc. ++ * mbuf *msrc is used as a template, but is NOT m_free()'d. ++ * It is reported as the bad ip packet. The header should ++ * be fully correct and in host byte order. ++ * ICMP fragmentation is illegal. All machines must accept 576 bytes in one ++ * packet. 
The maximum payload is 576-20(ip hdr)-8(icmp hdr)=548
++ */
++
++#define ICMP_MAXDATALEN (IP_MSS - 28)
++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize,
++                        const char *message, struct in_addr *src)
++{
++    unsigned hlen, shlen, s_ip_len;
++    register struct ip *ip;
++    register struct icmp *icp;
++    register struct mbuf *m;
++
++    DEBUG_CALL("icmp_send_error");
++    DEBUG_ARG("msrc = %p", msrc);
++
++    if (type != ICMP_UNREACH && type != ICMP_TIMXCEED)
++        goto end_error;
++
++    /* check msrc before anything below dereferences it */
++    if (!msrc)
++        goto end_error;
++    DEBUG_ARG("msrc_len = %d", msrc->m_len);
++    ip = mtod(msrc, struct ip *);
++    if (slirp_debug & DBG_MISC) {
++        char bufa[20], bufb[20];
++        slirp_pstrcpy(bufa, sizeof(bufa), inet_ntoa(ip->ip_src));
++        slirp_pstrcpy(bufb, sizeof(bufb), inet_ntoa(ip->ip_dst));
++        DEBUG_MISC(" %.16s to %.16s", bufa, bufb);
++    }
++    if (ip->ip_off & IP_OFFMASK)
++        goto end_error; /* Only reply to fragment 0 */
++
++    /* Do not reply to source-only IPs (low 28 address bits all zero) */
++    if ((ip->ip_src.s_addr & htonl(~(0xfU << 28))) == 0) {
++        goto end_error;
++    }
++
++    shlen = ip->ip_hl << 2;
++    s_ip_len = ip->ip_len;
++    if (ip->ip_p == IPPROTO_ICMP) {
++        icp = (struct icmp *)((char *)ip + shlen);
++        /*
++         * Assume any unknown ICMP type is an error. This isn't
++         * specified by the RFC, but think about it..
++         */
++        if (icp->icmp_type > ICMP_MAXTYPE || icmp_flush[icp->icmp_type])
++            goto end_error;
++    }
++
++    /* make a copy; msrc itself is only read, never modified or freed here */
++    m = m_get(msrc->slirp);
++    if (!m) {
++        goto end_error;
++    }
++
++    {
++        int new_m_size;
++        new_m_size =
++            sizeof(struct ip) + ICMP_MINLEN + msrc->m_len + ICMP_MAXDATALEN;
++        if (new_m_size > m->m_size)
++            m_inc(m, new_m_size);
++    }
++    memcpy(m->m_data, msrc->m_data, msrc->m_len);
++    m->m_len = msrc->m_len; /* copy msrc to m */
++
++    /* make the header of the reply packet */
++    ip = mtod(m, struct ip *);
++    hlen = sizeof(struct ip); /* no options in reply */
++
++    /* fill in icmp */
++    m->m_data += hlen;
++    m->m_len -= hlen;
++
++    icp = mtod(m, struct icmp *);
++
++    if (minsize)
++        s_ip_len = shlen + ICMP_MINLEN; /* return header+8b only */
++    else if (s_ip_len > ICMP_MAXDATALEN) /* maximum size */
++        s_ip_len = ICMP_MAXDATALEN;
++
++    m->m_len = ICMP_MINLEN + s_ip_len; /* 8 bytes ICMP header */
++
++    /* min. size = 8+sizeof(struct ip)+8 */
++
++    icp->icmp_type = type;
++    icp->icmp_code = code;
++    icp->icmp_id = 0;
++    icp->icmp_seq = 0;
++
++    memcpy(&icp->icmp_ip, msrc->m_data, s_ip_len); /* report the ip packet */
++    HTONS(icp->icmp_ip.ip_len);
++    HTONS(icp->icmp_ip.ip_id);
++    HTONS(icp->icmp_ip.ip_off);
++
++    if (message && WITH_ICMP_ERROR_MSG) { /* append message to ICMP packet */
++        int message_len;
++        char *cpnt;
++        message_len = strlen(message);
++        if (message_len > ICMP_MAXDATALEN)
++            message_len = ICMP_MAXDATALEN;
++        cpnt = (char *)m->m_data + m->m_len;
++        memcpy(cpnt, message, message_len);
++        m->m_len += message_len;
++    }
++
++    icp->icmp_cksum = 0;
++    icp->icmp_cksum = cksum(m, m->m_len);
++
++    m->m_data -= hlen;
++    m->m_len += hlen;
++
++    /* fill in ip */
++    ip->ip_hl = hlen >> 2;
++    ip->ip_len = m->m_len;
++
++    ip->ip_tos = ((ip->ip_tos & 0x1E) | 0xC0); /* high priority for errors */
++
++    ip->ip_ttl = MAXTTL;
++    ip->ip_p = IPPROTO_ICMP;
++    ip->ip_dst = ip->ip_src; /* reply goes back to the original sender */
++    ip->ip_src = *src;
++
++    ip_output((struct socket
*)NULL, m);
++
++end_error:
++    return;
++}
++#undef ICMP_MAXDATALEN
++
++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize,
++                     const char *message)
++{
++    icmp_forward_error(msrc, type, code, minsize, message, &msrc->slirp->vhost_addr);
++}
++
++/*
++ * Reflect the ip packet back to the source
++ */
++void icmp_reflect(struct mbuf *m)
++{
++    register struct ip *ip = mtod(m, struct ip *);
++    int hlen = ip->ip_hl << 2;
++    int optlen = hlen - sizeof(struct ip);
++    register struct icmp *icp;
++
++    /*
++     * Send an icmp packet back to the ip level,
++     * after supplying a checksum.
++     */
++    m->m_data += hlen;
++    m->m_len -= hlen;
++    icp = mtod(m, struct icmp *);
++
++    icp->icmp_type = ICMP_ECHOREPLY;
++    icp->icmp_cksum = 0;
++    icp->icmp_cksum = cksum(m, ip->ip_len - hlen);
++
++    m->m_data -= hlen;
++    m->m_len += hlen;
++
++    /* fill in ip */
++    if (optlen > 0) {
++        /*
++         * Strip out original options by copying rest of first
++         * mbuf's data back, and adjust the IP length.
++         */
++        memmove((char *)(ip + 1), (char *)ip + hlen,
++                (unsigned)(m->m_len - hlen));
++        hlen -= optlen;
++        ip->ip_hl = hlen >> 2;
++        ip->ip_len -= optlen;
++        m->m_len -= optlen;
++    }
++
++    ip->ip_ttl = MAXTTL;
++    { /* swap */
++        struct in_addr icmp_dst;
++        icmp_dst = ip->ip_dst;
++        ip->ip_dst = ip->ip_src;
++        ip->ip_src = icmp_dst;
++    }
++
++    ip_output((struct socket *)NULL, m);
++}
++/* Read the host's reply on so->s, then reflect it or report ICMP_UNREACH. */
++void icmp_receive(struct socket *so)
++{
++    struct mbuf *m = so->so_m;
++    struct ip *ip = mtod(m, struct ip *);
++    int hlen = ip->ip_hl << 2;
++    uint8_t error_code;
++    struct icmp *icp;
++    int id, len;
++
++    m->m_data += hlen;
++    m->m_len -= hlen;
++    icp = mtod(m, struct icmp *);
++
++    id = icp->icmp_id; /* saved so the guest's id survives the recv() below */
++    len = recv(so->s, icp, M_ROOM(m), 0);
++    /*
++     * The behavior of reading SOCK_DGRAM+IPPROTO_ICMP sockets is inconsistent
++     * between host OSes. On Linux, only the ICMP header and payload is
++     * included. On macOS/Darwin, the socket acts like a raw socket and
++     * includes the IP header as well. On other BSDs, SOCK_DGRAM+IPPROTO_ICMP
++     * sockets aren't supported at all, so we treat them like raw sockets. It
++     * isn't possible to detect this difference at runtime, so we must use an
++     * #ifdef to determine if we need to remove the IP header.
++     */
++#ifdef CONFIG_BSD
++    if (len >= (int)sizeof(struct ip)) { /* cast keeps len == -1 negative */
++        struct ip *inner_ip = mtod(m, struct ip *);
++        int inner_hlen = inner_ip->ip_hl << 2;
++        if (inner_hlen > len) {
++            len = -1;
++            errno = EINVAL; /* errno values are positive */
++        } else {
++            len -= inner_hlen;
++            memmove(icp, (unsigned char *)icp + inner_hlen, len);
++        }
++    } else if (len >= 0) { /* short datagram; a failed recv() keeps its errno */
++        len = -1;
++        errno = EINVAL;
++    }
++#endif
++    icp->icmp_id = id;
++
++    m->m_data -= hlen;
++    m->m_len += hlen;
++
++    if (len == -1 || len == 0) {
++        if (errno == ENETUNREACH) {
++            error_code = ICMP_UNREACH_NET;
++        } else {
++            error_code = ICMP_UNREACH_HOST;
++        }
++        DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno));
++        icmp_send_error(so->so_m, ICMP_UNREACH, error_code, 0, strerror(errno));
++    } else {
++        icmp_reflect(so->so_m);
++        so->so_m = NULL; /* Don't m_free() it again! */
++    }
++    icmp_detach(so);
++}
+diff --git a/slirp/src/ip_icmp.h b/slirp/src/ip_icmp.h
+new file mode 100644
+index 0000000000..569a083061
+--- /dev/null
++++ b/slirp/src/ip_icmp.h
+@@ -0,0 +1,168 @@
++/* SPDX-License-Identifier: BSD-3-Clause */
++/*
++ * Copyright (c) 1982, 1986, 1993
++ * The Regents of the University of California. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2.
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_icmp.h 8.1 (Berkeley) 6/10/93 ++ * ip_icmp.h,v 1.4 1995/05/30 08:09:43 rgrimes Exp ++ */ ++ ++#ifndef NETINET_IP_ICMP_H ++#define NETINET_IP_ICMP_H ++ ++/* ++ * Interface Control Message Protocol Definitions. ++ * Per RFC 792, September 1981. ++ */ ++ ++typedef uint32_t n_time; ++ ++/* ++ * Structure of an icmp header. 
++ */ ++struct icmp { ++ uint8_t icmp_type; /* type of message, see below */ ++ uint8_t icmp_code; /* type sub code */ ++ uint16_t icmp_cksum; /* ones complement cksum of struct */ ++ union { ++ uint8_t ih_pptr; /* ICMP_PARAMPROB */ ++ struct in_addr ih_gwaddr; /* ICMP_REDIRECT */ ++ struct ih_idseq { ++ uint16_t icd_id; ++ uint16_t icd_seq; ++ } ih_idseq; ++ int ih_void; ++ ++ /* ICMP_UNREACH_NEEDFRAG -- Path MTU Discovery (RFC1191) */ ++ struct ih_pmtu { ++ uint16_t ipm_void; ++ uint16_t ipm_nextmtu; ++ } ih_pmtu; ++ } icmp_hun; ++#define icmp_pptr icmp_hun.ih_pptr ++#define icmp_gwaddr icmp_hun.ih_gwaddr ++#define icmp_id icmp_hun.ih_idseq.icd_id ++#define icmp_seq icmp_hun.ih_idseq.icd_seq ++#define icmp_void icmp_hun.ih_void ++#define icmp_pmvoid icmp_hun.ih_pmtu.ipm_void ++#define icmp_nextmtu icmp_hun.ih_pmtu.ipm_nextmtu ++ union { ++ struct id_ts { ++ n_time its_otime; ++ n_time its_rtime; ++ n_time its_ttime; ++ } id_ts; ++ struct id_ip { ++ struct ip idi_ip; ++ /* options and then 64 bits of data */ ++ } id_ip; ++ uint32_t id_mask; ++ char id_data[1]; ++ } icmp_dun; ++#define icmp_otime icmp_dun.id_ts.its_otime ++#define icmp_rtime icmp_dun.id_ts.its_rtime ++#define icmp_ttime icmp_dun.id_ts.its_ttime ++#define icmp_ip icmp_dun.id_ip.idi_ip ++#define icmp_mask icmp_dun.id_mask ++#define icmp_data icmp_dun.id_data ++}; ++ ++/* ++ * Lower bounds on packet lengths for various types. ++ * For the error advice packets must first ensure that the ++ * packet is large enough to contain the returned ip header. ++ * Only then can we do the check to see if 64 bits of packet ++ * data have been returned, since we need to check the returned ++ * ip header length. 
++ */ ++#define ICMP_MINLEN 8 /* abs minimum */ ++#define ICMP_TSLEN (8 + 3 * sizeof(n_time)) /* timestamp */ ++#define ICMP_MASKLEN 12 /* address mask */ ++#define ICMP_ADVLENMIN (8 + sizeof(struct ip) + 8) /* min */ ++#define ICMP_ADVLEN(p) (8 + ((p)->icmp_ip.ip_hl << 2) + 8) ++/* N.B.: must separately check that ip_hl >= 5 */ ++ ++/* ++ * Definition of type and code field values. ++ */ ++#define ICMP_ECHOREPLY 0 /* echo reply */ ++#define ICMP_UNREACH 3 /* dest unreachable, codes: */ ++#define ICMP_UNREACH_NET 0 /* bad net */ ++#define ICMP_UNREACH_HOST 1 /* bad host */ ++#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */ ++#define ICMP_UNREACH_PORT 3 /* bad port */ ++#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */ ++#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */ ++#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */ ++#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */ ++#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */ ++#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */ ++#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */ ++#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */ ++#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */ ++#define ICMP_SOURCEQUENCH 4 /* packet lost, slow down */ ++#define ICMP_REDIRECT 5 /* shorter route, codes: */ ++#define ICMP_REDIRECT_NET 0 /* for network */ ++#define ICMP_REDIRECT_HOST 1 /* for host */ ++#define ICMP_REDIRECT_TOSNET 2 /* for tos and net */ ++#define ICMP_REDIRECT_TOSHOST 3 /* for tos and host */ ++#define ICMP_ECHO 8 /* echo service */ ++#define ICMP_ROUTERADVERT 9 /* router advertisement */ ++#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ ++#define ICMP_TIMXCEED 11 /* time exceeded, code: */ ++#define ICMP_TIMXCEED_INTRANS 0 /* ttl==0 in transit */ ++#define ICMP_TIMXCEED_REASS 1 /* ttl==0 in reass */ ++#define ICMP_PARAMPROB 12 /* ip header bad */ ++#define ICMP_PARAMPROB_OPTABSENT 1 /* req. opt. 
absent */ ++#define ICMP_TSTAMP 13 /* timestamp request */ ++#define ICMP_TSTAMPREPLY 14 /* timestamp reply */ ++#define ICMP_IREQ 15 /* information request */ ++#define ICMP_IREQREPLY 16 /* information reply */ ++#define ICMP_MASKREQ 17 /* address mask request */ ++#define ICMP_MASKREPLY 18 /* address mask reply */ ++ ++#define ICMP_MAXTYPE 18 ++ ++#define ICMP_INFOTYPE(type) \ ++ ((type) == ICMP_ECHOREPLY || (type) == ICMP_ECHO || \ ++ (type) == ICMP_ROUTERADVERT || (type) == ICMP_ROUTERSOLICIT || \ ++ (type) == ICMP_TSTAMP || (type) == ICMP_TSTAMPREPLY || \ ++ (type) == ICMP_IREQ || (type) == ICMP_IREQREPLY || \ ++ (type) == ICMP_MASKREQ || (type) == ICMP_MASKREPLY) ++ ++void icmp_init(Slirp *slirp); ++void icmp_cleanup(Slirp *slirp); ++void icmp_input(struct mbuf *, int); ++void icmp_forward_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message, struct in_addr *src); ++void icmp_send_error(struct mbuf *msrc, uint8_t type, uint8_t code, int minsize, ++ const char *message); ++void icmp_reflect(struct mbuf *); ++void icmp_receive(struct socket *so); ++void icmp_detach(struct socket *so); ++ ++#endif +diff --git a/slirp/src/ip_input.c b/slirp/src/ip_input.c +new file mode 100644 +index 0000000000..a29c324cce +--- /dev/null ++++ b/slirp/src/ip_input.c +@@ -0,0 +1,463 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. 
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 ++ * ip_input.c,v 1.11 1994/11/16 10:17:08 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp); ++static void ip_freef(Slirp *slirp, struct ipq *fp); ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev); ++static void ip_deq(register struct ipasfrag *p); ++ ++/* ++ * IP initialization: fill in IP protocol switch table. ++ * All protocols not implemented in kernel go to raw IP protocol handler. 
++ */ ++void ip_init(Slirp *slirp) ++{ ++ slirp->ipq.ip_link.next = slirp->ipq.ip_link.prev = &slirp->ipq.ip_link; ++ udp_init(slirp); ++ tcp_init(slirp); ++ icmp_init(slirp); ++} ++ ++void ip_cleanup(Slirp *slirp) ++{ ++ udp_cleanup(slirp); ++ tcp_cleanup(slirp); ++ icmp_cleanup(slirp); ++} ++ ++/* ++ * Ip input routine. Checksum and byte swap header. If fragmented ++ * try to reassemble. Process options. Pass to next level. ++ */ ++void ip_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, TCPIPHDR_DELTA); ++ ++ register struct ip *ip; ++ int hlen; ++ ++ if (!slirp->in_enabled) { ++ goto bad; ++ } ++ ++ DEBUG_CALL("ip_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m_len = %d", m->m_len); ++ ++ if (m->m_len < sizeof(struct ip)) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ++ if (ip->ip_v != IPVERSION) { ++ goto bad; ++ } ++ ++ hlen = ip->ip_hl << 2; ++ if (hlen < sizeof(struct ip) || hlen > m->m_len) { /* min header length */ ++ goto bad; /* or packet too short */ ++ } ++ ++ /* keep ip header intact for ICMP reply ++ * ip->ip_sum = cksum(m, hlen); ++ * if (ip->ip_sum) { ++ */ ++ if (cksum(m, hlen)) { ++ goto bad; ++ } ++ ++ /* ++ * Convert fields to host representation. ++ */ ++ NTOHS(ip->ip_len); ++ if (ip->ip_len < hlen) { ++ goto bad; ++ } ++ NTOHS(ip->ip_id); ++ NTOHS(ip->ip_off); ++ ++ /* ++ * Check that the amount of data in the buffers ++ * is as at least much as the IP header would have us expect. ++ * Trim mbufs if longer than we expect. ++ * Drop packet if shorter than we expect. ++ */ ++ if (m->m_len < ip->ip_len) { ++ goto bad; ++ } ++ ++ /* Should drop packet if mbuf too long? hmmm... */ ++ if (m->m_len > ip->ip_len) ++ m_adj(m, ip->ip_len - m->m_len); ++ ++ /* check ip_ttl for a correct ICMP reply */ ++ if (ip->ip_ttl == 0) { ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, "ttl"); ++ goto bad; ++ } ++ ++ /* ++ * If offset or IP_MF are set, must reassemble. 
++ * Otherwise, nothing need be done. ++ * (We could look in the reassembly queue to see ++ * if the packet was previously fragmented, ++ * but it's not worth the time; just let them time out.) ++ * ++ * XXX This should fail, don't fragment yet ++ */ ++ if (ip->ip_off & ~IP_DF) { ++ register struct ipq *fp; ++ struct qlink *l; ++ /* ++ * Look for queue of fragments ++ * of this datagram. ++ */ ++ for (l = slirp->ipq.ip_link.next; l != &slirp->ipq.ip_link; ++ l = l->next) { ++ fp = container_of(l, struct ipq, ip_link); ++ if (ip->ip_id == fp->ipq_id && ++ ip->ip_src.s_addr == fp->ipq_src.s_addr && ++ ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ++ ip->ip_p == fp->ipq_p) ++ goto found; ++ } ++ fp = NULL; ++ found: ++ ++ /* ++ * Adjust ip_len to not reflect header, ++ * set ip_mff if more fragments are expected, ++ * convert offset of this to bytes. ++ */ ++ ip->ip_len -= hlen; ++ if (ip->ip_off & IP_MF) ++ ip->ip_tos |= 1; ++ else ++ ip->ip_tos &= ~1; ++ ++ ip->ip_off <<= 3; ++ ++ /* ++ * If datagram marked as having more fragments ++ * or if this is not the first fragment, ++ * attempt reassembly; if it succeeds, proceed. ++ */ ++ if (ip->ip_tos & 1 || ip->ip_off) { ++ ip = ip_reass(slirp, ip, fp); ++ if (ip == NULL) ++ return; ++ m = dtom(slirp, ip); ++ } else if (fp) ++ ip_freef(slirp, fp); ++ ++ } else ++ ip->ip_len -= hlen; ++ ++ /* ++ * Switch out to protocol's input routine. ++ */ ++ switch (ip->ip_p) { ++ case IPPROTO_TCP: ++ tcp_input(m, hlen, (struct socket *)NULL, AF_INET); ++ break; ++ case IPPROTO_UDP: ++ udp_input(m, hlen); ++ break; ++ case IPPROTO_ICMP: ++ icmp_input(m, hlen); ++ break; ++ default: ++ m_free(m); ++ } ++ return; ++bad: ++ m_free(m); ++} ++ ++#define iptofrag(P) ((struct ipasfrag *)(((char *)(P)) - sizeof(struct qlink))) ++#define fragtoip(P) ((struct ip *)(((char *)(P)) + sizeof(struct qlink))) ++/* ++ * Take incoming datagram fragment and try to ++ * reassemble it into whole datagram. 
If a chain for ++ * reassembly of this datagram already exists, then it ++ * is given as fp; otherwise have to make a chain. ++ */ ++static struct ip *ip_reass(Slirp *slirp, struct ip *ip, struct ipq *fp) ++{ ++ register struct mbuf *m = dtom(slirp, ip); ++ register struct ipasfrag *q; ++ int hlen = ip->ip_hl << 2; ++ int i, next; ++ ++ DEBUG_CALL("ip_reass"); ++ DEBUG_ARG("ip = %p", ip); ++ DEBUG_ARG("fp = %p", fp); ++ DEBUG_ARG("m = %p", m); ++ ++ /* ++ * Presence of header sizes in mbufs ++ * would confuse code below. ++ * Fragment m_data is concatenated. ++ */ ++ m->m_data += hlen; ++ m->m_len -= hlen; ++ ++ /* ++ * If first fragment to arrive, create a reassembly queue. ++ */ ++ if (fp == NULL) { ++ struct mbuf *t = m_get(slirp); ++ ++ if (t == NULL) { ++ goto dropfrag; ++ } ++ fp = mtod(t, struct ipq *); ++ insque(&fp->ip_link, &slirp->ipq.ip_link); ++ fp->ipq_ttl = IPFRAGTTL; ++ fp->ipq_p = ip->ip_p; ++ fp->ipq_id = ip->ip_id; ++ fp->frag_link.next = fp->frag_link.prev = &fp->frag_link; ++ fp->ipq_src = ip->ip_src; ++ fp->ipq_dst = ip->ip_dst; ++ q = (struct ipasfrag *)fp; ++ goto insert; ++ } ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) ++ if (q->ipf_off > ip->ip_off) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (q->ipf_prev != &fp->frag_link) { ++ struct ipasfrag *pq = q->ipf_prev; ++ i = pq->ipf_off + pq->ipf_len - ip->ip_off; ++ if (i > 0) { ++ if (i >= ip->ip_len) ++ goto dropfrag; ++ m_adj(dtom(slirp, ip), i); ++ ip->ip_off += i; ++ ip->ip_len -= i; ++ } ++ } ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. 
++ */ ++ while (q != (struct ipasfrag *)&fp->frag_link && ++ ip->ip_off + ip->ip_len > q->ipf_off) { ++ struct ipasfrag *prev; ++ i = (ip->ip_off + ip->ip_len) - q->ipf_off; ++ if (i < q->ipf_len) { ++ q->ipf_len -= i; ++ q->ipf_off += i; ++ m_adj(dtom(slirp, q), i); ++ break; ++ } ++ prev = q; ++ q = q->ipf_next; ++ ip_deq(prev); ++ m_free(dtom(slirp, prev)); ++ } ++ ++insert: ++ /* ++ * Stick new segment in its place; ++ * check for complete reassembly. ++ */ ++ ip_enq(iptofrag(ip), q->ipf_prev); ++ next = 0; ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = q->ipf_next) { ++ if (q->ipf_off != next) ++ return NULL; ++ next += q->ipf_len; ++ } ++ if (((struct ipasfrag *)(q->ipf_prev))->ipf_tos & 1) ++ return NULL; ++ ++ /* ++ * Reassembly is complete; concatenate fragments. ++ */ ++ q = fp->frag_link.next; ++ m = dtom(slirp, q); ++ int delta = (char *)q - (m->m_flags & M_EXT ? m->m_ext : m->m_dat); ++ ++ q = (struct ipasfrag *)q->ipf_next; ++ while (q != (struct ipasfrag *)&fp->frag_link) { ++ struct mbuf *t = dtom(slirp, q); ++ q = (struct ipasfrag *)q->ipf_next; ++ m_cat(m, t); ++ } ++ ++ /* ++ * Create header for new ip packet by ++ * modifying header of first packet; ++ * dequeue and discard fragment reassembly header. ++ * Make header visible. ++ */ ++ q = fp->frag_link.next; ++ ++ /* ++ * If the fragments concatenated to an mbuf that's bigger than the total ++ * size of the fragment and the mbuf was not already using an m_ext buffer, ++ * then an m_ext buffer was allocated. But fp->ipq_next points to the old ++ * buffer (in the mbuf), so we must point ip into the new buffer. 
++ */ ++ if (m->m_flags & M_EXT) { ++ q = (struct ipasfrag *)(m->m_ext + delta); ++ } ++ ++ ip = fragtoip(q); ++ ip->ip_len = next; ++ ip->ip_tos &= ~1; ++ ip->ip_src = fp->ipq_src; ++ ip->ip_dst = fp->ipq_dst; ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++ m->m_len += (ip->ip_hl << 2); ++ m->m_data -= (ip->ip_hl << 2); ++ ++ return ip; ++ ++dropfrag: ++ m_free(m); ++ return NULL; ++} ++ ++/* ++ * Free a fragment reassembly header and all ++ * associated datagrams. ++ */ ++static void ip_freef(Slirp *slirp, struct ipq *fp) ++{ ++ register struct ipasfrag *q, *p; ++ ++ for (q = fp->frag_link.next; q != (struct ipasfrag *)&fp->frag_link; ++ q = p) { ++ p = q->ipf_next; ++ ip_deq(q); ++ m_free(dtom(slirp, q)); ++ } ++ remque(&fp->ip_link); ++ m_free(dtom(slirp, fp)); ++} ++ ++/* ++ * Put an ip fragment on a reassembly chain. ++ * Like insque, but pointers in middle of structure. ++ */ ++static void ip_enq(register struct ipasfrag *p, register struct ipasfrag *prev) ++{ ++ DEBUG_CALL("ip_enq"); ++ DEBUG_ARG("prev = %p", prev); ++ p->ipf_prev = prev; ++ p->ipf_next = prev->ipf_next; ++ ((struct ipasfrag *)(prev->ipf_next))->ipf_prev = p; ++ prev->ipf_next = p; ++} ++ ++/* ++ * To ip_enq as remque is to insque. ++ */ ++static void ip_deq(register struct ipasfrag *p) ++{ ++ ((struct ipasfrag *)(p->ipf_prev))->ipf_next = p->ipf_next; ++ ((struct ipasfrag *)(p->ipf_next))->ipf_prev = p->ipf_prev; ++} ++ ++/* ++ * IP timer processing; ++ * if a timer expires on a reassembly ++ * queue, discard it. ++ */ ++void ip_slowtimo(Slirp *slirp) ++{ ++ struct qlink *l; ++ ++ DEBUG_CALL("ip_slowtimo"); ++ ++ l = slirp->ipq.ip_link.next; ++ ++ if (l == NULL) ++ return; ++ ++ while (l != &slirp->ipq.ip_link) { ++ struct ipq *fp = container_of(l, struct ipq, ip_link); ++ l = l->next; ++ if (--fp->ipq_ttl == 0) { ++ ip_freef(slirp, fp); ++ } ++ } ++} ++ ++/* ++ * Strip out IP options, at higher ++ * level protocol in the kernel. 
++ * Second argument is buffer to which options ++ * will be moved, and return value is their length. ++ * (XXX) should be deleted; last arg currently ignored. ++ */ ++void ip_stripoptions(register struct mbuf *m, struct mbuf *mopt) ++{ ++ register int i; ++ struct ip *ip = mtod(m, struct ip *); ++ register char *opts; ++ int olen; ++ ++ olen = (ip->ip_hl << 2) - sizeof(struct ip); ++ opts = (char *)(ip + 1); ++ i = m->m_len - (sizeof(struct ip) + olen); ++ memmove(opts, opts + olen, (unsigned)i); ++ m->m_len -= olen; ++ ++ ip->ip_hl = sizeof(struct ip) >> 2; ++} +diff --git a/slirp/src/ip_output.c b/slirp/src/ip_output.c +new file mode 100644 +index 0000000000..4f62605915 +--- /dev/null ++++ b/slirp/src/ip_output.c +@@ -0,0 +1,171 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 ++ * ip_output.c,v 1.9 1994/11/16 10:17:10 jkh Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP are ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* Number of packets queued before we start sending ++ * (to prevent allocing too many mbufs) */ ++#define IF_THRESH 10 ++ ++/* ++ * IP output. The packet in mbuf chain m contains a skeletal IP ++ * header (with len, off, ttl, proto, tos, src, dst). ++ * The mbuf chain containing the packet will be freed. ++ * The mbuf opt, if present, will not be freed. ++ */ ++int ip_output(struct socket *so, struct mbuf *m0) ++{ ++ Slirp *slirp = m0->slirp; ++ M_DUP_DEBUG(slirp, m0, 0, 0); ++ ++ register struct ip *ip; ++ register struct mbuf *m = m0; ++ register int hlen = sizeof(struct ip); ++ int len, off, error = 0; ++ ++ DEBUG_CALL("ip_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m0 = %p", m0); ++ ++ ip = mtod(m, struct ip *); ++ /* ++ * Fill in IP header. ++ */ ++ ip->ip_v = IPVERSION; ++ ip->ip_off &= IP_DF; ++ ip->ip_id = htons(slirp->ip_id++); ++ ip->ip_hl = hlen >> 2; ++ ++ /* ++ * If small enough for interface, can just send directly. 
++ */ ++ if ((uint16_t)ip->ip_len <= slirp->if_mtu) { ++ ip->ip_len = htons((uint16_t)ip->ip_len); ++ ip->ip_off = htons((uint16_t)ip->ip_off); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ ++ if_output(so, m); ++ goto done; ++ } ++ ++ /* ++ * Too large for interface; fragment if possible. ++ * Must be able to put at least 8 bytes per fragment. ++ */ ++ if (ip->ip_off & IP_DF) { ++ error = -1; ++ goto bad; ++ } ++ ++ len = (slirp->if_mtu - hlen) & ~7; /* ip databytes per packet */ ++ if (len < 8) { ++ error = -1; ++ goto bad; ++ } ++ ++ { ++ int mhlen, firstlen = len; ++ struct mbuf **mnext = &m->m_nextpkt; ++ ++ /* ++ * Loop through length of segment after first fragment, ++ * make new header and copy data of each part and link onto chain. ++ */ ++ m0 = m; ++ mhlen = sizeof(struct ip); ++ for (off = hlen + len; off < (uint16_t)ip->ip_len; off += len) { ++ register struct ip *mhip; ++ m = m_get(slirp); ++ if (m == NULL) { ++ error = -1; ++ goto sendorfree; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ mhip = mtod(m, struct ip *); ++ *mhip = *ip; ++ ++ m->m_len = mhlen; ++ mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); ++ if (ip->ip_off & IP_MF) ++ mhip->ip_off |= IP_MF; ++ if (off + len >= (uint16_t)ip->ip_len) ++ len = (uint16_t)ip->ip_len - off; ++ else ++ mhip->ip_off |= IP_MF; ++ mhip->ip_len = htons((uint16_t)(len + mhlen)); ++ ++ if (m_copy(m, m0, off, len) < 0) { ++ error = -1; ++ goto sendorfree; ++ } ++ ++ mhip->ip_off = htons((uint16_t)mhip->ip_off); ++ mhip->ip_sum = 0; ++ mhip->ip_sum = cksum(m, mhlen); ++ *mnext = m; ++ mnext = &m->m_nextpkt; ++ } ++ /* ++ * Update first fragment by trimming what's been copied out ++ * and updating header, then send each fragment (in order). 
++ */ ++ m = m0; ++ m_adj(m, hlen + firstlen - (uint16_t)ip->ip_len); ++ ip->ip_len = htons((uint16_t)m->m_len); ++ ip->ip_off = htons((uint16_t)(ip->ip_off | IP_MF)); ++ ip->ip_sum = 0; ++ ip->ip_sum = cksum(m, hlen); ++ sendorfree: ++ for (m = m0; m; m = m0) { ++ m0 = m->m_nextpkt; ++ m->m_nextpkt = NULL; ++ if (error == 0) ++ if_output(so, m); ++ else ++ m_free(m); ++ } ++ } ++ ++done: ++ return (error); ++ ++bad: ++ m_free(m0); ++ goto done; ++} +diff --git a/slirp/src/libslirp-version.h.in b/slirp/src/libslirp-version.h.in +new file mode 100644 +index 0000000000..faa6c85952 +--- /dev/null ++++ b/slirp/src/libslirp-version.h.in +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_VERSION_H_ ++#define LIBSLIRP_VERSION_H_ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#define SLIRP_MAJOR_VERSION @SLIRP_MAJOR_VERSION@ ++#define SLIRP_MINOR_VERSION @SLIRP_MINOR_VERSION@ ++#define SLIRP_MICRO_VERSION @SLIRP_MICRO_VERSION@ ++#define SLIRP_VERSION_STRING @SLIRP_VERSION_STRING@ ++ ++#define SLIRP_CHECK_VERSION(major,minor,micro) \ ++ (SLIRP_MAJOR_VERSION > (major) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION > (minor)) || \ ++ (SLIRP_MAJOR_VERSION == (major) && SLIRP_MINOR_VERSION == (minor) && \ ++ SLIRP_MICRO_VERSION >= (micro))) ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_VERSION_H_ */ +diff --git a/slirp/src/libslirp.h b/slirp/src/libslirp.h +new file mode 100644 +index 0000000000..5760d53cea +--- /dev/null ++++ b/slirp/src/libslirp.h +@@ -0,0 +1,236 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef LIBSLIRP_H ++#define LIBSLIRP_H ++ ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#endif ++ ++#include "libslirp-version.h" ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* Opaque structure containing the slirp state */ ++typedef struct Slirp Slirp; ++ ++/* Flags passed to SlirpAddPollCb and to be 
returned by SlirpGetREventsCb. */ ++enum { ++ SLIRP_POLL_IN = 1 << 0, ++ SLIRP_POLL_OUT = 1 << 1, ++ SLIRP_POLL_PRI = 1 << 2, ++ SLIRP_POLL_ERR = 1 << 3, ++ SLIRP_POLL_HUP = 1 << 4, ++}; ++ ++typedef ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); ++typedef ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); ++typedef void (*SlirpTimerCb)(void *opaque); ++typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); ++typedef int (*SlirpGetREventsCb)(int idx, void *opaque); ++ ++/* ++ * Callbacks from slirp, to be set by the application. ++ * ++ * The opaque parameter is set to the opaque pointer given in the slirp_new / ++ * slirp_init call. ++ */ ++typedef struct SlirpCb { ++ /* ++ * Send an ethernet frame to the guest network. The opaque parameter is the ++ * one given to slirp_init(). If the guest is not ready to receive a frame, ++ * the function can just drop the data. TCP will then handle retransmissions ++ * at a lower pace. ++ * <0 reports an IO error. ++ */ ++ SlirpWriteCb send_packet; ++ /* Print a message for an error due to guest misbehavior. 
*/ ++ void (*guest_error)(const char *msg, void *opaque); ++ /* Return the virtual clock value in nanoseconds */ ++ int64_t (*clock_get_ns)(void *opaque); ++ /* Create a new timer with the given callback and opaque data */ ++ void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); ++ /* Remove and free a timer */ ++ void (*timer_free)(void *timer, void *opaque); ++ /* Modify a timer to expire at @expire_time (ms) */ ++ void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); ++ /* Register a fd for future polling */ ++ void (*register_poll_fd)(int fd, void *opaque); ++ /* Unregister a fd */ ++ void (*unregister_poll_fd)(int fd, void *opaque); ++ /* Kick the io-thread, to signal that new events may be processed */ ++ void (*notify)(void *opaque); ++} SlirpCb; ++ ++#define SLIRP_CONFIG_VERSION_MIN 1 ++#define SLIRP_CONFIG_VERSION_MAX 3 ++ ++typedef struct SlirpConfig { ++ /* Version must be provided */ ++ uint32_t version; ++ /* ++ * Fields introduced in SlirpConfig version 1 begin ++ */ ++ int restricted; ++ bool in_enabled; ++ struct in_addr vnetwork; ++ struct in_addr vnetmask; ++ struct in_addr vhost; ++ bool in6_enabled; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost6; ++ const char *vhostname; ++ const char *tftp_server_name; ++ const char *tftp_path; ++ const char *bootfile; ++ struct in_addr vdhcp_start; ++ struct in_addr vnameserver; ++ struct in6_addr vnameserver6; ++ const char **vdnssearch; ++ const char *vdomainname; ++ /* Default: IF_MTU_DEFAULT */ ++ size_t if_mtu; ++ /* Default: IF_MRU_DEFAULT */ ++ size_t if_mru; ++ /* Prohibit connecting to 127.0.0.1:* */ ++ bool disable_host_loopback; ++ /* ++ * Enable emulation code (*warning*: this code isn't safe, it is not ++ * recommended to enable it) ++ */ ++ bool enable_emu; ++ /* ++ * Fields introduced in SlirpConfig version 2 begin ++ */ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ /* ++ * Fields introduced in 
SlirpConfig version 3 begin ++ */ ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++} SlirpConfig; ++ ++/* Create a new instance of a slirp stack */ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, ++ void *opaque); ++/* slirp_init is deprecated in favor of slirp_new */ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque); ++/* Shut down an instance of a slirp stack */ ++void slirp_cleanup(Slirp *slirp); ++ ++/* This is called by the application when it is about to sleep through poll(). ++ * *timeout is set to the amount of virtual time (in ms) that the application intends to ++ * wait (UINT32_MAX if infinite). slirp_pollfds_fill updates it according to ++ * e.g. TCP timers, so the application knows it should sleep a smaller amount of ++ * time. slirp_pollfds_fill calls add_poll for each file descriptor ++ * that should be monitored along the sleep. The opaque pointer is passed as ++ * such to add_poll, and add_poll returns an index. */ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque); ++ ++/* This is called by the application after sleeping, to report which file ++ * descriptors are available. slirp_pollfds_poll calls get_revents on each file ++ * descriptor, giving it the index that add_poll returned during the ++ * slirp_pollfds_fill call, to know whether the descriptor is available for ++ * read/write/etc. (SLIRP_POLL_*) ++ * select_error should be passed 1 if poll() returned an error. 
*/ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque); ++ ++/* This is called by the application when the guest emits a packet on the ++ * guest network, to be interpreted by slirp. */ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++/* These set up / remove port forwarding between a host port in the real world ++ * and the guest network. */ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port); ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port); ++ ++#define SLIRP_HOSTFWD_UDP 1 ++#define SLIRP_HOSTFWD_V6ONLY 2 ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags); ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags); ++ ++/* Set up port forwarding between a port in the guest network and a ++ * command running on the host */ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * Unix port on the host */ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port); ++/* Set up port forwarding between a port in the guest network and a ++ * callback that will receive the data coming from the port */ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port); ++ ++/* TODO: rather identify a guestfwd through an opaque pointer instead of through ++ * the guest_addr */ ++ ++/* This is called by the application for a guestfwd, to determine how much data ++ * can be received by the forwarded port through a call to slirp_socket_recv. 
*/ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++/* This is called by the application for a guestfwd, to provide the data to be ++ * sent on the forwarded port */ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size); ++ ++/* Remove entries added by slirp_add_exec, slirp_add_unix or slirp_add_guestfwd */ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++/* Return a human-readable state of the slirp stack */ ++char *slirp_connection_info(Slirp *slirp); ++ ++/* Return a human-readable state of the NDP/ARP tables */ ++char *slirp_neighbor_info(Slirp *slirp); ++ ++/* Save the slirp state through the write_cb. The opaque pointer is passed as ++ * such to the write_cb. */ ++void slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); ++ ++/* Returns the version of the slirp state, to be saved along the state */ ++int slirp_state_version(void); ++ ++/* Load the slirp state through the read_cb. The opaque pointer is passed as ++ * such to the read_cb. The version should be given as it was obtained from ++ * slirp_state_version when slirp_state_save was called. 
*/ ++int slirp_state_load(Slirp *s, int version_id, SlirpReadCb read_cb, ++ void *opaque); ++ ++/* Return the version of the slirp implementation */ ++const char *slirp_version_string(void); ++ ++#ifdef __cplusplus ++} /* extern "C" */ ++#endif ++ ++#endif /* LIBSLIRP_H */ +diff --git a/slirp/src/libslirp.map b/slirp/src/libslirp.map +new file mode 100644 +index 0000000000..792b0a94ab +--- /dev/null ++++ b/slirp/src/libslirp.map +@@ -0,0 +1,36 @@ ++SLIRP_4.0 { ++global: ++ slirp_add_exec; ++ slirp_add_guestfwd; ++ slirp_add_hostfwd; ++ slirp_cleanup; ++ slirp_connection_info; ++ slirp_init; ++ slirp_input; ++ slirp_pollfds_fill; ++ slirp_pollfds_poll; ++ slirp_remove_hostfwd; ++ slirp_socket_can_recv; ++ slirp_socket_recv; ++ slirp_state_load; ++ slirp_state_save; ++ slirp_state_version; ++ slirp_version_string; ++local: ++ *; ++}; ++ ++SLIRP_4.1 { ++ slirp_new; ++} SLIRP_4.0; ++ ++SLIRP_4.2 { ++ slirp_add_unix; ++ slirp_remove_guestfwd; ++} SLIRP_4.1; ++ ++SLIRP_4.5 { ++ slirp_add_hostxfwd; ++ slirp_remove_hostxfwd; ++ slirp_neighbor_info; ++} SLIRP_4.2; +diff --git a/slirp/src/main.h b/slirp/src/main.h +new file mode 100644 +index 0000000000..3b3f883703 +--- /dev/null ++++ b/slirp/src/main.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_MAIN_H ++#define SLIRP_MAIN_H ++ ++extern unsigned curtime; ++extern struct in_addr loopback_addr; ++extern unsigned long loopback_mask; ++ ++int if_encap(Slirp *slirp, struct mbuf *ifm); ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags); ++ ++#endif +diff --git a/slirp/src/mbuf.c b/slirp/src/mbuf.c +new file mode 100644 +index 0000000000..36864a401f +--- /dev/null ++++ b/slirp/src/mbuf.c +@@ -0,0 +1,281 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski ++ */ ++ ++/* ++ * mbuf's in SLiRP are much simpler than the real mbufs in ++ * FreeBSD. 
They are fixed size, determined by the MTU, ++ * so that one whole packet can fit. Mbuf's cannot be ++ * chained together. If there's more data than the mbuf ++ * could hold, an external g_malloced buffer is pointed to ++ * by m_ext (and the data pointers) and M_EXT is set in ++ * the flags ++ */ ++ ++#include "slirp.h" ++ ++#define MBUF_THRESH 30 ++ ++/* ++ * Find a nice value for msize ++ */ ++#define SLIRP_MSIZE(mtu) \ ++ (offsetof(struct mbuf, m_dat) + IF_MAXLINKHDR + TCPIPHDR_DELTA + (mtu)) ++ ++void m_init(Slirp *slirp) ++{ ++ slirp->m_freelist.qh_link = slirp->m_freelist.qh_rlink = &slirp->m_freelist; ++ slirp->m_usedlist.qh_link = slirp->m_usedlist.qh_rlink = &slirp->m_usedlist; ++} ++ ++static void m_cleanup_list(struct quehead *list_head) ++{ ++ struct mbuf *m, *next; ++ ++ m = (struct mbuf *)list_head->qh_link; ++ while ((struct quehead *)m != list_head) { ++ next = m->m_next; ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ } ++ g_free(m); ++ m = next; ++ } ++ list_head->qh_link = list_head; ++ list_head->qh_rlink = list_head; ++} ++ ++void m_cleanup(Slirp *slirp) ++{ ++ m_cleanup_list(&slirp->m_usedlist); ++ m_cleanup_list(&slirp->m_freelist); ++ m_cleanup_list(&slirp->if_batchq); ++ m_cleanup_list(&slirp->if_fastq); ++} ++ ++/* ++ * Get an mbuf from the free list, if there are none ++ * allocate one ++ * ++ * Because fragmentation can occur if we alloc new mbufs and ++ * free old mbufs, we mark all mbufs above mbuf_thresh as M_DOFREE, ++ * which tells m_free to actually g_free() it ++ */ ++struct mbuf *m_get(Slirp *slirp) ++{ ++ register struct mbuf *m; ++ int flags = 0; ++ ++ DEBUG_CALL("m_get"); ++ ++ if (MBUF_DEBUG || slirp->m_freelist.qh_link == &slirp->m_freelist) { ++ m = g_malloc(SLIRP_MSIZE(slirp->if_mtu)); ++ slirp->mbuf_alloced++; ++ if (MBUF_DEBUG || slirp->mbuf_alloced > MBUF_THRESH) ++ flags = M_DOFREE; ++ m->slirp = slirp; ++ } else { ++ m = (struct mbuf *)slirp->m_freelist.qh_link; ++ remque(m); ++ } ++ ++ /* Insert it in the used 
list */ ++ insque(m, &slirp->m_usedlist); ++ m->m_flags = (flags | M_USEDLIST); ++ ++ /* Initialise it */ ++ m->m_size = SLIRP_MSIZE(slirp->if_mtu) - offsetof(struct mbuf, m_dat); ++ m->m_data = m->m_dat; ++ m->m_len = 0; ++ m->m_nextpkt = NULL; ++ m->m_prevpkt = NULL; ++ m->resolution_requested = false; ++ m->expiration_date = (uint64_t)-1; ++ DEBUG_ARG("m = %p", m); ++ return m; ++} ++ ++void m_free(struct mbuf *m) ++{ ++ DEBUG_CALL("m_free"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (m) { ++ /* Remove from m_usedlist */ ++ if (m->m_flags & M_USEDLIST) ++ remque(m); ++ ++ /* If it's M_EXT, free() it */ ++ if (m->m_flags & M_EXT) { ++ g_free(m->m_ext); ++ m->m_flags &= ~M_EXT; ++ } ++ /* ++ * Either free() it or put it on the free list ++ */ ++ if (m->m_flags & M_DOFREE) { ++ m->slirp->mbuf_alloced--; ++ g_free(m); ++ } else if ((m->m_flags & M_FREELIST) == 0) { ++ insque(m, &m->slirp->m_freelist); ++ m->m_flags = M_FREELIST; /* Clobber other flags */ ++ } ++ } /* if(m) */ ++} ++ ++/* ++ * Copy data from one mbuf to the end of ++ * the other.. if result is too big for one mbuf, allocate ++ * an M_EXT data segment ++ */ ++void m_cat(struct mbuf *m, struct mbuf *n) ++{ ++ /* ++ * If there's no room, realloc ++ */ ++ if (M_FREEROOM(m) < n->m_len) ++ m_inc(m, m->m_len + n->m_len); ++ ++ memcpy(m->m_data + m->m_len, n->m_data, n->m_len); ++ m->m_len += n->m_len; ++ ++ m_free(n); ++} ++ ++ ++/* make m 'size' bytes large from m_data */ ++void m_inc(struct mbuf *m, int size) ++{ ++ int gapsize; ++ ++ /* some compilers throw up on gotos. This one we can fake. 
*/ ++ if (M_ROOM(m) > size) { ++ return; ++ } ++ ++ if (m->m_flags & M_EXT) { ++ gapsize = m->m_data - m->m_ext; ++ m->m_ext = g_realloc(m->m_ext, size + gapsize); ++ } else { ++ gapsize = m->m_data - m->m_dat; ++ m->m_ext = g_malloc(size + gapsize); ++ memcpy(m->m_ext, m->m_dat, m->m_size); ++ m->m_flags |= M_EXT; ++ } ++ ++ m->m_data = m->m_ext + gapsize; ++ m->m_size = size + gapsize; ++} ++ ++ ++void m_adj(struct mbuf *m, int len) ++{ ++ if (m == NULL) ++ return; ++ if (len >= 0) { ++ /* Trim from head */ ++ m->m_data += len; ++ m->m_len -= len; ++ } else { ++ /* Trim from tail */ ++ len = -len; ++ m->m_len -= len; ++ } ++} ++ ++ ++/* ++ * Copy len bytes from m, starting off bytes into n ++ */ ++int m_copy(struct mbuf *n, struct mbuf *m, int off, int len) ++{ ++ if (len > M_FREEROOM(n)) ++ return -1; ++ ++ memcpy((n->m_data + n->m_len), (m->m_data + off), len); ++ n->m_len += len; ++ return 0; ++} ++ ++ ++/* ++ * Given a pointer into an mbuf, return the mbuf ++ * XXX This is a kludge, I should eliminate the need for it ++ * Fortunately, it's not used often ++ */ ++struct mbuf *dtom(Slirp *slirp, void *dat) ++{ ++ struct mbuf *m; ++ ++ DEBUG_CALL("dtom"); ++ DEBUG_ARG("dat = %p", dat); ++ ++ /* bug corrected for M_EXT buffers */ ++ for (m = (struct mbuf *)slirp->m_usedlist.qh_link; ++ (struct quehead *)m != &slirp->m_usedlist; m = m->m_next) { ++ if (m->m_flags & M_EXT) { ++ if ((char *)dat >= m->m_ext && (char *)dat < (m->m_ext + m->m_size)) ++ return m; ++ } else { ++ if ((char *)dat >= m->m_dat && (char *)dat < (m->m_dat + m->m_size)) ++ return m; ++ } ++ } ++ ++ DEBUG_ERROR("dtom failed"); ++ ++ return (struct mbuf *)0; ++} ++ ++/* ++ * Duplicate the mbuf ++ * ++ * copy_header specifies whether the bytes before m_data should also be copied. ++ * header_size specifies how many bytes are to be reserved before m_data. 
++ */ ++struct mbuf *m_dup(Slirp *slirp, struct mbuf *m, ++ bool copy_header, ++ size_t header_size) ++{ ++ struct mbuf *n; ++ int mcopy_result; ++ ++ /* The previous mbuf was supposed to have it already, we can check it along ++ * the way */ ++ assert(M_ROOMBEFORE(m) >= header_size); ++ ++ n = m_get(slirp); ++ m_inc(n, m->m_len + header_size); ++ ++ if (copy_header) { ++ m->m_len += header_size; ++ m->m_data -= header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len + header_size); ++ n->m_data += header_size; ++ m->m_len -= header_size; ++ m->m_data += header_size; ++ } else { ++ n->m_data += header_size; ++ mcopy_result = m_copy(n, m, 0, m->m_len); ++ } ++ g_assert(mcopy_result == 0); ++ ++ return n; ++} ++ ++void *mtod_check(struct mbuf *m, size_t len) ++{ ++ if (m->m_len >= len) { ++ return m->m_data; ++ } ++ ++ DEBUG_ERROR("mtod failed"); ++ ++ return NULL; ++} ++ ++void *m_end(struct mbuf *m) ++{ ++ return m->m_data + m->m_len; ++} +diff --git a/slirp/src/mbuf.h b/slirp/src/mbuf.h +new file mode 100644 +index 0000000000..34e697a914 +--- /dev/null ++++ b/slirp/src/mbuf.h +@@ -0,0 +1,192 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. 
/*
 * How much room is in the mbuf, from m_data to the end of the mbuf
 *
 * The argument is parenthesised at every use so that any pointer expression
 * (e.g. "p + 1" or "&buf") can be passed.
 */
#define M_ROOM(m)                                                          \
    (((m)->m_flags & M_EXT) ? (((m)->m_ext + (m)->m_size) - (m)->m_data) : \
                              (((m)->m_dat + (m)->m_size) - (m)->m_data))
/*
 * MBUF_DEBUG selects the mbuf debugging behaviour:
 *  - 1 when DEBUG is defined,
 *  - RUNNING_ON_VALGRIND when built with HAVE_VALGRIND (needs valgrind's
 *    client-request header for that macro),
 *  - 0 otherwise.
 */
#ifdef DEBUG
# define MBUF_DEBUG 1
#else
# ifdef HAVE_VALGRIND
#  include <valgrind/valgrind.h>
#  define MBUF_DEBUG RUNNING_ON_VALGRIND
# else
#  define MBUF_DEBUG 0
# endif
#endif
++ */ ++ ++#include "slirp.h" ++#ifdef G_OS_UNIX ++#include ++#endif ++ ++inline void insque(void *a, void *b) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ register struct quehead *head = (struct quehead *)b; ++ element->qh_link = head->qh_link; ++ head->qh_link = (struct quehead *)element; ++ element->qh_rlink = (struct quehead *)head; ++ ((struct quehead *)(element->qh_link))->qh_rlink = ++ (struct quehead *)element; ++} ++ ++inline void remque(void *a) ++{ ++ register struct quehead *element = (struct quehead *)a; ++ ((struct quehead *)(element->qh_link))->qh_rlink = element->qh_rlink; ++ ((struct quehead *)(element->qh_rlink))->qh_link = element->qh_link; ++ element->qh_rlink = NULL; ++} ++ ++/* TODO: IPv6 */ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = g_new0(struct gfwd_list, 1); ++ ++ f->write_cb = write_cb; ++ f->opaque = opaque; ++ f->ex_fport = port; ++ f->ex_addr = addr; ++ f->ex_next = *ex_ptr; ++ *ex_ptr = f; ++ ++ return f; ++} ++ ++struct gfwd_list *add_exec(struct gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_exec = g_strdup(cmdline); ++ ++ return f; ++} ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port) ++{ ++ struct gfwd_list *f = add_guestfwd(ex_ptr, NULL, NULL, addr, port); ++ ++ f->ex_unix = g_strdup(unixsock); ++ ++ return f; ++} ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port) ++{ ++ for (; *ex_ptr != NULL; ex_ptr = &((*ex_ptr)->ex_next)) { ++ struct gfwd_list *f = *ex_ptr; ++ if (f->ex_addr.s_addr == addr.s_addr && f->ex_fport == port) { ++ *ex_ptr = f->ex_next; ++ g_free(f->ex_exec); ++ g_free(f); ++ return 0; ++ } ++ } ++ return -1; ++} ++ ++static int slirp_socketpair_with_oob(int sv[2]) ++{ ++ 
struct sockaddr_in addr = { ++ .sin_family = AF_INET, ++ .sin_port = 0, ++ .sin_addr.s_addr = INADDR_ANY, ++ }; ++ socklen_t addrlen = sizeof(addr); ++ int ret, s; ++ ++ sv[1] = -1; ++ s = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (s < 0 || bind(s, (struct sockaddr *)&addr, addrlen) < 0 || ++ listen(s, 1) < 0 || ++ getsockname(s, (struct sockaddr *)&addr, &addrlen) < 0) { ++ goto err; ++ } ++ ++ sv[1] = slirp_socket(AF_INET, SOCK_STREAM, 0); ++ if (sv[1] < 0) { ++ goto err; ++ } ++ /* ++ * This connect won't block because we've already listen()ed on ++ * the server end (even though we won't accept() the connection ++ * until later on). ++ */ ++ do { ++ ret = connect(sv[1], (struct sockaddr *)&addr, addrlen); ++ } while (ret < 0 && errno == EINTR); ++ if (ret < 0) { ++ goto err; ++ } ++ ++ do { ++ sv[0] = accept(s, (struct sockaddr *)&addr, &addrlen); ++ } while (sv[0] < 0 && errno == EINTR); ++ if (sv[0] < 0) { ++ goto err; ++ } ++ ++ closesocket(s); ++ return 0; ++ ++err: ++ g_critical("slirp_socketpair(): %s", strerror(errno)); ++ if (s >= 0) { ++ closesocket(s); ++ } ++ if (sv[1] >= 0) { ++ closesocket(sv[1]); ++ } ++ return -1; ++} ++ ++static void fork_exec_child_setup(gpointer data) ++{ ++#ifndef _WIN32 ++ setsid(); ++ ++ /* Unblock all signals and leave our exec()-ee to block what it wants */ ++ sigset_t ss; ++ sigemptyset(&ss); ++ sigprocmask(SIG_SETMASK, &ss, NULL); ++ ++ /* POSIX is obnoxious about SIGCHLD specifically across exec() */ ++ signal(SIGCHLD, SIG_DFL); ++#endif ++} ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ ++#if !GLIB_CHECK_VERSION(2, 58, 0) ++typedef struct SlirpGSpawnFds { ++ GSpawnChildSetupFunc child_setup; ++ gpointer user_data; ++ gint stdin_fd; ++ gint stdout_fd; ++ gint stderr_fd; ++} SlirpGSpawnFds; ++ ++static inline void slirp_gspawn_fds_setup(gpointer user_data) ++{ ++ SlirpGSpawnFds *q = (SlirpGSpawnFds *)user_data; ++ ++ dup2(q->stdin_fd, 0); ++ dup2(q->stdout_fd, 1); ++ 
dup2(q->stderr_fd, 2); ++ q->child_setup(q->user_data); ++} ++#endif ++ ++static inline gboolean ++g_spawn_async_with_fds_slirp(const gchar *working_directory, gchar **argv, ++ gchar **envp, GSpawnFlags flags, ++ GSpawnChildSetupFunc child_setup, ++ gpointer user_data, GPid *child_pid, gint stdin_fd, ++ gint stdout_fd, gint stderr_fd, GError **error) ++{ ++#if GLIB_CHECK_VERSION(2, 58, 0) ++ return g_spawn_async_with_fds(working_directory, argv, envp, flags, ++ child_setup, user_data, child_pid, stdin_fd, ++ stdout_fd, stderr_fd, error); ++#else ++ SlirpGSpawnFds setup = { ++ .child_setup = child_setup, ++ .user_data = user_data, ++ .stdin_fd = stdin_fd, ++ .stdout_fd = stdout_fd, ++ .stderr_fd = stderr_fd, ++ }; ++ ++ return g_spawn_async(working_directory, argv, envp, flags, ++ slirp_gspawn_fds_setup, &setup, child_pid, error); ++#endif ++} ++ ++#define g_spawn_async_with_fds(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) \ ++ g_spawn_async_with_fds_slirp(wd, argv, env, f, c, d, p, ifd, ofd, efd, err) ++ ++#pragma GCC diagnostic pop ++ ++int fork_exec(struct socket *so, const char *ex) ++{ ++ GError *err = NULL; ++ gint argc = 0; ++ gchar **argv = NULL; ++ int opt, sp[2]; ++ ++ DEBUG_CALL("fork_exec"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("ex = %p", ex); ++ ++ if (slirp_socketpair_with_oob(sp) < 0) { ++ return 0; ++ } ++ ++ if (!g_shell_parse_argv(ex, &argc, &argv, &err)) { ++ g_critical("fork_exec invalid command: %s\nerror: %s", ex, err->message); ++ g_error_free(err); ++ return 0; ++ } ++ ++ g_spawn_async_with_fds(NULL /* cwd */, argv, NULL /* env */, ++ G_SPAWN_SEARCH_PATH, fork_exec_child_setup, ++ NULL /* data */, NULL /* child_pid */, sp[1], sp[1], ++ sp[1], &err); ++ g_strfreev(argv); ++ ++ if (err) { ++ g_critical("fork_exec: %s", err->message); ++ g_error_free(err); ++ closesocket(sp[0]); ++ closesocket(sp[1]); ++ return 0; ++ } ++ ++ so->s = sp[0]; ++ closesocket(sp[1]); ++ slirp_socket_set_fast_reuse(so->s); ++ opt = 1; ++ setsockopt(so->s, 
SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return 1; ++} ++ ++int open_unix(struct socket *so, const char *unixpath) ++{ ++#ifdef G_OS_UNIX ++ struct sockaddr_un sa; ++ int s; ++ ++ DEBUG_CALL("open_unix"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("unixpath = %s", unixpath); ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sun_family = AF_UNIX; ++ if (g_strlcpy(sa.sun_path, unixpath, sizeof(sa.sun_path)) >= sizeof(sa.sun_path)) { ++ g_critical("Bad unix path: %s", unixpath); ++ return 0; ++ } ++ ++ s = slirp_socket(PF_UNIX, SOCK_STREAM, 0); ++ if (s < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ return 0; ++ } ++ ++ if (connect(s, (struct sockaddr *)&sa, sizeof(sa)) < 0) { ++ g_critical("open_unix(): %s", strerror(errno)); ++ closesocket(s); ++ return 0; ++ } ++ ++ so->s = s; ++ slirp_set_nonblock(so->s); ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ ++ return 1; ++#else ++ g_assert_not_reached(); ++#endif ++} ++ ++char *slirp_connection_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ const char *const tcpstates[] = { ++ [TCPS_CLOSED] = "CLOSED", [TCPS_LISTEN] = "LISTEN", ++ [TCPS_SYN_SENT] = "SYN_SENT", [TCPS_SYN_RECEIVED] = "SYN_RCVD", ++ [TCPS_ESTABLISHED] = "ESTABLISHED", [TCPS_CLOSE_WAIT] = "CLOSE_WAIT", ++ [TCPS_FIN_WAIT_1] = "FIN_WAIT_1", [TCPS_CLOSING] = "CLOSING", ++ [TCPS_LAST_ACK] = "LAST_ACK", [TCPS_FIN_WAIT_2] = "FIN_WAIT_2", ++ [TCPS_TIME_WAIT] = "TIME_WAIT", ++ }; ++ struct in_addr dst_addr; ++ struct sockaddr_in src; ++ socklen_t src_len; ++ uint16_t dst_port; ++ struct socket *so; ++ const char *state; ++ char buf[20]; ++ ++ g_string_append_printf(str, ++ " Protocol[State] FD Source Address Port " ++ "Dest. 
Address Port RecvQ SendQ\n"); ++ ++ /* TODO: IPv6 */ ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ state = "HOST_FORWARD"; ++ } else if (so->so_tcpcb) { ++ state = tcpstates[so->so_tcpcb->t_state]; ++ } else { ++ state = "NONE"; ++ } ++ if (so->so_state & (SS_HOSTFWD | SS_INCOMING)) { ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ slirp_fmt0(buf, sizeof(buf), " TCP[%s]", state); ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so->so_next) { ++ if (so->so_state & SS_HOSTFWD) { ++ slirp_fmt0(buf, sizeof(buf), " UDP[HOST_FORWARD]"); ++ src_len = sizeof(src); ++ getsockname(so->s, (struct sockaddr *)&src, &src_len); ++ dst_addr = so->so_laddr; ++ dst_port = so->so_lport; ++ } else { ++ slirp_fmt0(buf, sizeof(buf), " UDP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ src.sin_port = so->so_lport; ++ dst_addr = so->so_faddr; ++ dst_port = so->so_fport; ++ } ++ g_string_append_printf(str, "%-19s %3d %15s %5d ", buf, so->s, ++ src.sin_addr.s_addr ? 
inet_ntoa(src.sin_addr) : ++ "*", ++ ntohs(src.sin_port)); ++ g_string_append_printf(str, "%15s %5d %5d %5d\n", inet_ntoa(dst_addr), ++ ntohs(dst_port), so->so_rcv.sb_cc, ++ so->so_snd.sb_cc); ++ } ++ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so->so_next) { ++ slirp_fmt0(buf, sizeof(buf), " ICMP[%d sec]", ++ (so->so_expire - curtime) / 1000); ++ src.sin_addr = so->so_laddr; ++ dst_addr = so->so_faddr; ++ g_string_append_printf(str, "%-19s %3d %15s - ", buf, so->s, ++ src.sin_addr.s_addr ? inet_ntoa(src.sin_addr) : ++ "*"); ++ g_string_append_printf(str, "%15s - %5d %5d\n", inet_ntoa(dst_addr), ++ so->so_rcv.sb_cc, so->so_snd.sb_cc); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++char *slirp_neighbor_info(Slirp *slirp) ++{ ++ GString *str = g_string_new(NULL); ++ ArpTable *arp_table = &slirp->arp_table; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ char ip_addr[INET6_ADDRSTRLEN]; ++ char eth_addr[ETH_ADDRSTRLEN]; ++ const char *ip; ++ ++ g_string_append_printf(str, " %5s %-17s %s\n", ++ "Table", "MacAddr", "IP Address"); ++ ++ for (int i = 0; i < ARP_TABLE_SIZE; ++i) { ++ struct in_addr addr; ++ addr.s_addr = arp_table->table[i].ar_sip; ++ if (!addr.s_addr) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET, &addr, ip_addr, sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "ARP", ++ slirp_ether_ntoa(arp_table->table[i].ar_sha, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ for (int i = 0; i < NDP_TABLE_SIZE; ++i) { ++ if (in6_zero(&ndp_table->table[i].ip_addr)) { ++ continue; ++ } ++ ip = inet_ntop(AF_INET6, &ndp_table->table[i].ip_addr, ip_addr, ++ sizeof(ip_addr)); ++ g_assert(ip != NULL); ++ g_string_append_printf(str, " %5s %-17s %s\n", "NDP", ++ slirp_ether_ntoa(ndp_table->table[i].eth_addr, ++ eth_addr, sizeof(eth_addr)), ++ ip); ++ } ++ ++ return g_string_free(str, FALSE); ++} ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ struct sockaddr *addr = 
NULL; ++ int addr_size = 0; ++ ++ if (af == AF_INET && so->slirp->outbound_addr != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr; ++ addr_size = sizeof(struct sockaddr_in); ++ } else if (af == AF_INET6 && so->slirp->outbound_addr6 != NULL) { ++ addr = (struct sockaddr *)so->slirp->outbound_addr6; ++ addr_size = sizeof(struct sockaddr_in6); ++ } ++ ++ if (addr != NULL) { ++ ret = bind(so->s, addr, addr_size); ++ } ++ return ret; ++} +diff --git a/slirp/src/misc.h b/slirp/src/misc.h +new file mode 100644 +index 0000000000..81b370cfb1 +--- /dev/null ++++ b/slirp/src/misc.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef MISC_H ++#define MISC_H ++ ++#include "libslirp.h" ++ ++struct gfwd_list { ++ SlirpWriteCb write_cb; ++ void *opaque; ++ struct in_addr ex_addr; /* Server address */ ++ int ex_fport; /* Port to telnet to */ ++ char *ex_exec; /* Command line of what to exec */ ++ char *ex_unix; /* unix socket */ ++ struct gfwd_list *ex_next; ++}; ++ ++#define EMU_NONE 0x0 ++ ++/* TCP emulations */ ++#define EMU_CTL 0x1 ++#define EMU_FTP 0x2 ++#define EMU_KSH 0x3 ++#define EMU_IRC 0x4 ++#define EMU_REALAUDIO 0x5 ++#define EMU_RLOGIN 0x6 ++#define EMU_IDENT 0x7 ++ ++#define EMU_NOCONNECT 0x10 /* Don't connect */ ++ ++struct tos_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++}; ++ ++struct emu_t { ++ uint16_t lport; ++ uint16_t fport; ++ uint8_t tos; ++ uint8_t emu; ++ struct emu_t *next; ++}; ++ ++struct slirp_quehead { ++ struct slirp_quehead *qh_link; ++ struct slirp_quehead *qh_rlink; ++}; ++ ++void slirp_insque(void *, void *); ++void slirp_remque(void *); ++int fork_exec(struct socket *so, const char *ex); ++int open_unix(struct socket *so, const char *unixsock); ++ ++struct gfwd_list *add_guestfwd(struct gfwd_list **ex_ptr, SlirpWriteCb write_cb, ++ void *opaque, struct in_addr addr, int port); ++ ++struct gfwd_list *add_exec(struct 
gfwd_list **ex_ptr, const char *cmdline, ++ struct in_addr addr, int port); ++ ++struct gfwd_list *add_unix(struct gfwd_list **ex_ptr, const char *unixsock, ++ struct in_addr addr, int port); ++ ++int remove_guestfwd(struct gfwd_list **ex_ptr, struct in_addr addr, int port); ++ ++int slirp_bind_outbound(struct socket *so, unsigned short af); ++ ++#endif +diff --git a/slirp/src/ncsi-pkt.h b/slirp/src/ncsi-pkt.h +new file mode 100644 +index 0000000000..39cf8446d6 +--- /dev/null ++++ b/slirp/src/ncsi-pkt.h +@@ -0,0 +1,445 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright Gavin Shan, IBM Corporation 2016. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef NCSI_PKT_H ++#define NCSI_PKT_H ++ ++/* from linux/net/ncsi/ncsi-pkt.h */ ++#define __be32 uint32_t ++#define __be16 uint16_t ++ ++struct ncsi_pkt_hdr { ++ unsigned char mc_id; /* Management controller ID */ ++ unsigned char revision; /* NCSI version - 0x01 */ ++ unsigned char reserved; /* Reserved */ ++ unsigned char id; /* Packet sequence number */ ++ unsigned char type; /* Packet type */ ++ unsigned char channel; /* Network controller ID */ ++ __be16 length; /* Payload length */ ++ __be32 reserved1[2]; /* Reserved */ ++} SLIRP_PACKED; ++ ++struct ncsi_cmd_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ __be16 code; /* Response code */ ++ __be16 reason; /* Response reason */ ++} SLIRP_PACKED; ++ ++struct ncsi_aen_pkt_hdr { ++ struct ncsi_pkt_hdr common; /* Common NCSI packet header */ ++ unsigned char reserved2[3]; /* Reserved */ ++ unsigned char type; /* AEN packet type */ ++} SLIRP_PACKED; ++ ++/* NCSI common command packet */ ++struct ncsi_cmd_pkt { ++ struct ncsi_cmd_pkt_hdr cmd; /* Command header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[26]; ++} SLIRP_PACKED; ++ ++struct ncsi_rsp_pkt { ++ struct ncsi_rsp_pkt_hdr rsp; /* Response header */ ++ __be32 checksum; /* Checksum */ ++ unsigned char pad[22]; ++} SLIRP_PACKED; ++ ++/* Select Package 
/*
 * NC-SI command packets.
 *
 * Each layout below is the command header followed by the command-specific
 * fields, a 32-bit checksum and trailing pad bytes.  Every struct is declared
 * SLIRP_PACKED and multi-byte fields use the __be16/__be32 types, so the
 * member order and sizes here are the on-the-wire order and sizes.
 */

/* Select Package (presumably; the introducing comment lies above this chunk) */
struct ncsi_cmd_sp_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char reserved[3]; /* Reserved */
    unsigned char hw_arbitration; /* HW arbitration */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* Disable Channel */
struct ncsi_cmd_dc_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char reserved[3]; /* Reserved */
    unsigned char ald; /* Allow link down */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* Reset Channel */
struct ncsi_cmd_rc_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    __be32 reserved; /* Reserved */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* AEN Enable */
struct ncsi_cmd_ae_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char reserved[3]; /* Reserved */
    unsigned char mc_id; /* MC ID */
    __be32 mode; /* AEN working mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[18];
} SLIRP_PACKED;

/* Set Link */
struct ncsi_cmd_sl_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    __be32 mode; /* Link working mode */
    __be32 oem_mode; /* OEM link mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[18];
} SLIRP_PACKED;

/*
 * Set VLAN Filter
 *
 * NOTE(review): the members after the command header add up to 26 bytes
 * here (2+2+2+1+1+4+14), whereas every other command struct in this group
 * adds up to 30.  Confirm whether pad[] is intentionally shorter.
 */
struct ncsi_cmd_svf_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    __be16 reserved; /* Reserved */
    __be16 vlan; /* VLAN ID */
    __be16 reserved1; /* Reserved */
    unsigned char index; /* VLAN table index */
    unsigned char enable; /* Enable or disable */
    __be32 checksum; /* Checksum */
    unsigned char pad[14];
} SLIRP_PACKED;

/* Enable VLAN */
struct ncsi_cmd_ev_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char reserved[3]; /* Reserved */
    unsigned char mode; /* VLAN filter mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* Set MAC Address */
struct ncsi_cmd_sma_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char mac[6]; /* MAC address */
    unsigned char index; /* MAC table index */
    unsigned char at_e; /* Addr type and operation */
    __be32 checksum; /* Checksum */
    unsigned char pad[18];
} SLIRP_PACKED;

/* Enable Broadcast Filter */
struct ncsi_cmd_ebf_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    __be32 mode; /* Filter mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* Enable Global Multicast Filter */
struct ncsi_cmd_egmf_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    __be32 mode; /* Global MC mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* Set NCSI Flow Control */
struct ncsi_cmd_snfc_pkt {
    struct ncsi_cmd_pkt_hdr cmd; /* Command header */
    unsigned char reserved[3]; /* Reserved */
    unsigned char mode; /* Flow control mode */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/*
 * NC-SI response packets.
 *
 * Each layout is the response header followed by the response data and a
 * 32-bit checksum.  Only the Get Link Status response carries explicit pad
 * bytes after the checksum.
 */

/* Get Link Status */
struct ncsi_rsp_gls_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 status; /* Link status */
    __be32 other; /* Other indications */
    __be32 oem_status; /* OEM link status */
    __be32 checksum;
    unsigned char pad[10];
} SLIRP_PACKED;

/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 ncsi_version; /* NCSI version */
    unsigned char reserved[3]; /* Reserved */
    unsigned char alpha2; /* NCSI version */
    unsigned char fw_name[12]; /* f/w name string */
    __be32 fw_version; /* f/w version */
    __be16 pci_ids[4]; /* PCI IDs */
    __be32 mf_id; /* Manufacture ID */
    __be32 checksum;
} SLIRP_PACKED;

/* Get Capabilities */
struct ncsi_rsp_gc_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 cap; /* Capabilities */
    __be32 bc_cap; /* Broadcast cap */
    __be32 mc_cap; /* Multicast cap */
    __be32 buf_cap; /* Buffering cap */
    __be32 aen_cap; /* AEN cap */
    unsigned char vlan_cnt; /* VLAN filter count */
    unsigned char mixed_cnt; /* Mix filter count */
    unsigned char mc_cnt; /* MC filter count */
    unsigned char uc_cnt; /* UC filter count */
    unsigned char reserved[2]; /* Reserved */
    unsigned char vlan_mode; /* VLAN mode */
    unsigned char channel_cnt; /* Channel count */
    __be32 checksum; /* Checksum */
} SLIRP_PACKED;

/* Get Parameters */
struct ncsi_rsp_gp_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    unsigned char mac_cnt; /* Number of MAC addr */
    unsigned char reserved[2]; /* Reserved */
    unsigned char mac_enable; /* MAC addr enable flags */
    unsigned char vlan_cnt; /* VLAN tag count */
    unsigned char reserved1; /* Reserved */
    __be16 vlan_enable; /* VLAN tag enable flags */
    __be32 link_mode; /* Link setting */
    __be32 bc_mode; /* BC filter mode */
    __be32 valid_modes; /* Valid mode parameters */
    unsigned char vlan_mode; /* VLAN mode */
    unsigned char fc_mode; /* Flow control mode */
    unsigned char reserved2[2]; /* Reserved */
    __be32 aen_mode; /* AEN mode */
    unsigned char mac[6]; /* Supported MAC addr */
    __be16 vlan; /* Supported VLAN tags */
    __be32 checksum; /* Checksum */
} SLIRP_PACKED;

/* Get Controller Packet Statistics */
struct ncsi_rsp_gcps_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 cnt_hi; /* Counter cleared */
    __be32 cnt_lo; /* Counter cleared */
    __be32 rx_bytes; /* Rx bytes */
    __be32 tx_bytes; /* Tx bytes */
    __be32 rx_uc_pkts; /* Rx UC packets */
    __be32 rx_mc_pkts; /* Rx MC packets */
    __be32 rx_bc_pkts; /* Rx BC packets */
    __be32 tx_uc_pkts; /* Tx UC packets */
    __be32 tx_mc_pkts; /* Tx MC packets */
    __be32 tx_bc_pkts; /* Tx BC packets */
    __be32 fcs_err; /* FCS errors */
    __be32 align_err; /* Alignment errors */
    __be32 false_carrier; /* False carrier detection */
    __be32 runt_pkts; /* Rx runt packets */
    __be32 jabber_pkts; /* Rx jabber packets */
    __be32 rx_pause_xon; /* Rx pause XON frames */
    __be32 rx_pause_xoff; /* Rx XOFF frames */
    __be32 tx_pause_xon; /* Tx XON frames */
    __be32 tx_pause_xoff; /* Tx XOFF frames */
    __be32 tx_s_collision; /* Single collision frames */
    __be32 tx_m_collision; /* Multiple collision frames */
    __be32 l_collision; /* Late collision frames */
    __be32 e_collision; /* Excessive collision frames */
    __be32 rx_ctl_frames; /* Rx control frames */
    __be32 rx_64_frames; /* Rx 64-bytes frames */
    __be32 rx_127_frames; /* Rx 65-127 bytes frames */
    __be32 rx_255_frames; /* Rx 128-255 bytes frames */
    __be32 rx_511_frames; /* Rx 256-511 bytes frames */
    __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */
    __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */
    __be32 rx_9022_frames; /* Rx 1523-9022 bytes frames */
    __be32 tx_64_frames; /* Tx 64-bytes frames */
    __be32 tx_127_frames; /* Tx 65-127 bytes frames */
    __be32 tx_255_frames; /* Tx 128-255 bytes frames */
    __be32 tx_511_frames; /* Tx 256-511 bytes frames */
    __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
    __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
    __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
    __be32 rx_valid_bytes; /* Rx valid bytes */
    __be32 rx_runt_pkts; /* Rx error runt packets */
    __be32 rx_jabber_pkts; /* Rx error jabber packets */
    __be32 checksum; /* Checksum */
} SLIRP_PACKED;

/* Get NCSI Statistics */
struct ncsi_rsp_gns_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 rx_cmds; /* Rx NCSI commands */
    __be32 dropped_cmds; /* Dropped commands */
    __be32 cmd_type_errs; /* Command type errors */
    __be32 cmd_csum_errs; /* Command checksum errors */
    __be32 rx_pkts; /* Rx NCSI packets */
    __be32 tx_pkts; /* Tx NCSI packets */
    __be32 tx_aen_pkts; /* Tx AEN packets */
    __be32 checksum; /* Checksum */
} SLIRP_PACKED;

/* Get NCSI Pass-through Statistics */
struct ncsi_rsp_gnpts_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 tx_pkts; /* Tx packets */
    __be32 tx_dropped; /* Tx dropped packets */
    __be32 tx_channel_err; /* Tx channel errors */
    __be32 tx_us_err; /* Tx undersize errors */
    __be32 rx_pkts; /* Rx packets */
    __be32 rx_dropped; /* Rx dropped packets */
    __be32 rx_channel_err; /* Rx channel errors */
    __be32 rx_us_err; /* Rx undersize errors */
    __be32 rx_os_err; /* Rx oversize errors */
    __be32 checksum; /* Checksum */
} SLIRP_PACKED;

/* Get package status */
struct ncsi_rsp_gps_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    __be32 status; /* Hardware arbitration status */
    __be32 checksum;
} SLIRP_PACKED;

/* Get package UUID */
struct ncsi_rsp_gpuuid_pkt {
    struct ncsi_rsp_pkt_hdr rsp; /* Response header */
    unsigned char uuid[16]; /* UUID */
    __be32 checksum;
} SLIRP_PACKED;

/*
 * NC-SI AEN packets: AEN header, event-specific fields, checksum and
 * trailing pad bytes.
 */

/* AEN: Link State Change */
struct ncsi_aen_lsc_pkt {
    struct ncsi_aen_pkt_hdr aen; /* AEN header */
    __be32 status; /* Link status */
    __be32 oem_status; /* OEM link status */
    __be32 checksum; /* Checksum */
    unsigned char pad[14];
} SLIRP_PACKED;

/* AEN: Configuration Required */
struct ncsi_aen_cr_pkt {
    struct ncsi_aen_pkt_hdr aen; /* AEN header */
    __be32 checksum; /* Checksum */
    unsigned char pad[22];
} SLIRP_PACKED;

/* AEN: Host Network Controller Driver Status Change */
struct ncsi_aen_hncdsc_pkt {
    struct ncsi_aen_pkt_hdr aen; /* AEN header */
    __be32 status; /* Status */
    __be32 checksum; /* Checksum */
    unsigned char pad[18];
} SLIRP_PACKED;
/* NCSI packet revision */
#define NCSI_PKT_REVISION 0x01

/*
 * NCSI packet commands
 *
 * Values for the "type" field of a command packet header.
 */
#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */
#define NCSI_PKT_CMD_SP 0x01 /* Select Package */
#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */
#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */
#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */
#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */
#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */
#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */
#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */
#define NCSI_PKT_CMD_SL 0x09 /* Set Link */
#define NCSI_PKT_CMD_GLS 0x0a /* Get Link Status */
#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */
#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */
#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */
#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */
#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */
#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */
#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */
#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */
#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */
#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */
#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */
#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */
#define NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */
#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */
#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-through Statistics */
#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */
#define NCSI_PKT_CMD_OEM 0x50 /* OEM */
#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */

/*
 * NCSI packet responses
 *
 * A response type is always the matching command type plus 0x80.
 */
#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80)
#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80)
#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80)
#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80)
#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80)
#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80)
#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80)
#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80)
#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80)
#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80)
#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80)
#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80)
#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80)
#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80)
#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80)
#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80)
#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80)
#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80)
#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80)
#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80)
#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80)
#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80)
#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80)
#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80)
#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80)
#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80)
#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80)
#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80)
#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80)
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)

/*
 * NCSI response code/reason
 *
 * The _C_ values go into the response header's "code" field and the _R_
 * values into its "reason" field (see their use in ncsi_input()).
 */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */
#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */
#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */
#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */
#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */
#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */
#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */
#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */
#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */
#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */

/* NCSI AEN packet type */
#define NCSI_PKT_AEN 0xFF /* AEN Packet */
#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */
#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */
#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */

#endif /* NCSI_PKT_H */
/*
 * Compute the NC-SI checksum of a packet.
 *
 * The checksum is the two's complement of the 32-bit unsigned sum of the
 * NC-SI packet header and payload, interpreted as a series of big-endian
 * 16-bit unsigned integers.
 *
 * @data: first byte of the NC-SI header
 * @len:  number of bytes to cover; values <= 0 yield 0
 *
 * Returns the checksum in host byte order.
 *
 * An odd @len is summed as if the data were followed by one zero byte, so
 * the function never reads data[len] (the previous loop did).
 */
static uint32_t ncsi_calculate_checksum(const uint8_t *data, int len)
{
    uint32_t sum = 0;
    int i;

    /* Whole 16-bit words. */
    for (i = 0; i + 1 < len; i += 2) {
        sum += ((uint32_t)data[i] << 8) | data[i + 1];
    }

    /* Trailing odd byte: high half of a zero-padded word. */
    if (i < len) {
        sum += (uint32_t)data[i] << 8;
    }

    return ~sum + 1;
}
type; ++ int payload; ++ int (*handler)(struct ncsi_rsp_pkt_hdr *rnh); ++} ncsi_rsp_handlers[] = { { NCSI_PKT_RSP_CIS, 4, NULL }, ++ { NCSI_PKT_RSP_SP, 4, NULL }, ++ { NCSI_PKT_RSP_DP, 4, NULL }, ++ { NCSI_PKT_RSP_EC, 4, NULL }, ++ { NCSI_PKT_RSP_DC, 4, NULL }, ++ { NCSI_PKT_RSP_RC, 4, NULL }, ++ { NCSI_PKT_RSP_ECNT, 4, NULL }, ++ { NCSI_PKT_RSP_DCNT, 4, NULL }, ++ { NCSI_PKT_RSP_AE, 4, NULL }, ++ { NCSI_PKT_RSP_SL, 4, NULL }, ++ { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, ++ { NCSI_PKT_RSP_SVF, 4, NULL }, ++ { NCSI_PKT_RSP_EV, 4, NULL }, ++ { NCSI_PKT_RSP_DV, 4, NULL }, ++ { NCSI_PKT_RSP_SMA, 4, NULL }, ++ { NCSI_PKT_RSP_EBF, 4, NULL }, ++ { NCSI_PKT_RSP_DBF, 4, NULL }, ++ { NCSI_PKT_RSP_EGMF, 4, NULL }, ++ { NCSI_PKT_RSP_DGMF, 4, NULL }, ++ { NCSI_PKT_RSP_SNFC, 4, NULL }, ++ { NCSI_PKT_RSP_GVI, 40, NULL }, ++ { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, ++ { NCSI_PKT_RSP_GP, 40, ncsi_rsp_handler_gp }, ++ { NCSI_PKT_RSP_GCPS, 172, NULL }, ++ { NCSI_PKT_RSP_GNS, 172, NULL }, ++ { NCSI_PKT_RSP_GNPTS, 172, NULL }, ++ { NCSI_PKT_RSP_GPS, 8, NULL }, ++ { NCSI_PKT_RSP_OEM, 0, NULL }, ++ { NCSI_PKT_RSP_PLDM, 0, NULL }, ++ { NCSI_PKT_RSP_GPUUID, 20, NULL } }; ++ ++/* ++ * packet format : ncsi header + payload + checksum ++ */ ++#define NCSI_MAX_PAYLOAD 172 ++#define NCSI_MAX_LEN (sizeof(struct ncsi_pkt_hdr) + NCSI_MAX_PAYLOAD + 4) ++ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct ncsi_pkt_hdr *nh = ++ (const struct ncsi_pkt_hdr *)(pkt + ETH_HLEN); ++ uint8_t ncsi_reply[ETH_HLEN + NCSI_MAX_LEN]; ++ struct ethhdr *reh = (struct ethhdr *)ncsi_reply; ++ struct ncsi_rsp_pkt_hdr *rnh = ++ (struct ncsi_rsp_pkt_hdr *)(ncsi_reply + ETH_HLEN); ++ const struct ncsi_rsp_handler *handler = NULL; ++ int i; ++ int ncsi_rsp_len = sizeof(*nh); ++ uint32_t checksum; ++ uint32_t *pchecksum; ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct ncsi_pkt_hdr)) { ++ return; /* packet too short */ ++ } ++ ++ memset(ncsi_reply, 0, sizeof(ncsi_reply)); ++ ++ 
memset(reh->h_dest, 0xff, ETH_ALEN); ++ memset(reh->h_source, 0xff, ETH_ALEN); ++ reh->h_proto = htons(ETH_P_NCSI); ++ ++ for (i = 0; i < G_N_ELEMENTS(ncsi_rsp_handlers); i++) { ++ if (ncsi_rsp_handlers[i].type == nh->type + 0x80) { ++ handler = &ncsi_rsp_handlers[i]; ++ break; ++ } ++ } ++ ++ rnh->common.mc_id = nh->mc_id; ++ rnh->common.revision = NCSI_PKT_REVISION; ++ rnh->common.id = nh->id; ++ rnh->common.type = nh->type + 0x80; ++ rnh->common.channel = nh->channel; ++ ++ if (handler) { ++ rnh->common.length = htons(handler->payload); ++ rnh->code = htons(NCSI_PKT_RSP_C_COMPLETED); ++ rnh->reason = htons(NCSI_PKT_RSP_R_NO_ERROR); ++ ++ if (handler->handler) { ++ /* TODO: handle errors */ ++ handler->handler(rnh); ++ } ++ ncsi_rsp_len += handler->payload; ++ } else { ++ rnh->common.length = 0; ++ rnh->code = htons(NCSI_PKT_RSP_C_UNAVAILABLE); ++ rnh->reason = htons(NCSI_PKT_RSP_R_UNKNOWN); ++ } ++ ++ /* Add the optional checksum at the end of the frame. */ ++ checksum = ncsi_calculate_checksum((uint8_t *)rnh, ncsi_rsp_len); ++ pchecksum = (uint32_t *)((void *)rnh + ncsi_rsp_len); ++ *pchecksum = htonl(checksum); ++ ncsi_rsp_len += 4; ++ ++ slirp_send_packet_all(slirp, ncsi_reply, ETH_HLEN + ncsi_rsp_len); ++} +diff --git a/slirp/src/ndp_table.c b/slirp/src/ndp_table.c +new file mode 100644 +index 0000000000..fdb189d595 +--- /dev/null ++++ b/slirp/src/ndp_table.c +@@ -0,0 +1,98 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron, Yann Bordenave, Serigne Modou Wagne. 
++ */ ++ ++#include "slirp.h" ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_add"); ++ DEBUG_ARG("ip = %s", addrstr); ++ DEBUG_ARG("hw addr = %s", slirp_ether_ntoa(ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr) || in6_zero(&ip_addr)) { ++ /* Do not register multicast or unspecified addresses */ ++ DEBUG_CALL(" abort: do not register multicast or unspecified address"); ++ return; ++ } ++ ++ /* Search for an entry */ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ DEBUG_CALL(" already in table: update the entry"); ++ /* Update the entry */ ++ memcpy(ndp_table->table[i].eth_addr, ethaddr, ETH_ALEN); ++ return; ++ } ++ } ++ ++ /* No entry found, create a new one */ ++ DEBUG_CALL(" create new entry"); ++ /* Save the first entry, it is the guest. 
*/ ++ if (in6_zero(&ndp_table->guest_in6_addr)) { ++ ndp_table->guest_in6_addr = ip_addr; ++ } ++ ndp_table->table[ndp_table->next_victim].ip_addr = ip_addr; ++ memcpy(ndp_table->table[ndp_table->next_victim].eth_addr, ethaddr, ++ ETH_ALEN); ++ ndp_table->next_victim = (ndp_table->next_victim + 1) % NDP_TABLE_SIZE; ++} ++ ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]) ++{ ++ char addrstr[INET6_ADDRSTRLEN]; ++ NdpTable *ndp_table = &slirp->ndp_table; ++ int i; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ inet_ntop(AF_INET6, &(ip_addr), addrstr, INET6_ADDRSTRLEN); ++ ++ DEBUG_CALL("ndp_table_search"); ++ DEBUG_ARG("ip = %s", addrstr); ++ ++ /* If unspecified address */ ++ if (in6_zero(&ip_addr)) { ++ /* return Ethernet broadcast address */ ++ memset(out_ethaddr, 0xff, ETH_ALEN); ++ return 1; ++ } ++ ++ /* Multicast address: fec0::abcd:efgh/8 -> 33:33:ab:cd:ef:gh */ ++ if (IN6_IS_ADDR_MULTICAST(&ip_addr)) { ++ out_ethaddr[0] = 0x33; ++ out_ethaddr[1] = 0x33; ++ out_ethaddr[2] = ip_addr.s6_addr[12]; ++ out_ethaddr[3] = ip_addr.s6_addr[13]; ++ out_ethaddr[4] = ip_addr.s6_addr[14]; ++ out_ethaddr[5] = ip_addr.s6_addr[15]; ++ DEBUG_ARG("multicast addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ ++ for (i = 0; i < NDP_TABLE_SIZE; i++) { ++ if (in6_equal(&ndp_table->table[i].ip_addr, &ip_addr)) { ++ memcpy(out_ethaddr, ndp_table->table[i].eth_addr, ETH_ALEN); ++ DEBUG_ARG("found hw addr = %s", ++ slirp_ether_ntoa(out_ethaddr, ethaddr_str, ++ sizeof(ethaddr_str))); ++ return 1; ++ } ++ } ++ ++ DEBUG_CALL(" ip not found in table"); ++ return 0; ++} +diff --git a/slirp/src/sbuf.c b/slirp/src/sbuf.c +new file mode 100644 +index 0000000000..b357091705 +--- /dev/null ++++ b/slirp/src/sbuf.c +@@ -0,0 +1,168 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m); ++ ++void sbfree(struct sbuf *sb) ++{ ++ g_free(sb->sb_data); ++} ++ ++bool sbdrop(struct sbuf *sb, size_t num) ++{ ++ int limit = sb->sb_datalen / 2; ++ ++ g_warn_if_fail(num <= sb->sb_cc); ++ if (num > sb->sb_cc) ++ num = sb->sb_cc; ++ ++ sb->sb_cc -= num; ++ sb->sb_rptr += num; ++ if (sb->sb_rptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ if (sb->sb_cc < limit && sb->sb_cc + num >= limit) { ++ return true; ++ } ++ ++ return false; ++} ++ ++void sbreserve(struct sbuf *sb, size_t size) ++{ ++ sb->sb_wptr = sb->sb_rptr = sb->sb_data = g_realloc(sb->sb_data, size); ++ sb->sb_cc = 0; ++ sb->sb_datalen = size; ++} ++ ++/* ++ * Try and write() to the socket, whatever doesn't get written ++ * append to the buffer... for a host with a fast net connection, ++ * this prevents an unnecessary copy of the data ++ * (the socket is non-blocking, so we won't hang) ++ */ ++void sbappend(struct socket *so, struct mbuf *m) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("sbappend"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("m->m_len = %d", m->m_len); ++ ++ /* Shouldn't happen, but... e.g. 
foreign host closes connection */ ++ if (m->m_len <= 0) { ++ m_free(m); ++ return; ++ } ++ ++ /* ++ * If there is urgent data, call sosendoob ++ * if not all was sent, sowrite will take care of the rest ++ * (The rest of this function is just an optimisation) ++ */ ++ if (so->so_urgc) { ++ sbappendsb(&so->so_rcv, m); ++ m_free(m); ++ sosendoob(so); ++ return; ++ } ++ ++ /* ++ * We only write if there's nothing in the buffer, ++ * ottherwise it'll arrive out of order, and hence corrupt ++ */ ++ if (!so->so_rcv.sb_cc) ++ ret = slirp_send(so, m->m_data, m->m_len, 0); ++ ++ if (ret <= 0) { ++ /* ++ * Nothing was written ++ * It's possible that the socket has closed, but ++ * we don't need to check because if it has closed, ++ * it will be detected in the normal way by soread() ++ */ ++ sbappendsb(&so->so_rcv, m); ++ } else if (ret != m->m_len) { ++ /* ++ * Something was written, but not everything.. ++ * sbappendsb the rest ++ */ ++ m->m_len -= ret; ++ m->m_data += ret; ++ sbappendsb(&so->so_rcv, m); ++ } /* else */ ++ /* Whatever happened, we free the mbuf */ ++ m_free(m); ++} ++ ++/* ++ * Copy the data from m into sb ++ * The caller is responsible to make sure there's enough room ++ */ ++static void sbappendsb(struct sbuf *sb, struct mbuf *m) ++{ ++ int len, n, nn; ++ ++ len = m->m_len; ++ ++ if (sb->sb_wptr < sb->sb_rptr) { ++ n = sb->sb_rptr - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ } else { ++ /* Do the right edge first */ ++ n = sb->sb_data + sb->sb_datalen - sb->sb_wptr; ++ if (n > len) ++ n = len; ++ memcpy(sb->sb_wptr, m->m_data, n); ++ len -= n; ++ if (len) { ++ /* Now the left edge */ ++ nn = sb->sb_rptr - sb->sb_data; ++ if (nn > len) ++ nn = len; ++ memcpy(sb->sb_data, m->m_data + n, nn); ++ n += nn; ++ } ++ } ++ ++ sb->sb_cc += n; ++ sb->sb_wptr += n; ++ if (sb->sb_wptr >= sb->sb_data + sb->sb_datalen) ++ sb->sb_wptr -= sb->sb_datalen; ++} ++ ++/* ++ * Copy data from sbuf to a normal, straight buffer ++ * Don't 
update the sbuf rptr, this will be ++ * done in sbdrop when the data is acked ++ */ ++void sbcopy(struct sbuf *sb, size_t off, size_t len, char *to) ++{ ++ char *from; ++ ++ g_assert(len + off <= sb->sb_cc); ++ ++ from = sb->sb_rptr + off; ++ if (from >= sb->sb_data + sb->sb_datalen) ++ from -= sb->sb_datalen; ++ ++ if (from < sb->sb_wptr) { ++ memcpy(to, from, len); ++ } else { ++ /* re-use off */ ++ off = (sb->sb_data + sb->sb_datalen) - from; ++ if (off > len) ++ off = len; ++ memcpy(to, from, off); ++ len -= off; ++ if (len) ++ memcpy(to + off, sb->sb_data, len); ++ } ++} +diff --git a/slirp/src/sbuf.h b/slirp/src/sbuf.h +new file mode 100644 +index 0000000000..01886fbd01 +--- /dev/null ++++ b/slirp/src/sbuf.h +@@ -0,0 +1,27 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SBUF_H ++#define SBUF_H ++ ++#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc) ++ ++struct sbuf { ++ uint32_t sb_cc; /* actual chars in buffer */ ++ uint32_t sb_datalen; /* Length of data */ ++ char *sb_wptr; /* write pointer. points to where the next ++ * bytes should be written in the sbuf */ ++ char *sb_rptr; /* read pointer. 
#ifndef SBUF_H
#define SBUF_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Only pointers to these are used below.  Declaring them here keeps the
 * prototypes from introducing prototype-scoped struct types when this header
 * is included before their definitions.
 */
struct socket;
struct mbuf;

/* Number of bytes that can still be stored in sb. */
#define sbspace(sb) ((sb)->sb_datalen - (sb)->sb_cc)

/* Ring buffer of bytes. */
struct sbuf {
    uint32_t sb_cc; /* actual chars in buffer */
    uint32_t sb_datalen; /* Length of data */
    char *sb_wptr; /* write pointer. points to where the next
                    * bytes should be written in the sbuf */
    char *sb_rptr; /* read pointer. points to where the next
                    * byte should be read from the sbuf */
    char *sb_data; /* Actual data */
};

/* Release the storage of sb. */
void sbfree(struct sbuf *sb);
/* Discard up to len bytes from the front of sb. */
bool sbdrop(struct sbuf *sb, size_t len);
/* (Re)allocate the storage of sb to size bytes and empty it. */
void sbreserve(struct sbuf *sb, size_t size);
/* Send or queue the data of m on socket so; m is freed. */
void sbappend(struct socket *so, struct mbuf *m);
/* Copy len bytes at offset off from sb into p without consuming them. */
void sbcopy(struct sbuf *sb, size_t off, size_t len, char *p);

#endif
++ */ ++#include "slirp.h" ++ ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++/* https://gitlab.freedesktop.org/slirp/libslirp/issues/18 */ ++#if defined(__NetBSD__) && defined(if_mtu) ++#undef if_mtu ++#endif ++ ++int slirp_debug; ++ ++/* Define to 1 if you want KEEPALIVE timers */ ++bool slirp_do_keepalive; ++ ++/* host loopback address */ ++struct in_addr loopback_addr; ++/* host loopback network mask */ ++unsigned long loopback_mask; ++ ++/* emulated hosts use the MAC addr 52:55:IP:IP:IP:IP */ ++static const uint8_t special_ethaddr[ETH_ALEN] = { 0x52, 0x55, 0x00, ++ 0x00, 0x00, 0x00 }; ++ ++unsigned curtime; ++ ++static struct in_addr dns_addr; ++#ifndef _WIN32 ++static struct in6_addr dns6_addr; ++#endif ++static unsigned dns_addr_time; ++#ifndef _WIN32 ++static unsigned dns6_addr_time; ++#endif ++ ++#define TIMEOUT_FAST 2 /* milliseconds */ ++#define TIMEOUT_SLOW 499 /* milliseconds */ ++/* for the aging of certain requests like DNS */ ++#define TIMEOUT_DEFAULT 1000 /* milliseconds */ ++ ++#if defined(_WIN32) ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ FIXED_INFO *FixedInfo = NULL; ++ ULONG BufLen; ++ DWORD ret; ++ IP_ADDR_STRING *pIPAddr; ++ struct in_addr tmp_addr; ++ ++ if (dns_addr.s_addr != 0 && (curtime - dns_addr_time) < TIMEOUT_DEFAULT) { ++ *pdns_addr = dns_addr; ++ return 0; ++ } ++ ++ FixedInfo = (FIXED_INFO *)GlobalAlloc(GPTR, sizeof(FIXED_INFO)); ++ BufLen = sizeof(FIXED_INFO); ++ ++ if (ERROR_BUFFER_OVERFLOW == GetNetworkParams(FixedInfo, &BufLen)) { ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ FixedInfo = GlobalAlloc(GPTR, BufLen); ++ } ++ ++ if ((ret = GetNetworkParams(FixedInfo, &BufLen)) != ERROR_SUCCESS) { ++ printf("GetNetworkParams failed. 
ret = %08x\n", (unsigned)ret); ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return -1; ++ } ++ ++ pIPAddr = &(FixedInfo->DnsServerList); ++ inet_aton(pIPAddr->IpAddress.String, &tmp_addr); ++ *pdns_addr = tmp_addr; ++ dns_addr = tmp_addr; ++ dns_addr_time = curtime; ++ if (FixedInfo) { ++ GlobalFree(FixedInfo); ++ FixedInfo = NULL; ++ } ++ return 0; ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ return -1; ++} ++ ++static void winsock_cleanup(void) ++{ ++ WSACleanup(); ++} ++ ++#elif defined(__APPLE__) ++ ++#include ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, unsigned *cached_time) ++{ ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_libresolv(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ struct __res_state state; ++ union res_sockaddr_union servers[NI_MAXSERV]; ++ int count; ++ int found; ++ ++ if (res_ninit(&state) != 0) { ++ return -1; ++ } ++ ++ count = res_getservers(&state, servers, NI_MAXSERV); ++ found = 0; ++ DEBUG_MISC("IP address of your DNS(s):"); ++ for (int i = 0; i < count; i++) { ++ if (af == servers[i].sin.sin_family) { ++ found++; ++ } ++ ++ // we use the first found entry ++ if (found == 1) { ++ memcpy(pdns_addr, &servers[i].sin.sin_addr, addrlen); ++ memcpy(cached_addr, &servers[i].sin.sin_addr, addrlen); ++ if (scope_id) { ++ *scope_id = 0; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(servers[i].sin.sin_family, ++ &servers[i].sin.sin_addr, ++ s, ++ sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ ++ res_nclose(&state); ++ if (!found) ++ 
return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_libresolv(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_libresolv(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, &dns6_addr_time); ++} ++ ++#else // !defined(_WIN32) && !defined(__APPLE__) ++ ++#if defined(__HAIKU__) ++#define RESOLV_CONF_PATH "/boot/system/settings/network/resolv.conf" ++#else ++#define RESOLV_CONF_PATH "/etc/resolv.conf" ++#endif ++ ++static int get_dns_addr_cached(void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, struct stat *cached_stat, ++ unsigned *cached_time) ++{ ++ struct stat old_stat; ++ if (curtime - *cached_time < TIMEOUT_DEFAULT) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ old_stat = *cached_stat; ++ if (stat(RESOLV_CONF_PATH, cached_stat) != 0) { ++ return -1; ++ } ++ if (cached_stat->st_dev == old_stat.st_dev && ++ cached_stat->st_ino == old_stat.st_ino && ++ cached_stat->st_size == old_stat.st_size && ++ cached_stat->st_mtime == old_stat.st_mtime) { ++ memcpy(pdns_addr, cached_addr, addrlen); ++ return 0; ++ } ++ return 1; ++} ++ ++static int get_dns_addr_resolv_conf(int af, void *pdns_addr, void *cached_addr, ++ socklen_t addrlen, uint32_t *scope_id, ++ unsigned *cached_time) ++{ ++ char buff[512]; ++ char buff2[257]; ++ FILE *f; ++ int found = 0; ++ union { ++ struct in_addr dns_addr; ++ struct in6_addr dns6_addr; ++ } tmp_addr; ++ unsigned if_index; ++ ++ assert(sizeof(tmp_addr) >= addrlen); ++ f = 
fopen(RESOLV_CONF_PATH, "r"); ++ if (!f) ++ return -1; ++ ++ DEBUG_MISC("IP address of your DNS(s):"); ++ while (fgets(buff, 512, f) != NULL) { ++ if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) { ++ char *c = strchr(buff2, '%'); ++ if (c) { ++ if_index = if_nametoindex(c + 1); ++ *c = '\0'; ++ } else { ++ if_index = 0; ++ } ++ ++ if (!inet_pton(af, buff2, &tmp_addr)) { ++ continue; ++ } ++ /* If it's the first one, set it to dns_addr */ ++ if (!found) { ++ memcpy(pdns_addr, &tmp_addr, addrlen); ++ memcpy(cached_addr, &tmp_addr, addrlen); ++ if (scope_id) { ++ *scope_id = if_index; ++ } ++ *cached_time = curtime; ++ } ++ ++ if (++found > 3) { ++ DEBUG_MISC(" (more)"); ++ break; ++ } else if (slirp_debug & DBG_MISC) { ++ char s[INET6_ADDRSTRLEN]; ++ const char *res = inet_ntop(af, &tmp_addr, s, sizeof(s)); ++ if (!res) { ++ res = " (string conversion error)"; ++ } ++ DEBUG_MISC(" %s", res); ++ } ++ } ++ } ++ fclose(f); ++ if (!found) ++ return -1; ++ return 0; ++} ++ ++int get_dns_addr(struct in_addr *pdns_addr) ++{ ++ static struct stat dns_addr_stat; ++ ++ if (dns_addr.s_addr != 0) { ++ int ret; ++ ret = get_dns_addr_cached(pdns_addr, &dns_addr, sizeof(dns_addr), ++ &dns_addr_stat, &dns_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET, pdns_addr, &dns_addr, ++ sizeof(dns_addr), NULL, &dns_addr_time); ++} ++ ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id) ++{ ++ static struct stat dns6_addr_stat; ++ ++ if (!in6_zero(&dns6_addr)) { ++ int ret; ++ ret = get_dns_addr_cached(pdns6_addr, &dns6_addr, sizeof(dns6_addr), ++ &dns6_addr_stat, &dns6_addr_time); ++ if (ret <= 0) { ++ return ret; ++ } ++ } ++ return get_dns_addr_resolv_conf(AF_INET6, pdns6_addr, &dns6_addr, ++ sizeof(dns6_addr), scope_id, ++ &dns6_addr_time); ++} ++ ++#endif ++ ++static void slirp_init_once(void) ++{ ++ static int initialized; ++ const char *debug; ++#ifdef _WIN32 ++ WSADATA Data; ++#endif ++ ++ if (initialized) { ++ 
return; ++ } ++ initialized = 1; ++ ++#ifdef _WIN32 ++ WSAStartup(MAKEWORD(2, 0), &Data); ++ atexit(winsock_cleanup); ++#endif ++ ++ loopback_addr.s_addr = htonl(INADDR_LOOPBACK); ++ loopback_mask = htonl(IN_CLASSA_NET); ++ ++ debug = g_getenv("SLIRP_DEBUG"); ++ if (debug) { ++ const GDebugKey keys[] = { ++ { "call", DBG_CALL }, ++ { "misc", DBG_MISC }, ++ { "error", DBG_ERROR }, ++ { "tftp", DBG_TFTP }, ++ { "verbose_call", DBG_VERBOSE_CALL }, ++ }; ++ slirp_debug = g_parse_debug_string(debug, keys, G_N_ELEMENTS(keys)); ++ } ++} ++ ++Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, void *opaque) ++{ ++ Slirp *slirp; ++ ++ g_return_val_if_fail(cfg != NULL, NULL); ++ g_return_val_if_fail(cfg->version >= SLIRP_CONFIG_VERSION_MIN, NULL); ++ g_return_val_if_fail(cfg->version <= SLIRP_CONFIG_VERSION_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mtu >= IF_MTU_MIN || cfg->if_mtu == 0, NULL); ++ g_return_val_if_fail(cfg->if_mtu <= IF_MTU_MAX, NULL); ++ g_return_val_if_fail(cfg->if_mru >= IF_MRU_MIN || cfg->if_mru == 0, NULL); ++ g_return_val_if_fail(cfg->if_mru <= IF_MRU_MAX, NULL); ++ g_return_val_if_fail(!cfg->bootfile || ++ (strlen(cfg->bootfile) < ++ G_SIZEOF_MEMBER(struct bootp_t, bp_file)), NULL); ++ ++ slirp = g_malloc0(sizeof(Slirp)); ++ ++ slirp_init_once(); ++ ++ slirp->opaque = opaque; ++ slirp->cb = callbacks; ++ slirp->grand = g_rand_new(); ++ slirp->restricted = cfg->restricted; ++ ++ slirp->in_enabled = cfg->in_enabled; ++ slirp->in6_enabled = cfg->in6_enabled; ++ ++ if_init(slirp); ++ ip_init(slirp); ++ ip6_init(slirp); ++ ++ m_init(slirp); ++ ++ slirp->vnetwork_addr = cfg->vnetwork; ++ slirp->vnetwork_mask = cfg->vnetmask; ++ slirp->vhost_addr = cfg->vhost; ++ slirp->vprefix_addr6 = cfg->vprefix_addr6; ++ slirp->vprefix_len = cfg->vprefix_len; ++ slirp->vhost_addr6 = cfg->vhost6; ++ if (cfg->vhostname) { ++ slirp_pstrcpy(slirp->client_hostname, sizeof(slirp->client_hostname), ++ cfg->vhostname); ++ } ++ slirp->tftp_prefix = 
g_strdup(cfg->tftp_path); ++ slirp->bootp_filename = g_strdup(cfg->bootfile); ++ slirp->vdomainname = g_strdup(cfg->vdomainname); ++ slirp->vdhcp_startaddr = cfg->vdhcp_start; ++ slirp->vnameserver_addr = cfg->vnameserver; ++ slirp->vnameserver_addr6 = cfg->vnameserver6; ++ slirp->tftp_server_name = g_strdup(cfg->tftp_server_name); ++ ++ if (cfg->vdnssearch) { ++ translate_dnssearch(slirp, cfg->vdnssearch); ++ } ++ slirp->if_mtu = cfg->if_mtu == 0 ? IF_MTU_DEFAULT : cfg->if_mtu; ++ slirp->if_mru = cfg->if_mru == 0 ? IF_MRU_DEFAULT : cfg->if_mru; ++ slirp->disable_host_loopback = cfg->disable_host_loopback; ++ slirp->enable_emu = cfg->enable_emu; ++ ++ if (cfg->version >= 2) { ++ slirp->outbound_addr = cfg->outbound_addr; ++ slirp->outbound_addr6 = cfg->outbound_addr6; ++ } else { ++ slirp->outbound_addr = NULL; ++ slirp->outbound_addr6 = NULL; ++ } ++ ++ if (cfg->version >= 3) { ++ slirp->disable_dns = cfg->disable_dns; ++ } else { ++ slirp->disable_dns = false; ++ } ++ ++ return slirp; ++} ++ ++Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, ++ struct in_addr vnetmask, struct in_addr vhost, ++ bool in6_enabled, struct in6_addr vprefix_addr6, ++ uint8_t vprefix_len, struct in6_addr vhost6, ++ const char *vhostname, const char *tftp_server_name, ++ const char *tftp_path, const char *bootfile, ++ struct in_addr vdhcp_start, struct in_addr vnameserver, ++ struct in6_addr vnameserver6, const char **vdnssearch, ++ const char *vdomainname, const SlirpCb *callbacks, ++ void *opaque) ++{ ++ SlirpConfig cfg; ++ memset(&cfg, 0, sizeof(cfg)); ++ cfg.version = 1; ++ cfg.restricted = restricted; ++ cfg.in_enabled = in_enabled; ++ cfg.vnetwork = vnetwork; ++ cfg.vnetmask = vnetmask; ++ cfg.vhost = vhost; ++ cfg.in6_enabled = in6_enabled; ++ cfg.vprefix_addr6 = vprefix_addr6; ++ cfg.vprefix_len = vprefix_len; ++ cfg.vhost6 = vhost6; ++ cfg.vhostname = vhostname; ++ cfg.tftp_server_name = tftp_server_name; ++ cfg.tftp_path = tftp_path; ++ cfg.bootfile = 
bootfile; ++ cfg.vdhcp_start = vdhcp_start; ++ cfg.vnameserver = vnameserver; ++ cfg.vnameserver6 = vnameserver6; ++ cfg.vdnssearch = vdnssearch; ++ cfg.vdomainname = vdomainname; ++ return slirp_new(&cfg, callbacks, opaque); ++} ++ ++void slirp_cleanup(Slirp *slirp) ++{ ++ struct gfwd_list *e, *next; ++ ++ for (e = slirp->guestfwd_list; e; e = next) { ++ next = e->ex_next; ++ g_free(e->ex_exec); ++ g_free(e->ex_unix); ++ g_free(e); ++ } ++ ++ ip_cleanup(slirp); ++ ip6_cleanup(slirp); ++ m_cleanup(slirp); ++ ++ g_rand_free(slirp->grand); ++ ++ g_free(slirp->vdnssearch); ++ g_free(slirp->tftp_prefix); ++ g_free(slirp->bootp_filename); ++ g_free(slirp->vdomainname); ++ g_free(slirp); ++} ++ ++#define CONN_CANFSEND(so) \ ++ (((so)->so_state & (SS_FCANTSENDMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++#define CONN_CANFRCV(so) \ ++ (((so)->so_state & (SS_FCANTRCVMORE | SS_ISFCONNECTED)) == SS_ISFCONNECTED) ++ ++static void slirp_update_timeout(Slirp *slirp, uint32_t *timeout) ++{ ++ uint32_t t; ++ ++ if (*timeout <= TIMEOUT_FAST) { ++ return; ++ } ++ ++ t = MIN(1000, *timeout); ++ ++ /* If we have tcp timeout with slirp, then we will fill @timeout with ++ * more precise value. 
++ */ ++ if (slirp->time_fasttimo) { ++ *timeout = TIMEOUT_FAST; ++ return; ++ } ++ if (slirp->do_slowtimo) { ++ t = MIN(TIMEOUT_SLOW, t); ++ } ++ *timeout = t; ++} ++ ++void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, ++ SlirpAddPollCb add_poll, void *opaque) ++{ ++ struct socket *so, *so_next; ++ ++ /* ++ * First, TCP sockets ++ */ ++ ++ /* ++ * *_slowtimo needs calling if there are IP fragments ++ * in the fragment queue, or there are TCP connections active ++ */ ++ slirp->do_slowtimo = ((slirp->tcb.so_next != &slirp->tcb) || ++ (&slirp->ipq.ip_link != slirp->ipq.ip_link.next)); ++ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int events = 0; ++ ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if we need a tcp_fasttimo ++ */ ++ if (slirp->time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK) { ++ slirp->time_fasttimo = curtime; /* Flag when want a fasttimo */ ++ } ++ ++ /* ++ * NOFDREF can include still connecting to local-host, ++ * newly socreated() sockets etc. Don't want to select these. ++ */ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++ /* ++ * Set for reading sockets which are accepting ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing sockets which are connecting ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ so->pollfds_idx = ++ add_poll(so->s, SLIRP_POLL_OUT | SLIRP_POLL_ERR, opaque); ++ continue; ++ } ++ ++ /* ++ * Set for writing if we are connected, can send more, and ++ * we have something to send ++ */ ++ if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) { ++ events |= SLIRP_POLL_OUT | SLIRP_POLL_ERR; ++ } ++ ++ /* ++ * Set for reading (and urgent data) if we are connected, can ++ * receive more, and we have room for it XXX /2 ? 
++ */ ++ if (CONN_CANFRCV(so) && ++ (so->so_snd.sb_cc < (so->so_snd.sb_datalen / 2))) { ++ events |= SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | ++ SLIRP_POLL_PRI; ++ } ++ ++ if (events) { ++ so->pollfds_idx = add_poll(so->s, events, opaque); ++ } ++ } ++ ++ /* ++ * UDP sockets ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ udp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ /* ++ * When UDP packets are received from over the ++ * link, they're sendto()'d straight away, so ++ * no need for setting for writing ++ * Limit the number of packets queued by this session ++ * to 4. Note that even though we try and limit this ++ * to 4 packets, the session could have more queued ++ * if the packets needed to be fragmented ++ * (XXX <= 4 ?) ++ */ ++ if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ /* ++ * ICMP sockets ++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ so_next = so->so_next; ++ ++ so->pollfds_idx = -1; ++ ++ /* ++ * See if it's timed out ++ */ ++ if (so->so_expire) { ++ if (so->so_expire <= curtime) { ++ icmp_detach(so); ++ continue; ++ } else { ++ slirp->do_slowtimo = true; /* Let socket expire */ ++ } ++ } ++ ++ if (so->so_state & SS_ISFCONNECTED) { ++ so->pollfds_idx = add_poll( ++ so->s, SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR, opaque); ++ } ++ } ++ ++ slirp_update_timeout(slirp, timeout); ++} ++ ++void slirp_pollfds_poll(Slirp *slirp, int select_error, ++ SlirpGetREventsCb get_revents, void *opaque) ++{ ++ struct socket *so, *so_next; ++ int ret; ++ ++ curtime = slirp->cb->clock_get_ns(slirp->opaque) / SCALE_MS; ++ ++ /* ++ * See if anything has 
timed out ++ */ ++ if (slirp->time_fasttimo && ++ ((curtime - slirp->time_fasttimo) >= TIMEOUT_FAST)) { ++ tcp_fasttimo(slirp); ++ slirp->time_fasttimo = 0; ++ } ++ if (slirp->do_slowtimo && ++ ((curtime - slirp->last_slowtimo) >= TIMEOUT_SLOW)) { ++ ip_slowtimo(slirp); ++ tcp_slowtimo(slirp); ++ slirp->last_slowtimo = curtime; ++ } ++ ++ /* ++ * Check sockets ++ */ ++ if (!select_error) { ++ /* ++ * Check TCP sockets ++ */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->so_state & SS_NOFDREF || so->s == -1) { ++ continue; ++ } ++ ++#ifndef __APPLE__ ++ /* ++ * Check for URG data ++ * This will soread as well, so no need to ++ * test for SLIRP_POLL_IN below if this succeeds. ++ * ++ * This is however disabled on MacOS, which apparently always ++ * reports data as PRI when it is the last data of the ++ * connection. We would then report it out of band, which the guest ++ * would most probably not be ready for. ++ */ ++ if (revents & SLIRP_POLL_PRI) { ++ ret = sorecvoob(so); ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. */ ++ continue; ++ } ++ } ++ /* ++ * Check sockets for reading ++ */ ++ else ++#endif ++ if (revents & ++ (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR | SLIRP_POLL_PRI)) { ++ /* ++ * Check for incoming connections ++ */ ++ if (so->so_state & SS_FACCEPTCONN) { ++ tcp_connect(so); ++ continue; ++ } /* else */ ++ ret = soread(so); ++ ++ /* Output it if we read something */ ++ if (ret > 0) { ++ tcp_output(sototcpcb(so)); ++ } ++ if (ret < 0) { ++ /* Socket error might have resulted in the socket being ++ * removed, do not try to do anything more with it. 
*/ ++ continue; ++ } ++ } ++ ++ /* ++ * Check sockets for writing ++ */ ++ if (!(so->so_state & SS_NOFDREF) && ++ (revents & (SLIRP_POLL_OUT | SLIRP_POLL_ERR))) { ++ /* ++ * Check for non-blocking, still-connecting sockets ++ */ ++ if (so->so_state & SS_ISFCONNECTING) { ++ /* Connected */ ++ so->so_state &= ~SS_ISFCONNECTING; ++ ++ ret = send(so->s, (const void *)&ret, 0, 0); ++ if (ret < 0) { ++ /* XXXXX Must fix, zero bytes is a NOP */ ++ if (errno == EAGAIN || errno == EWOULDBLOCK || ++ errno == EINPROGRESS || errno == ENOTCONN) { ++ continue; ++ } ++ ++ /* else failed */ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; ++ } ++ /* else so->so_state &= ~SS_ISFCONNECTING; */ ++ ++ /* ++ * Continue tcp_input ++ */ ++ tcp_input((struct mbuf *)NULL, sizeof(struct ip), so, ++ so->so_ffamily); ++ /* continue; */ ++ } else { ++ ret = sowrite(so); ++ if (ret > 0) { ++ /* Call tcp_output in case we need to send a window ++ * update to the guest, otherwise it will be stuck ++ * until it sends a window probe. */ ++ tcp_output(sototcpcb(so)); ++ } ++ } ++ } ++ } ++ ++ /* ++ * Now UDP sockets. ++ * Incoming packets are sent straight away, they're not buffered. ++ * Incoming UDP data isn't buffered either. ++ */ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ sorecvfrom(so); ++ } ++ } ++ ++ /* ++ * Check incoming ICMP relies. 
++ */ ++ for (so = slirp->icmp.so_next; so != &slirp->icmp; so = so_next) { ++ int revents; ++ ++ so_next = so->so_next; ++ ++ revents = 0; ++ if (so->pollfds_idx != -1) { ++ revents = get_revents(so->pollfds_idx, opaque); ++ } ++ ++ if (so->s != -1 && ++ (revents & (SLIRP_POLL_IN | SLIRP_POLL_HUP | SLIRP_POLL_ERR))) { ++ icmp_receive(so); ++ } ++ } ++ } ++ ++ if_start(slirp); ++} ++ ++static void arp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ const struct slirp_arphdr *ah = ++ (const struct slirp_arphdr *)(pkt + ETH_HLEN); ++ uint8_t arp_reply[MAX(ETH_HLEN + sizeof(struct slirp_arphdr), 64)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_reply; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_reply + ETH_HLEN); ++ int ar_op; ++ struct gfwd_list *ex_ptr; ++ ++ if (!slirp->in_enabled) { ++ return; ++ } ++ ++ if (pkt_len < ETH_HLEN + sizeof(struct slirp_arphdr)) { ++ return; /* packet too short */ ++ } ++ ++ ar_op = ntohs(ah->ar_op); ++ switch (ar_op) { ++ case ARPOP_REQUEST: ++ if (ah->ar_tip == ah->ar_sip) { ++ /* Gratuitous ARP */ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ return; ++ } ++ ++ if ((ah->ar_tip & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (ah->ar_tip == slirp->vnameserver_addr.s_addr || ++ ah->ar_tip == slirp->vhost_addr.s_addr) ++ goto arp_ok; ++ /* TODO: IPv6 */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_addr.s_addr == ah->ar_tip) ++ goto arp_ok; ++ } ++ return; ++ arp_ok: ++ memset(arp_reply, 0, sizeof(arp_reply)); ++ ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ ++ /* ARP request for alias/dns mac address */ ++ memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &ah->ar_tip, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = 
htons(ARPOP_REPLY); ++ memcpy(rah->ar_sha, reh->h_source, ETH_ALEN); ++ rah->ar_sip = ah->ar_tip; ++ memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN); ++ rah->ar_tip = ah->ar_sip; ++ slirp_send_packet_all(slirp, arp_reply, sizeof(arp_reply)); ++ } ++ break; ++ case ARPOP_REPLY: ++ arp_table_add(slirp, ah->ar_sip, ah->ar_sha); ++ break; ++ default: ++ break; ++ } ++} ++ ++void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len) ++{ ++ struct mbuf *m; ++ int proto; ++ ++ if (pkt_len < ETH_HLEN) ++ return; ++ ++ proto = (((uint16_t)pkt[12]) << 8) + pkt[13]; ++ switch (proto) { ++ case ETH_P_ARP: ++ arp_input(slirp, pkt, pkt_len); ++ break; ++ case ETH_P_IP: ++ case ETH_P_IPV6: ++ m = m_get(slirp); ++ if (!m) ++ return; ++ /* Note: we add 2 to align the IP header on 4 bytes, ++ * and add the margin for the tcpiphdr overhead */ ++ if (M_FREEROOM(m) < pkt_len + TCPIPHDR_DELTA + 2) { ++ m_inc(m, pkt_len + TCPIPHDR_DELTA + 2); ++ } ++ m->m_len = pkt_len + TCPIPHDR_DELTA + 2; ++ memcpy(m->m_data + TCPIPHDR_DELTA + 2, pkt, pkt_len); ++ ++ m->m_data += TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ m->m_len -= TCPIPHDR_DELTA + 2 + ETH_HLEN; ++ ++ if (proto == ETH_P_IP) { ++ ip_input(m); ++ } else if (proto == ETH_P_IPV6) { ++ ip6_input(m); ++ } ++ break; ++ ++ case ETH_P_NCSI: ++ ncsi_input(slirp, pkt, pkt_len); ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Prepare the IPv4 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap4(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ ++ if (!arp_table_search(slirp, iph->ip_dst.s_addr, ethaddr)) { ++ uint8_t arp_req[ETH_HLEN + sizeof(struct slirp_arphdr)]; ++ struct ethhdr *reh = (struct ethhdr *)arp_req; ++ struct slirp_arphdr *rah = (struct slirp_arphdr *)(arp_req + ETH_HLEN); ++ ++ if (!ifm->resolution_requested) { ++ /* If the client addr is not known, send an ARP request */ ++ memset(reh->h_dest, 0xff, ETH_ALEN); ++ memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&reh->h_source[2], &slirp->vhost_addr, 4); ++ reh->h_proto = htons(ETH_P_ARP); ++ rah->ar_hrd = htons(1); ++ rah->ar_pro = htons(ETH_P_IP); ++ rah->ar_hln = ETH_ALEN; ++ rah->ar_pln = 4; ++ rah->ar_op = htons(ARPOP_REQUEST); ++ ++ /* source hw addr */ ++ memcpy(rah->ar_sha, special_ethaddr, ETH_ALEN - 4); ++ memcpy(&rah->ar_sha[2], &slirp->vhost_addr, 4); ++ ++ /* source IP */ ++ rah->ar_sip = slirp->vhost_addr.s_addr; ++ ++ /* target hw addr (none) */ ++ memset(rah->ar_tha, 0, ETH_ALEN); ++ ++ /* target IP */ ++ rah->ar_tip = iph->ip_dst.s_addr; ++ slirp->client_ipaddr = iph->ip_dst; ++ slirp_send_packet_all(slirp, arp_req, sizeof(arp_req)); ++ ifm->resolution_requested = true; ++ ++ /* Expire request and drop outgoing packet after 1 second */ ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 4); ++ /* XXX: not correct */ ++ memcpy(&eh->h_source[2], &slirp->vhost_addr, 4); ++ eh->h_proto = htons(ETH_P_IP); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Prepare the IPv6 packet to be sent to the ethernet device. Returns 1 if no ++ * packet should be sent, 0 if the packet must be re-queued, 2 if the packet ++ * is ready to go. 
++ */ ++static int if_encap6(Slirp *slirp, struct mbuf *ifm, struct ethhdr *eh, ++ uint8_t ethaddr[ETH_ALEN]) ++{ ++ const struct ip6 *ip6h = mtod(ifm, const struct ip6 *); ++ if (!ndp_table_search(slirp, ip6h->ip_dst, ethaddr)) { ++ if (!ifm->resolution_requested) { ++ ndp_send_ns(slirp, ip6h->ip_dst); ++ ifm->resolution_requested = true; ++ ifm->expiration_date = ++ slirp->cb->clock_get_ns(slirp->opaque) + 1000000000ULL; ++ } ++ return 0; ++ } else { ++ eh->h_proto = htons(ETH_P_IPV6); ++ in6_compute_ethaddr(ip6h->ip_src, eh->h_source); ++ ++ /* Send this */ ++ return 2; ++ } ++} ++ ++/* Output the IP packet to the ethernet device. Returns 0 if the packet must be ++ * re-queued. ++ */ ++int if_encap(Slirp *slirp, struct mbuf *ifm) ++{ ++ uint8_t buf[IF_MTU_MAX + 100]; ++ struct ethhdr *eh = (struct ethhdr *)buf; ++ uint8_t ethaddr[ETH_ALEN]; ++ const struct ip *iph = (const struct ip *)ifm->m_data; ++ int ret; ++ char ethaddr_str[ETH_ADDRSTRLEN]; ++ ++ if (ifm->m_len + ETH_HLEN > sizeof(buf)) { ++ return 1; ++ } ++ ++ switch (iph->ip_v) { ++ case IPVERSION: ++ ret = if_encap4(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ case IP6VERSION: ++ ret = if_encap6(slirp, ifm, eh, ethaddr); ++ if (ret < 2) { ++ return ret; ++ } ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ memcpy(eh->h_dest, ethaddr, ETH_ALEN); ++ DEBUG_ARG("src = %s", slirp_ether_ntoa(eh->h_source, ethaddr_str, ++ sizeof(ethaddr_str))); ++ DEBUG_ARG("dst = %s", slirp_ether_ntoa(eh->h_dest, ethaddr_str, ++ sizeof(ethaddr_str))); ++ memcpy(buf + sizeof(struct ethhdr), ifm->m_data, ifm->m_len); ++ slirp_send_packet_all(slirp, buf, ifm->m_len + ETH_HLEN); ++ return 1; ++} ++ ++/* Drop host forwarding rule, return 0 if found. */ ++int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port) ++{ ++ struct socket *so; ++ struct socket *head = (is_udp ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_in addr; ++ int port = htons(host_port); ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ addr_len == sizeof(addr) && ++ addr.sin_family == AF_INET && ++ addr.sin_addr.s_addr == host_addr.s_addr && ++ addr.sin_port == port) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, ++ int host_port, struct in_addr guest_addr, int guest_port) ++{ ++ if (!guest_addr.s_addr) { ++ guest_addr = slirp->vdhcp_startaddr; ++ } ++ if (is_udp) { ++ if (!udp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } else { ++ if (!tcp_listen(slirp, host_addr.s_addr, htons(host_port), ++ guest_addr.s_addr, htons(guest_port), SS_HOSTFWD)) ++ return -1; ++ } ++ return 0; ++} ++ ++int slirp_remove_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ int flags) ++{ ++ struct socket *so; ++ struct socket *head = (flags & SLIRP_HOSTFWD_UDP ? 
&slirp->udb : &slirp->tcb); ++ struct sockaddr_storage addr; ++ socklen_t addr_len; ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ addr_len = sizeof(addr); ++ if ((so->so_state & SS_HOSTFWD) && ++ getsockname(so->s, (struct sockaddr *)&addr, &addr_len) == 0 && ++ sockaddr_equal(&addr, (const struct sockaddr_storage *) haddr)) { ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++ return 0; ++ } ++ } ++ ++ return -1; ++} ++ ++int slirp_add_hostxfwd(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *gaddr, socklen_t gaddrlen, ++ int flags) ++{ ++ struct sockaddr_in gdhcp_addr; ++ int fwd_flags = SS_HOSTFWD; ++ ++ if (flags & SLIRP_HOSTFWD_V6ONLY) ++ fwd_flags |= SS_HOSTFWD_V6ONLY; ++ ++ if (gaddr->sa_family == AF_INET) { ++ const struct sockaddr_in *gaddr_in = (const struct sockaddr_in *) gaddr; ++ ++ if (gaddrlen < sizeof(struct sockaddr_in)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ if (!gaddr_in->sin_addr.s_addr) { ++ gdhcp_addr = *gaddr_in; ++ gdhcp_addr.sin_addr = slirp->vdhcp_startaddr; ++ gaddr = (struct sockaddr *) &gdhcp_addr; ++ gaddrlen = sizeof(gdhcp_addr); ++ } ++ } else { ++ if (gaddrlen < sizeof(struct sockaddr_in6)) { ++ errno = EINVAL; ++ return -1; ++ } ++ ++ /* ++ * Libslirp currently only provides a stateless DHCPv6 server, thus ++ * we can't translate "addr-any" to the guest here. Instead, we defer ++ * performing the translation to when it's needed. See ++ * soassign_guest_addr_if_needed(). 
++ */ ++ } ++ ++ if (flags & SLIRP_HOSTFWD_UDP) { ++ if (!udpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } else { ++ if (!tcpx_listen(slirp, haddr, haddrlen, ++ gaddr, gaddrlen, ++ fwd_flags)) ++ return -1; ++ } ++ return 0; ++} ++ ++/* TODO: IPv6 */ ++static bool check_guestfwd(Slirp *slirp, struct in_addr *guest_addr, ++ int guest_port) ++{ ++ struct gfwd_list *tmp_ptr; ++ ++ if (!guest_addr->s_addr) { ++ guest_addr->s_addr = slirp->vnetwork_addr.s_addr | ++ (htonl(0x0204) & ~slirp->vnetwork_mask.s_addr); ++ } ++ if ((guest_addr->s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr || ++ guest_addr->s_addr == slirp->vhost_addr.s_addr || ++ guest_addr->s_addr == slirp->vnameserver_addr.s_addr) { ++ return false; ++ } ++ ++ /* check if the port is "bound" */ ++ for (tmp_ptr = slirp->guestfwd_list; tmp_ptr; tmp_ptr = tmp_ptr->ex_next) { ++ if (guest_port == tmp_ptr->ex_fport && ++ guest_addr->s_addr == tmp_ptr->ex_addr.s_addr) ++ return false; ++ } ++ ++ return true; ++} ++ ++int slirp_add_exec(Slirp *slirp, const char *cmdline, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_exec(&slirp->guestfwd_list, cmdline, *guest_addr, htons(guest_port)); ++ return 0; ++} ++ ++int slirp_add_unix(Slirp *slirp, const char *unixsock, ++ struct in_addr *guest_addr, int guest_port) ++{ ++#ifdef G_OS_UNIX ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_unix(&slirp->guestfwd_list, unixsock, *guest_addr, htons(guest_port)); ++ return 0; ++#else ++ g_warn_if_reached(); ++ return -1; ++#endif ++} ++ ++int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, ++ struct in_addr *guest_addr, int guest_port) ++{ ++ if (!check_guestfwd(slirp, guest_addr, guest_port)) { ++ return -1; ++ } ++ ++ add_guestfwd(&slirp->guestfwd_list, write_cb, opaque, *guest_addr, ++ htons(guest_port)); ++ return 
0; ++} ++ ++int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ return remove_guestfwd(&slirp->guestfwd_list, guest_addr, ++ htons(guest_port)); ++} ++ ++ssize_t slirp_send(struct socket *so, const void *buf, size_t len, int flags) ++{ ++ if (so->s == -1 && so->guestfwd) { ++ /* XXX this blocks entire thread. Rewrite to use ++ * qemu_chr_fe_write and background I/O callbacks */ ++ so->guestfwd->write_cb(buf, len, so->guestfwd->opaque); ++ return len; ++ } ++ ++ if (so->s == -1) { ++ /* ++ * This should in theory not happen but it is hard to be ++ * sure because some code paths will end up with so->s == -1 ++ * on a failure but don't dispose of the struct socket. ++ * Check specifically, so we don't pass -1 to send(). ++ */ ++ errno = EBADF; ++ return -1; ++ } ++ ++ return send(so->s, buf, len, flags); ++} ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct socket *so; ++ ++ /* TODO: IPv6 */ ++ for (so = slirp->tcb.so_next; so != &slirp->tcb; so = so->so_next) { ++ if (so->so_faddr.s_addr == guest_addr.s_addr && ++ htons(so->so_fport) == guest_port) { ++ return so; ++ } ++ } ++ return NULL; ++} ++ ++size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port) ++{ ++ struct iovec iov[2]; ++ struct socket *so; ++ ++ so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so || so->so_state & SS_NOFDREF) { ++ return 0; ++ } ++ ++ if (!CONN_CANFRCV(so) || so->so_snd.sb_cc >= (so->so_snd.sb_datalen / 2)) { ++ return 0; ++ } ++ ++ return sopreprbuf(so, iov, NULL); ++} ++ ++void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, ++ const uint8_t *buf, int size) ++{ ++ int ret; ++ struct socket *so = slirp_find_ctl_socket(slirp, guest_addr, guest_port); ++ ++ if (!so) ++ return; ++ ++ ret = soreadbuf(so, (const char *)buf, size); ++ ++ if (ret > 0) ++ tcp_output(sototcpcb(so)); ++} ++ ++void 
slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len) ++{ ++ ssize_t ret = slirp->cb->send_packet(buf, len, slirp->opaque); ++ ++ if (ret < 0) { ++ g_critical("Failed to send packet, ret: %ld", (long)ret); ++ } else if (ret < len) { ++ DEBUG_ERROR("send_packet() didn't send all data: %ld < %lu", (long)ret, ++ (unsigned long)len); ++ } ++} +diff --git a/slirp/src/slirp.h b/slirp/src/slirp.h +new file mode 100644 +index 0000000000..89d79f3de5 +--- /dev/null ++++ b/slirp/src/slirp.h +@@ -0,0 +1,289 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef SLIRP_H ++#define SLIRP_H ++ ++#ifdef _WIN32 ++ ++/* as defined in sdkddkver.h */ ++#ifndef _WIN32_WINNT ++#define _WIN32_WINNT 0x0600 /* Vista */ ++#endif ++/* reduces the number of implicitly included headers */ ++#ifndef WIN32_LEAN_AND_MEAN ++#define WIN32_LEAN_AND_MEAN ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++ ++#else ++#define O_BINARY 0 ++#endif ++ ++#ifndef _WIN32 ++#include ++#include ++#include ++#include ++#include ++#endif ++ ++#ifdef __APPLE__ ++#include ++#endif ++ ++/* Avoid conflicting with the libc insque() and remque(), which ++ have different prototypes. 
*/ ++#define insque slirp_insque ++#define remque slirp_remque ++#define quehead slirp_quehead ++ ++#include "debug.h" ++#include "util.h" ++ ++#include "libslirp.h" ++#include "ip.h" ++#include "ip6.h" ++#include "tcp.h" ++#include "tcp_timer.h" ++#include "tcp_var.h" ++#include "tcpip.h" ++#include "udp.h" ++#include "ip_icmp.h" ++#include "ip6_icmp.h" ++#include "mbuf.h" ++#include "sbuf.h" ++#include "socket.h" ++#include "if.h" ++#include "main.h" ++#include "misc.h" ++ ++#include "bootp.h" ++#include "tftp.h" ++ ++#define ARPOP_REQUEST 1 /* ARP request */ ++#define ARPOP_REPLY 2 /* ARP reply */ ++ ++struct ethhdr { ++ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ ++ unsigned char h_source[ETH_ALEN]; /* source ether addr */ ++ unsigned short h_proto; /* packet type ID field */ ++}; ++ ++struct slirp_arphdr { ++ unsigned short ar_hrd; /* format of hardware address */ ++ unsigned short ar_pro; /* format of protocol address */ ++ unsigned char ar_hln; /* length of hardware address */ ++ unsigned char ar_pln; /* length of protocol address */ ++ unsigned short ar_op; /* ARP opcode (command) */ ++ ++ /* ++ * Ethernet looks like this : This bit is variable sized however... 
++ */ ++ uint8_t ar_sha[ETH_ALEN]; /* sender hardware address */ ++ uint32_t ar_sip; /* sender IP address */ ++ uint8_t ar_tha[ETH_ALEN]; /* target hardware address */ ++ uint32_t ar_tip; /* target IP address */ ++} SLIRP_PACKED; ++ ++#define ARP_TABLE_SIZE 16 ++ ++typedef struct ArpTable { ++ struct slirp_arphdr table[ARP_TABLE_SIZE]; ++ int next_victim; ++} ArpTable; ++ ++void arp_table_add(Slirp *slirp, uint32_t ip_addr, ++ const uint8_t ethaddr[ETH_ALEN]); ++ ++bool arp_table_search(Slirp *slirp, uint32_t ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct ndpentry { ++ uint8_t eth_addr[ETH_ALEN]; /* sender hardware address */ ++ struct in6_addr ip_addr; /* sender IP address */ ++}; ++ ++#define NDP_TABLE_SIZE 16 ++ ++typedef struct NdpTable { ++ struct ndpentry table[NDP_TABLE_SIZE]; ++ /* ++ * The table is a cache with old entries overwritten when the table fills. ++ * Preserve the first entry: it is the guest, which is needed for lazy ++ * hostfwd guest address assignment. ++ */ ++ struct in6_addr guest_in6_addr; ++ int next_victim; ++} NdpTable; ++ ++void ndp_table_add(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t ethaddr[ETH_ALEN]); ++bool ndp_table_search(Slirp *slirp, struct in6_addr ip_addr, ++ uint8_t out_ethaddr[ETH_ALEN]); ++ ++struct Slirp { ++ unsigned time_fasttimo; ++ unsigned last_slowtimo; ++ bool do_slowtimo; ++ ++ bool in_enabled, in6_enabled; ++ ++ /* virtual network configuration */ ++ struct in_addr vnetwork_addr; ++ struct in_addr vnetwork_mask; ++ struct in_addr vhost_addr; ++ struct in6_addr vprefix_addr6; ++ uint8_t vprefix_len; ++ struct in6_addr vhost_addr6; ++ struct in_addr vdhcp_startaddr; ++ struct in_addr vnameserver_addr; ++ struct in6_addr vnameserver_addr6; ++ ++ struct in_addr client_ipaddr; ++ char client_hostname[33]; ++ ++ int restricted; ++ struct gfwd_list *guestfwd_list; ++ ++ int if_mtu; ++ int if_mru; ++ ++ bool disable_host_loopback; ++ ++ /* mbuf states */ ++ struct quehead m_freelist; ++ struct quehead 
m_usedlist; ++ int mbuf_alloced; ++ ++ /* if states */ ++ struct quehead if_fastq; /* fast queue (for interactive data) */ ++ struct quehead if_batchq; /* queue for non-interactive data */ ++ bool if_start_busy; /* avoid if_start recursion */ ++ ++ /* ip states */ ++ struct ipq ipq; /* ip reass. queue */ ++ uint16_t ip_id; /* ip packet ctr, for ids */ ++ ++ /* bootp/dhcp states */ ++ BOOTPClient bootp_clients[NB_BOOTP_CLIENTS]; ++ char *bootp_filename; ++ size_t vdnssearch_len; ++ uint8_t *vdnssearch; ++ char *vdomainname; ++ ++ /* tcp states */ ++ struct socket tcb; ++ struct socket *tcp_last_so; ++ tcp_seq tcp_iss; /* tcp initial send seq # */ ++ uint32_t tcp_now; /* for RFC 1323 timestamps */ ++ ++ /* udp states */ ++ struct socket udb; ++ struct socket *udp_last_so; ++ ++ /* icmp states */ ++ struct socket icmp; ++ struct socket *icmp_last_so; ++ ++ /* tftp states */ ++ char *tftp_prefix; ++ struct tftp_session tftp_sessions[TFTP_SESSIONS_MAX]; ++ char *tftp_server_name; ++ ++ ArpTable arp_table; ++ NdpTable ndp_table; ++ ++ GRand *grand; ++ void *ra_timer; ++ ++ bool enable_emu; ++ ++ const SlirpCb *cb; ++ void *opaque; ++ ++ struct sockaddr_in *outbound_addr; ++ struct sockaddr_in6 *outbound_addr6; ++ bool disable_dns; /* slirp will not redirect/serve any DNS packet */ ++}; ++ ++void if_start(Slirp *); ++ ++int get_dns_addr(struct in_addr *pdns_addr); ++int get_dns6_addr(struct in6_addr *pdns6_addr, uint32_t *scope_id); ++ ++/* ncsi.c */ ++void ncsi_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); ++ ++#ifndef _WIN32 ++#include ++#endif ++ ++ ++extern bool slirp_do_keepalive; ++ ++#define TCP_MAXIDLE (TCPTV_KEEPCNT * TCPTV_KEEPINTVL) ++ ++/* dnssearch.c */ ++int translate_dnssearch(Slirp *s, const char **names); ++ ++/* cksum.c */ ++int cksum(struct mbuf *m, int len); ++int ip6_cksum(struct mbuf *m); ++ ++/* if.c */ ++void if_init(Slirp *); ++void if_output(struct socket *, struct mbuf *); ++ ++/* ip_input.c */ ++void ip_init(Slirp *); ++void 
ip_cleanup(Slirp *); ++void ip_input(struct mbuf *); ++void ip_slowtimo(Slirp *); ++void ip_stripoptions(register struct mbuf *, struct mbuf *); ++ ++/* ip_output.c */ ++int ip_output(struct socket *, struct mbuf *); ++ ++/* ip6_input.c */ ++void ip6_init(Slirp *); ++void ip6_cleanup(Slirp *); ++void ip6_input(struct mbuf *); ++ ++/* ip6_output */ ++int ip6_output(struct socket *, struct mbuf *, int fast); ++ ++/* tcp_input.c */ ++void tcp_input(register struct mbuf *, int, struct socket *, unsigned short af); ++int tcp_mss(register struct tcpcb *, unsigned); ++ ++/* tcp_output.c */ ++int tcp_output(register struct tcpcb *); ++void tcp_setpersist(register struct tcpcb *); ++ ++/* tcp_subr.c */ ++void tcp_init(Slirp *); ++void tcp_cleanup(Slirp *); ++void tcp_template(struct tcpcb *); ++void tcp_respond(struct tcpcb *, register struct tcpiphdr *, ++ register struct mbuf *, tcp_seq, tcp_seq, int, unsigned short); ++struct tcpcb *tcp_newtcpcb(struct socket *); ++struct tcpcb *tcp_close(register struct tcpcb *); ++void tcp_sockclosed(struct tcpcb *); ++int tcp_fconnect(struct socket *, unsigned short af); ++void tcp_connect(struct socket *); ++void tcp_attach(struct socket *); ++uint8_t tcp_tos(struct socket *); ++int tcp_emu(struct socket *, struct mbuf *); ++int tcp_ctl(struct socket *); ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err); ++ ++struct socket *slirp_find_ctl_socket(Slirp *slirp, struct in_addr guest_addr, ++ int guest_port); ++ ++void slirp_send_packet_all(Slirp *slirp, const void *buf, size_t len); ++ ++#endif +diff --git a/slirp/src/socket.c b/slirp/src/socket.c +new file mode 100644 +index 0000000000..2c1b789d48 +--- /dev/null ++++ b/slirp/src/socket.c +@@ -0,0 +1,1104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++#ifdef __sun__ ++#include ++#endif ++#ifdef __linux__ ++#include ++#endif ++ ++static void sofcantrcvmore(struct socket *so); ++static void sofcantsendmore(struct socket *so); ++ ++struct socket *solookup(struct socket **last, struct socket *head, ++ struct sockaddr_storage *lhost, ++ struct sockaddr_storage *fhost) ++{ ++ struct socket *so = *last; ++ ++ /* Optimisation */ ++ if (so != head && sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ return so; ++ } ++ ++ for (so = head->so_next; so != head; so = so->so_next) { ++ if (sockaddr_equal(&(so->lhost.ss), lhost) && ++ (!fhost || sockaddr_equal(&so->fhost.ss, fhost))) { ++ *last = so; ++ return so; ++ } ++ } ++ ++ return (struct socket *)NULL; ++} ++ ++/* ++ * Create a new socket, initialise the fields ++ * It is the responsibility of the caller to ++ * insque() it into the correct linked-list ++ */ ++struct socket *socreate(Slirp *slirp) ++{ ++ struct socket *so = g_new(struct socket, 1); ++ ++ memset(so, 0, sizeof(struct socket)); ++ so->so_state = SS_NOFDREF; ++ so->s = -1; ++ so->slirp = slirp; ++ so->pollfds_idx = -1; ++ ++ return so; ++} ++ ++/* ++ * Remove references to so from the given message queue. 
++ */ ++static void soqfree(struct socket *so, struct quehead *qh) ++{ ++ struct mbuf *ifq; ++ ++ for (ifq = (struct mbuf *)qh->qh_link; (struct quehead *)ifq != qh; ++ ifq = ifq->ifq_next) { ++ if (ifq->ifq_so == so) { ++ struct mbuf *ifm; ++ ifq->ifq_so = NULL; ++ for (ifm = ifq->ifs_next; ifm != ifq; ifm = ifm->ifs_next) { ++ ifm->ifq_so = NULL; ++ } ++ } ++ } ++} ++ ++/* ++ * remque and free a socket, clobber cache ++ */ ++void sofree(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ soqfree(so, &slirp->if_fastq); ++ soqfree(so, &slirp->if_batchq); ++ ++ if (so == slirp->tcp_last_so) { ++ slirp->tcp_last_so = &slirp->tcb; ++ } else if (so == slirp->udp_last_so) { ++ slirp->udp_last_so = &slirp->udb; ++ } else if (so == slirp->icmp_last_so) { ++ slirp->icmp_last_so = &slirp->icmp; ++ } ++ m_free(so->so_m); ++ ++ if (so->so_next && so->so_prev) ++ remque(so); /* crashes if so is not in a queue */ ++ ++ if (so->so_tcpcb) { ++ g_free(so->so_tcpcb); ++ } ++ g_free(so); ++} ++ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np) ++{ ++ int n, lss, total; ++ struct sbuf *sb = &so->so_snd; ++ int len = sb->sb_datalen - sb->sb_cc; ++ int mss = so->so_tcpcb->t_maxseg; ++ ++ DEBUG_CALL("sopreprbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (len <= 0) ++ return 0; ++ ++ iov[0].iov_base = sb->sb_wptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_wptr < sb->sb_rptr) { ++ iov[0].iov_len = sb->sb_rptr - sb->sb_wptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr; ++ /* Should never succeed, but... 
*/ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_rptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ total = iov[0].iov_len + iov[1].iov_len; ++ if (total > mss) { ++ lss = total % mss; ++ if (iov[1].iov_len > lss) { ++ iov[1].iov_len -= lss; ++ n = 2; ++ } else { ++ lss -= iov[1].iov_len; ++ iov[0].iov_len -= lss; ++ n = 1; ++ } ++ } else ++ n = 2; ++ } else { ++ if (iov[0].iov_len > mss) ++ iov[0].iov_len -= iov[0].iov_len % mss; ++ n = 1; ++ } ++ } ++ if (np) ++ *np = n; ++ ++ return iov[0].iov_len + (n - 1) * iov[1].iov_len; ++} ++ ++/* ++ * Read from so's socket into sb_snd, updating all relevant sbuf fields ++ * NOTE: This will only be called if it is select()ed for reading, so ++ * a read() of 0 (or less) means it's disconnected ++ */ ++int soread(struct socket *so) ++{ ++ int n, nn; ++ size_t buf_len; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soread"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. 
++ * soread wouldn't have been called if there weren't ++ */ ++ buf_len = sopreprbuf(so, iov, &n); ++ assert(buf_len != 0); ++ ++ nn = recv(so->s, iov[0].iov_base, iov[0].iov_len, 0); ++ if (nn <= 0) { ++ if (nn < 0 && (errno == EINTR || errno == EAGAIN)) ++ return 0; ++ else { ++ int err; ++ socklen_t elen = sizeof err; ++ struct sockaddr_storage addr; ++ struct sockaddr *paddr = (struct sockaddr *)&addr; ++ socklen_t alen = sizeof addr; ++ ++ err = errno; ++ if (nn == 0) { ++ int shutdown_wr = so->so_state & SS_FCANTSENDMORE; ++ ++ if (!shutdown_wr && getpeername(so->s, paddr, &alen) < 0) { ++ err = errno; ++ } else { ++ getsockopt(so->s, SOL_SOCKET, SO_ERROR, &err, &elen); ++ } ++ } ++ ++ DEBUG_MISC(" --- soread() disconnected, nn = %d, errno = %d-%s", nn, ++ errno, strerror(errno)); ++ sofcantrcvmore(so); ++ ++ if (err == ECONNRESET || err == ECONNREFUSED || err == ENOTCONN || ++ err == EPIPE) { ++ tcp_drop(sototcpcb(so), err); ++ } else { ++ tcp_sockclosed(sototcpcb(so)); ++ } ++ return -1; ++ } ++ } ++ ++ /* ++ * If there was no error, try and read the second time round ++ * We read again if n = 2 (ie, there's another part of the buffer) ++ * and we read as much as we could in the first read ++ * We don't test for <= 0 this time, because there legitimately ++ * might not be any more data (since the socket is non-blocking), ++ * a close will be detected on next iteration. ++ * A return of -1 won't (shouldn't) happen, since it didn't happen above ++ */ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = recv(so->s, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ ++ DEBUG_MISC(" ... 
read nn = %d bytes", nn); ++ ++ /* Update fields */ ++ sb->sb_cc += nn; ++ sb->sb_wptr += nn; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return nn; ++} ++ ++int soreadbuf(struct socket *so, const char *buf, int size) ++{ ++ int n, nn, copy = size; ++ struct sbuf *sb = &so->so_snd; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("soreadbuf"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * No need to check if there's enough room to read. ++ * soread wouldn't have been called if there weren't ++ */ ++ assert(size > 0); ++ if (sopreprbuf(so, iov, &n) < size) ++ goto err; ++ ++ nn = MIN(iov[0].iov_len, copy); ++ memcpy(iov[0].iov_base, buf, nn); ++ ++ copy -= nn; ++ buf += nn; ++ ++ if (copy == 0) ++ goto done; ++ ++ memcpy(iov[1].iov_base, buf, copy); ++ ++done: ++ /* Update fields */ ++ sb->sb_cc += size; ++ sb->sb_wptr += size; ++ if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_wptr -= sb->sb_datalen; ++ return size; ++err: ++ ++ sofcantrcvmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ g_critical("soreadbuf buffer too small"); ++ return -1; ++} ++ ++/* ++ * Get urgent data ++ * ++ * When the socket is created, we set it SO_OOBINLINE, ++ * so when OOB data arrives, we soread() it and everything ++ * in the send buffer is sent as urgent data ++ */ ++int sorecvoob(struct socket *so) ++{ ++ struct tcpcb *tp = sototcpcb(so); ++ int ret; ++ ++ DEBUG_CALL("sorecvoob"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* ++ * We take a guess at how much urgent data has arrived. ++ * In most situations, when urgent data arrives, the next ++ * read() should get all the urgent data. This guess will ++ * be wrong however if more data arrives just after the ++ * urgent data, or the read() doesn't return all the ++ * urgent data. 
++ */ ++ ret = soread(so); ++ if (ret > 0) { ++ tp->snd_up = tp->snd_una + so->so_snd.sb_cc; ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Send urgent data ++ * There's a lot duplicated code here, but... ++ */ ++int sosendoob(struct socket *so) ++{ ++ struct sbuf *sb = &so->so_rcv; ++ char buff[2048]; /* XXX Shouldn't be sending more oob data than this */ ++ ++ int n; ++ ++ DEBUG_CALL("sosendoob"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc); ++ ++ if (so->so_urgc > sizeof(buff)) ++ so->so_urgc = sizeof(buff); /* XXXX */ ++ ++ if (sb->sb_rptr < sb->sb_wptr) { ++ /* We can send it directly */ ++ n = slirp_send(so, sb->sb_rptr, so->so_urgc, ++ (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++ } else { ++ /* ++ * Since there's no sendv or sendtov like writev, ++ * we must copy all data to a linear buffer then ++ * send it all ++ */ ++ uint32_t urgc = so->so_urgc; /* Amount of room left in buff */ ++ int len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (len > urgc) { ++ len = urgc; ++ } ++ memcpy(buff, sb->sb_rptr, len); ++ urgc -= len; ++ if (urgc) { ++ /* We still have some room for the rest */ ++ n = sb->sb_wptr - sb->sb_data; ++ if (n > urgc) { ++ n = urgc; ++ } ++ memcpy((buff + len), sb->sb_data, n); ++ len += n; ++ } ++ n = slirp_send(so, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */ ++#ifdef DEBUG ++ if (n != len) { ++ DEBUG_ERROR("Didn't send all data urgently XXXXX"); ++ } ++#endif ++ } ++ ++ if (n < 0) { ++ return n; ++ } ++ so->so_urgc -= n; ++ DEBUG_MISC(" ---2 sent %d bytes urgent data, %d urgent bytes left", n, ++ so->so_urgc); ++ ++ sb->sb_cc -= n; ++ sb->sb_rptr += n; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ return n; ++} ++ ++/* ++ * Write data from so_rcv to so's socket, ++ * updating all sbuf field as necessary ++ */ ++int sowrite(struct socket *so) ++{ ++ int n, nn; ++ struct sbuf *sb = &so->so_rcv; ++ int len = 
sb->sb_cc; ++ struct iovec iov[2]; ++ ++ DEBUG_CALL("sowrite"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_urgc) { ++ uint32_t expected = so->so_urgc; ++ if (sosendoob(so) < expected) { ++ /* Treat a short write as a fatal error too, ++ * rather than continuing on and sending the urgent ++ * data as if it were non-urgent and leaving the ++ * so_urgc count wrong. ++ */ ++ goto err_disconnected; ++ } ++ if (sb->sb_cc == 0) ++ return 0; ++ } ++ ++ /* ++ * No need to check if there's something to write, ++ * sowrite wouldn't have been called otherwise ++ */ ++ ++ iov[0].iov_base = sb->sb_rptr; ++ iov[1].iov_base = NULL; ++ iov[1].iov_len = 0; ++ if (sb->sb_rptr < sb->sb_wptr) { ++ iov[0].iov_len = sb->sb_wptr - sb->sb_rptr; ++ /* Should never succeed, but... */ ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ n = 1; ++ } else { ++ iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr; ++ if (iov[0].iov_len > len) ++ iov[0].iov_len = len; ++ len -= iov[0].iov_len; ++ if (len) { ++ iov[1].iov_base = sb->sb_data; ++ iov[1].iov_len = sb->sb_wptr - sb->sb_data; ++ if (iov[1].iov_len > len) ++ iov[1].iov_len = len; ++ n = 2; ++ } else ++ n = 1; ++ } ++ /* Check if there's urgent data to send, and if so, send it */ ++ ++ nn = slirp_send(so, iov[0].iov_base, iov[0].iov_len, 0); ++ /* This should never happen, but people tell me it does *shrug* */ ++ if (nn < 0 && (errno == EAGAIN || errno == EINTR)) ++ return 0; ++ ++ if (nn <= 0) { ++ goto err_disconnected; ++ } ++ ++ if (n == 2 && nn == iov[0].iov_len) { ++ int ret; ++ ret = slirp_send(so, iov[1].iov_base, iov[1].iov_len, 0); ++ if (ret > 0) ++ nn += ret; ++ } ++ DEBUG_MISC(" ... 
wrote nn = %d bytes", nn); ++ ++ /* Update sbuf */ ++ sb->sb_cc -= nn; ++ sb->sb_rptr += nn; ++ if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen)) ++ sb->sb_rptr -= sb->sb_datalen; ++ ++ /* ++ * If in DRAIN mode, and there's no more data, set ++ * it CANTSENDMORE ++ */ ++ if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0) ++ sofcantsendmore(so); ++ ++ return nn; ++ ++err_disconnected: ++ DEBUG_MISC(" --- sowrite disconnected, so->so_state = %x, errno = %d", ++ so->so_state, errno); ++ sofcantsendmore(so); ++ tcp_sockclosed(sototcpcb(so)); ++ return -1; ++} ++ ++/* ++ * recvfrom() a UDP socket ++ */ ++void sorecvfrom(struct socket *so) ++{ ++ struct sockaddr_storage addr; ++ struct sockaddr_storage saddr, daddr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ char buff[256]; ++ ++#ifdef __linux__ ++ ssize_t size; ++ struct msghdr msg; ++ struct iovec iov; ++ char control[1024]; ++ ++ /* First look for errors */ ++ memset(&msg, 0, sizeof(msg)); ++ msg.msg_name = &saddr; ++ msg.msg_namelen = sizeof(saddr); ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ iov.iov_base = buff; ++ iov.iov_len = sizeof(buff); ++ msg.msg_iov = &iov; ++ msg.msg_iovlen = 1; ++ ++ size = recvmsg(so->s, &msg, MSG_ERRQUEUE); ++ if (size >= 0) { ++ struct cmsghdr *cmsg; ++ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { ++ ++ if (cmsg->cmsg_level == IPPROTO_IP && ++ cmsg->cmsg_type == IP_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == SO_EE_ORIGIN_ICMP) { ++ /* Got an ICMP error, forward it */ ++ struct sockaddr_in *sin; ++ ++ sin = (struct sockaddr_in *) SO_EE_OFFENDER(ee); ++ icmp_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ 0, NULL, &sin->sin_addr); ++ } ++ } ++ else if (cmsg->cmsg_level == IPPROTO_IPV6 && ++ cmsg->cmsg_type == IPV6_RECVERR) { ++ struct sock_extended_err *ee = ++ (struct sock_extended_err *) CMSG_DATA(cmsg); ++ ++ if (ee->ee_origin == 
SO_EE_ORIGIN_ICMP6) { ++ /* Got an ICMPv6 error, forward it */ ++ struct sockaddr_in6 *sin6; ++ ++ sin6 = (struct sockaddr_in6 *) SO_EE_OFFENDER(ee); ++ icmp6_forward_error(so->so_m, ee->ee_type, ee->ee_code, ++ &sin6->sin6_addr); ++ } ++ } ++ } ++ return; ++ } ++#endif ++ ++ DEBUG_CALL("sorecvfrom"); ++ DEBUG_ARG("so = %p", so); ++ ++ if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */ ++ int len; ++ ++ len = recvfrom(so->s, buff, 256, 0, (struct sockaddr *)&addr, &addrlen); ++ /* XXX Check if reply is "correct"? */ ++ ++ if (len == -1 || len == 0) { ++ uint8_t code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) ++ code = ICMP_UNREACH_HOST; ++ else if (errno == ENETUNREACH) ++ code = ICMP_UNREACH_NET; ++ ++ DEBUG_MISC(" udp icmp rx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, strerror(errno)); ++ } else { ++ icmp_reflect(so->so_m); ++ so->so_m = NULL; /* Don't m_free() it again! */ ++ } ++ /* No need for this socket anymore, udp_detach it */ ++ udp_detach(so); ++ } else { /* A "normal" UDP packet */ ++ struct mbuf *m; ++ int len; ++#ifdef _WIN32 ++ unsigned long n; ++#else ++ int n; ++#endif ++ ++ if (ioctlsocket(so->s, FIONREAD, &n) != 0) { ++ DEBUG_MISC(" ioctlsocket errno = %d-%s\n", errno, strerror(errno)); ++ return; ++ } ++ ++ m = m_get(so->slirp); ++ if (!m) { ++ return; ++ } ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += IF_MAXLINKHDR + sizeof(struct udpiphdr); ++ break; ++ case AF_INET6: ++ m->m_data += ++ IF_MAXLINKHDR + sizeof(struct ip6) + sizeof(struct udphdr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ /* ++ * XXX Shouldn't FIONREAD packets destined for port 53, ++ * but I don't know the max packet size for DNS lookups ++ */ ++ len = M_FREEROOM(m); ++ /* if (so->so_fport != htons(53)) { */ ++ ++ if (n > len) { ++ n = (m->m_data - m->m_dat) + m->m_len + n + 1; ++ m_inc(m, n); ++ len = M_FREEROOM(m); ++ } ++ /* } */ ++ ++ m->m_len = recvfrom(so->s, 
m->m_data, len, 0, (struct sockaddr *)&addr, ++ &addrlen); ++ DEBUG_MISC(" did recvfrom %d, errno = %d-%s", m->m_len, errno, ++ strerror(errno)); ++ if (m->m_len < 0) { ++ /* Report error as ICMP */ ++ switch (so->so_lfamily) { ++ uint8_t code; ++ case AF_INET: ++ code = ICMP_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP_UNREACH_NET; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp ICMP_UNREACH:%i", code); ++ icmp_send_error(so->so_m, ICMP_UNREACH, code, 0, ++ strerror(errno)); ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_PORT; ++ ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } else if (errno == ENETUNREACH) { ++ code = ICMP6_UNREACH_NO_ROUTE; ++ } ++ ++ DEBUG_MISC(" rx error, tx icmp6 ICMP_UNREACH:%i", code); ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ } else { ++ /* ++ * Hack: domain name lookup will be used the most for UDP, ++ * and since they'll only be used once there's no need ++ * for the 4 minute (or whatever) timeout... So we time them ++ * out much quicker (10 seconds for now...) ++ */ ++ if (so->so_expire) { ++ if (so->so_fport == htons(53)) ++ so->so_expire = curtime + SO_EXPIREFAST; ++ else ++ so->so_expire = curtime + SO_EXPIRE; ++ } ++ ++ /* ++ * If this packet was destined for CTL_ADDR, ++ * make it look like that's where it came from ++ */ ++ saddr = addr; ++ sotranslate_in(so, &saddr); ++ ++ /* Perform lazy guest IP address resolution if needed. 
*/ ++ if (so->so_state & SS_HOSTFWD) { ++ if (soassign_guest_addr_if_needed(so) < 0) { ++ DEBUG_MISC(" guest address not available yet"); ++ switch (so->so_lfamily) { ++ case AF_INET: ++ icmp_send_error(so->so_m, ICMP_UNREACH, ++ ICMP_UNREACH_HOST, 0, ++ "guest address not available yet"); ++ break; ++ case AF_INET6: ++ icmp6_send_error(so->so_m, ICMP6_UNREACH, ++ ICMP6_UNREACH_ADDRESS); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ m_free(m); ++ return; ++ } ++ } ++ daddr = so->lhost.ss; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ udp_output(so, m, (struct sockaddr_in *)&saddr, ++ (struct sockaddr_in *)&daddr, so->so_iptos); ++ break; ++ case AF_INET6: ++ udp6_output(so, m, (struct sockaddr_in6 *)&saddr, ++ (struct sockaddr_in6 *)&daddr); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } /* rx error */ ++ } /* if ping packet */ ++} ++ ++/* ++ * sendto() a socket ++ */ ++int sosendto(struct socket *so, struct mbuf *m) ++{ ++ int ret; ++ struct sockaddr_storage addr; ++ ++ DEBUG_CALL("sosendto"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" sendto()ing)"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* Don't care what port we get */ ++ ret = sendto(so->s, m->m_data, m->m_len, 0, (struct sockaddr *)&addr, ++ sockaddr_size(&addr)); ++ if (ret < 0) ++ return -1; ++ ++ /* ++ * Kill the socket if there's no reply in 4 minutes, ++ * but only if it's an expirable socket ++ */ ++ if (so->so_expire) ++ so->so_expire = curtime + SO_EXPIRE; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED; /* So that it gets select()ed */ ++ return 0; ++} ++ ++/* ++ * Listen for incoming TCP connections ++ * On failure errno contains the reason. 
++ */ ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ int s, opt = 1; ++ socklen_t addrlen; ++ ++ DEBUG_CALL("tcpx_listen"); ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. */ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ int ret; ++ ret = getnameinfo(haddr, haddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("haddr = %s", addrstr); ++ DEBUG_ARG("hport = %s", portstr); ++ ret = getnameinfo(laddr, laddrlen, addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("laddr = %s", addrstr); ++ DEBUG_ARG("lport = %s", portstr); ++ DEBUG_ARG("flags = %x", flags); ++ ++ /* ++ * SS_HOSTFWD sockets can be accepted multiple times, so they can't be ++ * SS_FACCEPTONCE. Also, SS_HOSTFWD connections can be accepted and ++ * immediately closed if the guest address isn't available yet, which is ++ * incompatible with the "accept once" concept. Correct code will never ++ * request both, so disallow their combination by assertion. ++ */ ++ g_assert(!((flags & SS_HOSTFWD) && (flags & SS_FACCEPTONCE))); ++ ++ so = socreate(slirp); ++ ++ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &slirp->tcb); ++ ++ /* ++ * SS_FACCEPTONCE sockets must time out. 
++ */ ++ if (flags & SS_FACCEPTONCE) ++ so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT * 2; ++ ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= (SS_FACCEPTCONN | flags); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ s = slirp_socket(haddr->sa_family, SOCK_STREAM, 0); ++ if ((s < 0) || ++ (haddr->sa_family == AF_INET6 && slirp_socket_set_v6only(s, (flags & SS_HOSTFWD_V6ONLY) != 0) < 0) || ++ (slirp_socket_set_fast_reuse(s) < 0) || ++ (bind(s, haddr, haddrlen) < 0) || ++ (listen(s, 1) < 0)) { ++ int tmperrno = errno; /* Don't clobber the real reason we failed */ ++ if (s >= 0) { ++ closesocket(s); ++ } ++ sofree(so); ++ /* Restore the real errno */ ++#ifdef _WIN32 ++ WSASetLastError(tmperrno); ++#else ++ errno = tmperrno; ++#endif ++ return NULL; ++ } ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ so->s = s; ++ return so; ++} ++ ++struct socket *tcp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return tcpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} ++ ++/* ++ * Various session state calls ++ * XXX Should be #define's ++ * The socket state stuff needs work, these often get call 2 or 3 ++ * times each when only 1 was needed ++ */ ++void soisfconnecting(struct socket *so) ++{ ++ so->so_state &= ~(SS_NOFDREF | SS_ISFCONNECTED | SS_FCANTRCVMORE | ++ SS_FCANTSENDMORE | SS_FWDRAIN); ++ so->so_state |= SS_ISFCONNECTING; /* Clobber other states */ ++} ++ ++void 
soisfconnected(struct socket *so) ++{ ++ so->so_state &= ~(SS_ISFCONNECTING | SS_FWDRAIN | SS_NOFDREF); ++ so->so_state |= SS_ISFCONNECTED; /* Clobber other states */ ++} ++ ++static void sofcantrcvmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 0); ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTSENDMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* Don't select it */ ++ } else { ++ so->so_state |= SS_FCANTRCVMORE; ++ } ++} ++ ++static void sofcantsendmore(struct socket *so) ++{ ++ if ((so->so_state & SS_NOFDREF) == 0) { ++ shutdown(so->s, 1); /* send FIN to fhost */ ++ } ++ so->so_state &= ~(SS_ISFCONNECTING); ++ if (so->so_state & SS_FCANTRCVMORE) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* as above */ ++ } else { ++ so->so_state |= SS_FCANTSENDMORE; ++ } ++} ++ ++/* ++ * Set write drain mode ++ * Set CANTSENDMORE once all data has been write()n ++ */ ++void sofwdrain(struct socket *so) ++{ ++ if (so->so_rcv.sb_cc) ++ so->so_state |= SS_FWDRAIN; ++ else ++ sofcantsendmore(so); ++} ++ ++static bool sotranslate_out4(Slirp *s, struct socket *so, struct sockaddr_in *sin) ++{ ++ if (!s->disable_dns && so->so_faddr.s_addr == s->vnameserver_addr.s_addr) { ++ return so->so_fport == htons(53) && get_dns_addr(&sin->sin_addr) >= 0; ++ } ++ ++ if (so->so_faddr.s_addr == s->vhost_addr.s_addr || ++ so->so_faddr.s_addr == 0xffffffff) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin_addr = loopback_addr; ++ } ++ ++ return true; ++} ++ ++static bool sotranslate_out6(Slirp *s, struct socket *so, struct sockaddr_in6 *sin) ++{ ++ if (!s->disable_dns && in6_equal(&so->so_faddr6, &s->vnameserver_addr6)) { ++ uint32_t scope_id; ++ if (so->so_fport == htons(53) && get_dns6_addr(&sin->sin6_addr, &scope_id) >= 0) { ++ sin->sin6_scope_id = scope_id; ++ return true; ++ } ++ return false; ++ } ++ ++ if (in6_equal_net(&so->so_faddr6, 
&s->vprefix_addr6, s->vprefix_len) || ++ in6_equal(&so->so_faddr6, &(struct in6_addr)ALLNODES_MULTICAST)) { ++ if (s->disable_host_loopback) { ++ return false; ++ } ++ ++ sin->sin6_addr = in6addr_loopback; ++ } ++ ++ return true; ++} ++ ++ ++/* ++ * Translate addr in host addr when it is a virtual address ++ */ ++int sotranslate_out(struct socket *so, struct sockaddr_storage *addr) ++{ ++ bool ok = true; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ ok = sotranslate_out4(so->slirp, so, (struct sockaddr_in *)addr); ++ break; ++ case AF_INET6: ++ ok = sotranslate_out6(so->slirp, so, (struct sockaddr_in6 *)addr); ++ break; ++ } ++ ++ if (!ok) { ++ errno = EPERM; ++ return -1; ++ } ++ ++ return 0; ++} ++ ++void sotranslate_in(struct socket *so, struct sockaddr_storage *addr) ++{ ++ Slirp *slirp = so->slirp; ++ struct sockaddr_in *sin = (struct sockaddr_in *)addr; ++ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ uint32_t inv_mask = ~slirp->vnetwork_mask.s_addr; ++ ++ if ((so->so_faddr.s_addr & inv_mask) == inv_mask) { ++ sin->sin_addr = slirp->vhost_addr; ++ } else if (sin->sin_addr.s_addr == loopback_addr.s_addr || ++ so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ sin->sin_addr = so->so_faddr; ++ } ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal_net(&so->so_faddr6, &slirp->vprefix_addr6, ++ slirp->vprefix_len)) { ++ if (in6_equal(&sin6->sin6_addr, &in6addr_loopback) || ++ !in6_equal(&so->so_faddr6, &slirp->vhost_addr6)) { ++ sin6->sin6_addr = so->so_faddr6; ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* ++ * Translate connections from localhost to the real hostname ++ */ ++void sotranslate_accept(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_faddr.s_addr == INADDR_ANY || ++ (so->so_faddr.s_addr & loopback_mask) 
== ++ (loopback_addr.s_addr & loopback_mask)) { ++ so->so_faddr = slirp->vhost_addr; ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_equal(&so->so_faddr6, &in6addr_any) || ++ in6_equal(&so->so_faddr6, &in6addr_loopback)) { ++ so->so_faddr6 = slirp->vhost_addr6; ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++void sodrop(struct socket *s, int num) ++{ ++ if (sbdrop(&s->so_snd, num)) { ++ s->slirp->cb->notify(s->slirp->opaque); ++ } ++} ++ ++/* ++ * Translate "addr-any" in so->lhost to the guest's actual address. ++ * Returns 0 for success, or -1 if the guest doesn't have an address yet ++ * with errno set to EHOSTUNREACH. ++ * ++ * The guest address is taken from the first entry in the ARP table for IPv4 ++ * and the first entry in the NDP table for IPv6. ++ * Note: The IPv4 path isn't exercised yet as all hostfwd "" guest translations ++ * are handled immediately by using slirp->vdhcp_startaddr. ++ */ ++int soassign_guest_addr_if_needed(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. 
*/ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ g_assert(so->so_state & SS_HOSTFWD); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ if (so->so_laddr.s_addr == INADDR_ANY) { ++ g_assert_not_reached(); ++ } ++ break; ++ ++ case AF_INET6: ++ if (in6_zero(&so->so_laddr6)) { ++ int ret; ++ if (in6_zero(&slirp->ndp_table.guest_in6_addr)) { ++ errno = EHOSTUNREACH; ++ return -1; ++ } ++ so->so_laddr6 = slirp->ndp_table.guest_in6_addr; ++ ret = getnameinfo((const struct sockaddr *) &so->lhost.ss, ++ sizeof(so->lhost.ss), addrstr, sizeof(addrstr), ++ portstr, sizeof(portstr), ++ NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_MISC("%s: new ip = [%s]:%s", __func__, addrstr, portstr); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ return 0; ++} +diff --git a/slirp/src/socket.h b/slirp/src/socket.h +new file mode 100644 +index 0000000000..a73175dc29 +--- /dev/null ++++ b/slirp/src/socket.h +@@ -0,0 +1,186 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#ifndef SLIRP_SOCKET_H ++#define SLIRP_SOCKET_H ++ ++#include "misc.h" ++#include "sbuf.h" ++ ++#define SO_EXPIRE 240000 ++#define SO_EXPIREFAST 10000 ++ ++/* Helps unify some in/in6 routines. 
*/ ++union in4or6_addr { ++ struct in_addr addr4; ++ struct in6_addr addr6; ++}; ++typedef union in4or6_addr in4or6_addr; ++ ++/* ++ * Our socket structure ++ */ ++ ++union slirp_sockaddr { ++ struct sockaddr sa; ++ struct sockaddr_storage ss; ++ struct sockaddr_in sin; ++ struct sockaddr_in6 sin6; ++}; ++ ++struct socket { ++ struct socket *so_next, *so_prev; /* For a linked list of sockets */ ++ ++ int s; /* The actual socket */ ++ struct gfwd_list *guestfwd; ++ ++ int pollfds_idx; /* GPollFD GArray index */ ++ ++ Slirp *slirp; /* managing slirp instance */ ++ ++ /* XXX union these with not-yet-used sbuf params */ ++ struct mbuf *so_m; /* Pointer to the original SYN packet, ++ * for non-blocking connect()'s, and ++ * PING reply's */ ++ struct tcpiphdr *so_ti; /* Pointer to the original ti within ++ * so_mconn, for non-blocking connections */ ++ uint32_t so_urgc; ++ union slirp_sockaddr fhost; /* Foreign host */ ++#define so_faddr fhost.sin.sin_addr ++#define so_fport fhost.sin.sin_port ++#define so_faddr6 fhost.sin6.sin6_addr ++#define so_fport6 fhost.sin6.sin6_port ++#define so_ffamily fhost.ss.ss_family ++ ++ union slirp_sockaddr lhost; /* Local host */ ++#define so_laddr lhost.sin.sin_addr ++#define so_lport lhost.sin.sin_port ++#define so_laddr6 lhost.sin6.sin6_addr ++#define so_lport6 lhost.sin6.sin6_port ++#define so_lfamily lhost.ss.ss_family ++ ++ uint8_t so_iptos; /* Type of service */ ++ uint8_t so_emu; /* Is the socket emulated? 
*/ ++ ++ uint8_t so_type; /* Type of socket, UDP or TCP */ ++ int32_t so_state; /* internal state flags SS_*, below */ ++ ++ struct tcpcb *so_tcpcb; /* pointer to TCP protocol control block */ ++ unsigned so_expire; /* When the socket will expire */ ++ ++ int so_queued; /* Number of packets queued from this socket */ ++ int so_nqueued; /* Number of packets queued in a row ++ * Used to determine when to "downgrade" a session ++ * from fastq to batchq */ ++ ++ struct sbuf so_rcv; /* Receive buffer */ ++ struct sbuf so_snd; /* Send buffer */ ++}; ++ ++ ++/* ++ * Socket state bits. (peer means the host on the Internet, ++ * local host means the host on the other end of the modem) ++ */ ++#define SS_NOFDREF 0x001 /* No fd reference */ ++ ++#define SS_ISFCONNECTING \ ++ 0x002 /* Socket is connecting to peer (non-blocking connect()'s) */ ++#define SS_ISFCONNECTED 0x004 /* Socket is connected to peer */ ++#define SS_FCANTRCVMORE \ ++ 0x008 /* Socket can't receive more from peer (for half-closes) */ ++#define SS_FCANTSENDMORE \ ++ 0x010 /* Socket can't send more to peer (for half-closes) */ ++#define SS_FWDRAIN \ ++ 0x040 /* We received a FIN, drain data and set SS_FCANTSENDMORE */ ++ ++#define SS_CTL 0x080 ++#define SS_FACCEPTCONN \ ++ 0x100 /* Socket is accepting connections from a host on the internet */ ++#define SS_FACCEPTONCE \ ++ 0x200 /* If set, the SS_FACCEPTCONN socket will die after one accept */ ++ ++#define SS_PERSISTENT_MASK 0xf000 /* Unremovable state bits */ ++#define SS_HOSTFWD 0x1000 /* Socket describes host->guest forwarding */ ++#define SS_INCOMING \ ++ 0x2000 /* Connection was initiated by a host on the internet */ ++#define SS_HOSTFWD_V6ONLY 0x4000 /* Only bind on v6 addresses */ ++ ++static inline int sockaddr_equal(const struct sockaddr_storage *a, ++ const struct sockaddr_storage *b) ++{ ++ if (a->ss_family != b->ss_family) { ++ return 0; ++ } ++ ++ switch (a->ss_family) { ++ case AF_INET: { ++ const struct sockaddr_in *a4 = (const struct 
sockaddr_in *)a; ++ const struct sockaddr_in *b4 = (const struct sockaddr_in *)b; ++ return a4->sin_addr.s_addr == b4->sin_addr.s_addr && ++ a4->sin_port == b4->sin_port; ++ } ++ case AF_INET6: { ++ const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a; ++ const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b; ++ return (in6_equal(&a6->sin6_addr, &b6->sin6_addr) && ++ a6->sin6_port == b6->sin6_port); ++ } ++ default: ++ g_assert_not_reached(); ++ } ++ ++ return 0; ++} ++ ++static inline socklen_t sockaddr_size(const struct sockaddr_storage *a) ++{ ++ switch (a->ss_family) { ++ case AF_INET: ++ return sizeof(struct sockaddr_in); ++ case AF_INET6: ++ return sizeof(struct sockaddr_in6); ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++static inline void sockaddr_copy(struct sockaddr *dst, socklen_t dstlen, const struct sockaddr *src, socklen_t srclen) ++{ ++ socklen_t len = sockaddr_size((const struct sockaddr_storage *) src); ++ g_assert(len <= srclen); ++ g_assert(len <= dstlen); ++ memcpy(dst, src, len); ++} ++ ++struct socket *solookup(struct socket **, struct socket *, ++ struct sockaddr_storage *, struct sockaddr_storage *); ++struct socket *socreate(Slirp *); ++void sofree(struct socket *); ++int soread(struct socket *); ++int sorecvoob(struct socket *); ++int sosendoob(struct socket *); ++int sowrite(struct socket *); ++void sorecvfrom(struct socket *); ++int sosendto(struct socket *, struct mbuf *); ++struct socket *tcp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *tcpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++void soisfconnecting(register struct socket *); ++void soisfconnected(register struct socket *); ++void sofwdrain(struct socket *); ++struct iovec; /* For win32 */ ++size_t sopreprbuf(struct socket *so, struct iovec *iov, int *np); ++int soreadbuf(struct socket *so, const char *buf, int size); ++ 
++int sotranslate_out(struct socket *, struct sockaddr_storage *); ++void sotranslate_in(struct socket *, struct sockaddr_storage *); ++void sotranslate_accept(struct socket *); ++void sodrop(struct socket *, int num); ++int soassign_guest_addr_if_needed(struct socket *so); ++ ++#endif /* SLIRP_SOCKET_H */ +diff --git a/slirp/src/state.c b/slirp/src/state.c +new file mode 100644 +index 0000000000..22af77b256 +--- /dev/null ++++ b/slirp/src/state.c +@@ -0,0 +1,379 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp ++ * ++ * Copyright (c) 2004-2008 Fabrice Bellard ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "slirp.h" ++#include "vmstate.h" ++#include "stream.h" ++ ++static int slirp_tcp_post_load(void *opaque, int version) ++{ ++ tcp_template((struct tcpcb *)opaque); ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_tcp = { ++ .name = "slirp-tcp", ++ .version_id = 0, ++ .post_load = slirp_tcp_post_load, ++ .fields = (VMStateField[]){ VMSTATE_INT16(t_state, struct tcpcb), ++ VMSTATE_INT16_ARRAY(t_timer, struct tcpcb, ++ TCPT_NTIMERS), ++ VMSTATE_INT16(t_rxtshift, struct tcpcb), ++ VMSTATE_INT16(t_rxtcur, struct tcpcb), ++ VMSTATE_INT16(t_dupacks, struct tcpcb), ++ VMSTATE_UINT16(t_maxseg, struct tcpcb), ++ VMSTATE_UINT8(t_force, struct tcpcb), ++ VMSTATE_UINT16(t_flags, struct tcpcb), ++ VMSTATE_UINT32(snd_una, struct tcpcb), ++ VMSTATE_UINT32(snd_nxt, struct tcpcb), ++ VMSTATE_UINT32(snd_up, struct tcpcb), ++ VMSTATE_UINT32(snd_wl1, struct tcpcb), ++ VMSTATE_UINT32(snd_wl2, struct tcpcb), ++ VMSTATE_UINT32(iss, struct tcpcb), ++ VMSTATE_UINT32(snd_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_wnd, struct tcpcb), ++ VMSTATE_UINT32(rcv_nxt, struct tcpcb), ++ VMSTATE_UINT32(rcv_up, struct tcpcb), ++ VMSTATE_UINT32(irs, struct tcpcb), ++ VMSTATE_UINT32(rcv_adv, struct tcpcb), ++ VMSTATE_UINT32(snd_max, struct tcpcb), ++ VMSTATE_UINT32(snd_cwnd, struct tcpcb), ++ VMSTATE_UINT32(snd_ssthresh, struct tcpcb), ++ VMSTATE_INT16(t_idle, struct tcpcb), ++ VMSTATE_INT16(t_rtt, struct tcpcb), ++ VMSTATE_UINT32(t_rtseq, struct tcpcb), ++ VMSTATE_INT16(t_srtt, struct tcpcb), ++ VMSTATE_INT16(t_rttvar, struct tcpcb), ++ VMSTATE_UINT16(t_rttmin, struct tcpcb), ++ VMSTATE_UINT32(max_sndwnd, struct tcpcb), ++ VMSTATE_UINT8(t_oobflags, struct tcpcb), ++ VMSTATE_UINT8(t_iobc, struct tcpcb), ++ VMSTATE_INT16(t_softerror, struct tcpcb), ++ VMSTATE_UINT8(snd_scale, struct tcpcb), ++ VMSTATE_UINT8(rcv_scale, struct tcpcb), ++ VMSTATE_UINT8(request_r_scale, struct tcpcb), ++ VMSTATE_UINT8(requested_s_scale, struct tcpcb), ++ VMSTATE_UINT32(ts_recent, struct 
tcpcb), ++ VMSTATE_UINT32(ts_recent_age, struct tcpcb), ++ VMSTATE_UINT32(last_ack_sent, struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++/* The sbuf has a pair of pointers that are migrated as offsets; ++ * we calculate the offsets and restore the pointers using ++ * pre_save/post_load on a tmp structure. ++ */ ++struct sbuf_tmp { ++ struct sbuf *parent; ++ uint32_t roff, woff; ++}; ++ ++static int sbuf_tmp_pre_save(void *opaque) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ tmp->woff = tmp->parent->sb_wptr - tmp->parent->sb_data; ++ tmp->roff = tmp->parent->sb_rptr - tmp->parent->sb_data; ++ ++ return 0; ++} ++ ++static int sbuf_tmp_post_load(void *opaque, int version) ++{ ++ struct sbuf_tmp *tmp = opaque; ++ uint32_t requested_len = tmp->parent->sb_datalen; ++ ++ /* Allocate the buffer space used by the field after the tmp */ ++ sbreserve(tmp->parent, tmp->parent->sb_datalen); ++ ++ if (tmp->woff >= requested_len || tmp->roff >= requested_len) { ++ g_critical("invalid sbuf offsets r/w=%u/%u len=%u", tmp->roff, ++ tmp->woff, requested_len); ++ return -EINVAL; ++ } ++ ++ tmp->parent->sb_wptr = tmp->parent->sb_data + tmp->woff; ++ tmp->parent->sb_rptr = tmp->parent->sb_data + tmp->roff; ++ ++ return 0; ++} ++ ++ ++static const VMStateDescription vmstate_slirp_sbuf_tmp = { ++ .name = "slirp-sbuf-tmp", ++ .post_load = sbuf_tmp_post_load, ++ .pre_save = sbuf_tmp_pre_save, ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(woff, struct sbuf_tmp), ++ VMSTATE_UINT32(roff, struct sbuf_tmp), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_sbuf = { ++ .name = "slirp-sbuf", ++ .version_id = 0, ++ .fields = (VMStateField[]){ VMSTATE_UINT32(sb_cc, struct sbuf), ++ VMSTATE_UINT32(sb_datalen, struct sbuf), ++ VMSTATE_WITH_TMP(struct sbuf, struct sbuf_tmp, ++ vmstate_slirp_sbuf_tmp), ++ VMSTATE_VBUFFER_UINT32(sb_data, struct sbuf, 0, ++ NULL, sb_datalen), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static bool slirp_older_than_v4(void 
*opaque, int version_id) ++{ ++ return version_id < 4; ++} ++ ++static bool slirp_family_inet(void *opaque, int version_id) ++{ ++ union slirp_sockaddr *ssa = (union slirp_sockaddr *)opaque; ++ return ssa->ss.ss_family == AF_INET; ++} ++ ++static int slirp_socket_pre_load(void *opaque) ++{ ++ struct socket *so = opaque; ++ ++ tcp_attach(so); ++ /* Older versions don't load these fields */ ++ so->so_ffamily = AF_INET; ++ so->so_lfamily = AF_INET; ++ return 0; ++} ++ ++#ifndef _WIN32 ++#define VMSTATE_SIN4_ADDR(f, s, t) VMSTATE_UINT32_TEST(f, s, t) ++#else ++/* Win uses u_long rather than uint32_t - but it's still 32bits long */ ++#define VMSTATE_SIN4_ADDR(f, s, t) \ ++ VMSTATE_SINGLE_TEST(f, s, t, 0, slirp_vmstate_info_uint32, u_long) ++#endif ++ ++/* The OS provided ss_family field isn't that portable; its size ++ * and type vary (16/8 bit, signed, unsigned) ++ * and the values it contains aren't fully portable. ++ */ ++typedef struct SS_FamilyTmpStruct { ++ union slirp_sockaddr *parent; ++ uint16_t portable_family; ++} SS_FamilyTmpStruct; ++ ++#define SS_FAMILY_MIG_IPV4 2 /* Linux, BSD, Win...
*/ ++#define SS_FAMILY_MIG_IPV6 10 /* Linux */ ++#define SS_FAMILY_MIG_OTHER 0xffff ++ ++static int ss_family_pre_save(void *opaque) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ tss->portable_family = SS_FAMILY_MIG_OTHER; ++ ++ if (tss->parent->ss.ss_family == AF_INET) { ++ tss->portable_family = SS_FAMILY_MIG_IPV4; ++ } else if (tss->parent->ss.ss_family == AF_INET6) { ++ tss->portable_family = SS_FAMILY_MIG_IPV6; ++ } ++ ++ return 0; ++} ++ ++static int ss_family_post_load(void *opaque, int version_id) ++{ ++ SS_FamilyTmpStruct *tss = opaque; ++ ++ switch (tss->portable_family) { ++ case SS_FAMILY_MIG_IPV4: ++ tss->parent->ss.ss_family = AF_INET; ++ break; ++ case SS_FAMILY_MIG_IPV6: ++ case 23: /* compatibility: AF_INET6 from mingw */ ++ case 28: /* compatibility: AF_INET6 from FreeBSD sys/socket.h */ ++ tss->parent->ss.ss_family = AF_INET6; ++ break; ++ default: ++ g_critical("invalid ss_family type %x", tss->portable_family); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static const VMStateDescription vmstate_slirp_ss_family = { ++ .name = "slirp-socket-addr/ss_family", ++ .pre_save = ss_family_pre_save, ++ .post_load = ss_family_post_load, ++ .fields = ++ (VMStateField[]){ VMSTATE_UINT16(portable_family, SS_FamilyTmpStruct), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket_addr = { ++ .name = "slirp-socket-addr", ++ .version_id = 4, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_WITH_TMP(union slirp_sockaddr, SS_FamilyTmpStruct, ++ vmstate_slirp_ss_family), ++ VMSTATE_SIN4_ADDR(sin.sin_addr.s_addr, union slirp_sockaddr, ++ slirp_family_inet), ++ VMSTATE_UINT16_TEST(sin.sin_port, union slirp_sockaddr, ++ slirp_family_inet), ++ ++#if 0 ++ /* Untested: Needs checking by someone with IPv6 test */ ++ VMSTATE_BUFFER_TEST(sin6.sin6_addr, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT16_TEST(sin6.sin6_port, union slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_flowinfo, union 
slirp_sockaddr, ++ slirp_family_inet6), ++ VMSTATE_UINT32_TEST(sin6.sin6_scope_id, union slirp_sockaddr, ++ slirp_family_inet6), ++#endif ++ ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_socket = { ++ .name = "slirp-socket", ++ .version_id = 4, ++ .pre_load = slirp_socket_pre_load, ++ .fields = ++ (VMStateField[]){ ++ VMSTATE_UINT32(so_urgc, struct socket), ++ /* Pre-v4 versions */ ++ VMSTATE_SIN4_ADDR(so_faddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_SIN4_ADDR(so_laddr.s_addr, struct socket, ++ slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_fport, struct socket, slirp_older_than_v4), ++ VMSTATE_UINT16_TEST(so_lport, struct socket, slirp_older_than_v4), ++ /* v4 and newer */ ++ VMSTATE_STRUCT(fhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ VMSTATE_STRUCT(lhost, struct socket, 4, vmstate_slirp_socket_addr, ++ union slirp_sockaddr), ++ ++ VMSTATE_UINT8(so_iptos, struct socket), ++ VMSTATE_UINT8(so_emu, struct socket), ++ VMSTATE_UINT8(so_type, struct socket), ++ VMSTATE_INT32(so_state, struct socket), ++ VMSTATE_STRUCT(so_rcv, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT(so_snd, struct socket, 0, vmstate_slirp_sbuf, ++ struct sbuf), ++ VMSTATE_STRUCT_POINTER(so_tcpcb, struct socket, vmstate_slirp_tcp, ++ struct tcpcb), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp_bootp_client = { ++ .name = "slirp_bootpclient", ++ .fields = (VMStateField[]){ VMSTATE_UINT16(allocated, BOOTPClient), ++ VMSTATE_BUFFER(macaddr, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++static const VMStateDescription vmstate_slirp = { ++ .name = "slirp", ++ .version_id = 4, ++ .fields = (VMStateField[]){ VMSTATE_UINT16_V(ip_id, Slirp, 2), ++ VMSTATE_STRUCT_ARRAY( ++ bootp_clients, Slirp, NB_BOOTP_CLIENTS, 3, ++ vmstate_slirp_bootp_client, BOOTPClient), ++ VMSTATE_END_OF_LIST() } ++}; ++ ++void slirp_state_save(Slirp *slirp, SlirpWriteCb 
write_cb, void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpOStream f = { ++ .write_cb = write_cb, ++ .opaque = opaque, ++ }; ++ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) ++ if (ex_ptr->write_cb) { ++ struct socket *so; ++ so = slirp_find_ctl_socket(slirp, ex_ptr->ex_addr, ++ ntohs(ex_ptr->ex_fport)); ++ if (!so) { ++ continue; ++ } ++ ++ slirp_ostream_write_u8(&f, 42); ++ slirp_vmstate_save_state(&f, &vmstate_slirp_socket, so); ++ } ++ slirp_ostream_write_u8(&f, 0); ++ ++ slirp_vmstate_save_state(&f, &vmstate_slirp, slirp); ++} ++ ++ ++int slirp_state_load(Slirp *slirp, int version_id, SlirpReadCb read_cb, ++ void *opaque) ++{ ++ struct gfwd_list *ex_ptr; ++ SlirpIStream f = { ++ .read_cb = read_cb, ++ .opaque = opaque, ++ }; ++ ++ while (slirp_istream_read_u8(&f)) { ++ int ret; ++ struct socket *so = socreate(slirp); ++ ++ ret = ++ slirp_vmstate_load_state(&f, &vmstate_slirp_socket, so, version_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) != ++ slirp->vnetwork_addr.s_addr) { ++ return -EINVAL; ++ } ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->write_cb && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr && ++ so->so_fport == ex_ptr->ex_fport) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ return -EINVAL; ++ } ++ ++ so->guestfwd = ex_ptr; ++ } ++ ++ return slirp_vmstate_load_state(&f, &vmstate_slirp, slirp, version_id); ++} ++ ++int slirp_state_version(void) ++{ ++ return 4; ++} +diff --git a/slirp/src/stream.c b/slirp/src/stream.c +new file mode 100644 +index 0000000000..6cf326f669 +--- /dev/null ++++ b/slirp/src/stream.c +@@ -0,0 +1,120 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * libslirp io streams ++ * ++ * Copyright (c) 2018 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "stream.h" ++#include <glib.h> ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size) ++{ ++ return f->read_cb(buf, size, f->opaque) == size; ++} ++ ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size) ++{ ++ return f->write_cb(buf, size, f->opaque) == size; ++} ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f) ++{ ++ uint8_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return b; ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b) ++{ ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f) ++{ ++ uint16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b) ++{ ++ b = GUINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f) ++{ ++ uint32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GUINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b) ++{ ++ b = GUINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f) ++{ ++ int16_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT16_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b) ++{ ++ b = GINT16_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f) ++{ ++ int32_t b; ++ ++ if (slirp_istream_read(f, &b, sizeof(b))) { ++ return GINT32_FROM_BE(b); ++ } ++ ++ return 0; ++} ++ ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b) ++{ ++ b = GINT32_TO_BE(b); ++ return slirp_ostream_write(f, &b, sizeof(b)); ++} +diff --git a/slirp/src/stream.h b/slirp/src/stream.h +new file mode 100644 +index 0000000000..08bb5b6610 +--- /dev/null ++++
b/slirp/src/stream.h +@@ -0,0 +1,35 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#ifndef STREAM_H_ ++#define STREAM_H_ ++ ++#include "libslirp.h" ++ ++typedef struct SlirpIStream { ++ SlirpReadCb read_cb; ++ void *opaque; ++} SlirpIStream; ++ ++typedef struct SlirpOStream { ++ SlirpWriteCb write_cb; ++ void *opaque; ++} SlirpOStream; ++ ++bool slirp_istream_read(SlirpIStream *f, void *buf, size_t size); ++bool slirp_ostream_write(SlirpOStream *f, const void *buf, size_t size); ++ ++uint8_t slirp_istream_read_u8(SlirpIStream *f); ++bool slirp_ostream_write_u8(SlirpOStream *f, uint8_t b); ++ ++uint16_t slirp_istream_read_u16(SlirpIStream *f); ++bool slirp_ostream_write_u16(SlirpOStream *f, uint16_t b); ++ ++uint32_t slirp_istream_read_u32(SlirpIStream *f); ++bool slirp_ostream_write_u32(SlirpOStream *f, uint32_t b); ++ ++int16_t slirp_istream_read_i16(SlirpIStream *f); ++bool slirp_ostream_write_i16(SlirpOStream *f, int16_t b); ++ ++int32_t slirp_istream_read_i32(SlirpIStream *f); ++bool slirp_ostream_write_i32(SlirpOStream *f, int32_t b); ++ ++#endif /* STREAM_H_ */ +diff --git a/slirp/src/tcp.h b/slirp/src/tcp.h +new file mode 100644 +index 0000000000..70a9760664 +--- /dev/null ++++ b/slirp/src/tcp.h +@@ -0,0 +1,169 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp.h 8.1 (Berkeley) 6/10/93 ++ * tcp.h,v 1.3 1994/08/21 05:27:34 paul Exp ++ */ ++ ++#ifndef TCP_H ++#define TCP_H ++ ++#include <glib.h> ++ ++typedef uint32_t tcp_seq; ++ ++#define PR_SLOWHZ 2 /* 2 slow timeouts per second (approx) */ ++#define PR_FASTHZ 5 /* 5 fast timeouts per second (not important) */ ++ ++#define TCP_SNDSPACE 1024 * 128 ++#define TCP_RCVSPACE 1024 * 128 ++#define TCP_MAXSEG_MAX 32768 ++ ++/* ++ * TCP header. ++ * Per RFC 793, September, 1981.
++ */ ++#define tcphdr slirp_tcphdr ++struct tcphdr { ++ uint16_t th_sport; /* source port */ ++ uint16_t th_dport; /* destination port */ ++ tcp_seq th_seq; /* sequence number */ ++ tcp_seq th_ack; /* acknowledgement number */ ++#if G_BYTE_ORDER == G_BIG_ENDIAN ++ uint8_t th_off : 4, /* data offset */ ++ th_x2 : 4; /* (unused) */ ++#else ++ uint8_t th_x2 : 4, /* (unused) */ ++ th_off : 4; /* data offset */ ++#endif ++ uint8_t th_flags; ++ uint16_t th_win; /* window */ ++ uint16_t th_sum; /* checksum */ ++ uint16_t th_urp; /* urgent pointer */ ++}; ++ ++#include "tcp_var.h" ++ ++#ifndef TH_FIN ++#define TH_FIN 0x01 ++#define TH_SYN 0x02 ++#define TH_RST 0x04 ++#define TH_PUSH 0x08 ++#define TH_ACK 0x10 ++#define TH_URG 0x20 ++#endif ++ ++#ifndef TCPOPT_EOL ++#define TCPOPT_EOL 0 ++#define TCPOPT_NOP 1 ++#define TCPOPT_MAXSEG 2 ++#define TCPOPT_WINDOW 3 ++#define TCPOPT_SACK_PERMITTED 4 /* Experimental */ ++#define TCPOPT_SACK 5 /* Experimental */ ++#define TCPOPT_TIMESTAMP 8 ++ ++#define TCPOPT_TSTAMP_HDR \ ++ (TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | \ ++ TCPOLEN_TIMESTAMP) ++#endif ++ ++#ifndef TCPOLEN_MAXSEG ++#define TCPOLEN_MAXSEG 4 ++#define TCPOLEN_WINDOW 3 ++#define TCPOLEN_SACK_PERMITTED 2 ++#define TCPOLEN_TIMESTAMP 10 ++#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP + 2) /* appendix A */ ++#endif ++ ++#undef TCP_MAXWIN ++#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ ++ ++#undef TCP_MAX_WINSHIFT ++#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ ++ ++/* ++ * User-settable options (used with setsockopt). ++ * ++ * We don't use the system headers on unix because we have conflicting ++ * local structures. We can't avoid the system definitions on Windows, ++ * so we undefine them. ++ */ ++#undef TCP_NODELAY ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#undef TCP_MAXSEG ++ ++/* ++ * TCP FSM state definitions. ++ * Per RFC793, September, 1981. 
++ */ ++ ++#define TCP_NSTATES 11 ++ ++#define TCPS_CLOSED 0 /* closed */ ++#define TCPS_LISTEN 1 /* listening for connection */ ++#define TCPS_SYN_SENT 2 /* active, have sent syn */ ++#define TCPS_SYN_RECEIVED 3 /* have sent and received syn */ ++/* states < TCPS_ESTABLISHED are those where connections not established */ ++#define TCPS_ESTABLISHED 4 /* established */ ++#define TCPS_CLOSE_WAIT 5 /* rcvd fin, waiting for close */ ++/* states > TCPS_CLOSE_WAIT are those where user has closed */ ++#define TCPS_FIN_WAIT_1 6 /* have closed, sent fin */ ++#define TCPS_CLOSING 7 /* closed xchd FIN; await FIN ACK */ ++#define TCPS_LAST_ACK 8 /* had fin and close; await FIN ACK */ ++/* states > TCPS_CLOSE_WAIT && < TCPS_FIN_WAIT_2 await ACK of FIN */ ++#define TCPS_FIN_WAIT_2 9 /* have closed, fin is acked */ ++#define TCPS_TIME_WAIT 10 /* in 2*msl quiet wait after close */ ++ ++#define TCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) ++#define TCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) ++#define TCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) ++ ++/* ++ * TCP sequence numbers are 32 bit integers operated ++ * on with modular arithmetic. These macros can be ++ * used to compare such integers. ++ */ ++#define SEQ_LT(a, b) ((int)((a) - (b)) < 0) ++#define SEQ_LEQ(a, b) ((int)((a) - (b)) <= 0) ++#define SEQ_GT(a, b) ((int)((a) - (b)) > 0) ++#define SEQ_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Macros to initialize tcp sequence numbers for ++ * send and receive from initial send and receive ++ * sequence numbers.
++ */ ++#define tcp_rcvseqinit(tp) (tp)->rcv_adv = (tp)->rcv_nxt = (tp)->irs + 1 ++ ++#define tcp_sendseqinit(tp) \ ++ (tp)->snd_una = (tp)->snd_nxt = (tp)->snd_max = (tp)->snd_up = (tp)->iss ++ ++#define TCP_ISSINCR (125 * 1024) /* increment for tcp_iss each second */ ++ ++#endif +diff --git a/slirp/src/tcp_input.c b/slirp/src/tcp_input.c +new file mode 100644 +index 0000000000..36a4844a7d +--- /dev/null ++++ b/slirp/src/tcp_input.c +@@ -0,0 +1,1552 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94 ++ * tcp_input.c,v 1.10 1994/10/13 18:36:32 wollman Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++#define TCPREXMTTHRESH 3 ++ ++#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ) ++ ++/* for modulo comparisons of timestamps */ ++#define TSTMP_LT(a, b) ((int)((a) - (b)) < 0) ++#define TSTMP_GEQ(a, b) ((int)((a) - (b)) >= 0) ++ ++/* ++ * Insert segment ti into reassembly queue of tcp with ++ * control block tp. Return TH_FIN if reassembly now includes ++ * a segment with FIN. The macro form does the common case inline ++ * (segment is the next to be received on an established connection, ++ * and the queue is empty), avoiding linkage into and removal ++ * from the queue and repetition of various conversions. ++ * Set DELACK for segments received in order, but ack immediately ++ * when segments are out of order (so fast retransmit can work). 
++ */ ++#define TCP_REASS(tp, ti, m, so, flags) \ ++ { \ ++ if ((ti)->ti_seq == (tp)->rcv_nxt && tcpfrag_list_empty(tp) && \ ++ (tp)->t_state == TCPS_ESTABLISHED) { \ ++ tp->t_flags |= TF_DELACK; \ ++ (tp)->rcv_nxt += (ti)->ti_len; \ ++ flags = (ti)->ti_flags & TH_FIN; \ ++ if (so->so_emu) { \ ++ if (tcp_emu((so), (m))) \ ++ sbappend(so, (m)); \ ++ } else \ ++ sbappend((so), (m)); \ ++ } else { \ ++ (flags) = tcp_reass((tp), (ti), (m)); \ ++ tp->t_flags |= TF_ACKNOW; \ ++ } \ ++ } ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti); ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt); ++ ++static int tcp_reass(register struct tcpcb *tp, register struct tcpiphdr *ti, ++ struct mbuf *m) ++{ ++ if (m) ++ M_DUP_DEBUG(m->slirp, m, 0, 0); ++ ++ register struct tcpiphdr *q; ++ struct socket *so = tp->t_socket; ++ int flags; ++ ++ /* ++ * Call with ti==NULL after become established to ++ * force pre-ESTABLISHED data up to user socket. ++ */ ++ if (ti == NULL) ++ goto present; ++ ++ /* ++ * Find a segment which begins after this one does. ++ */ ++ for (q = tcpfrag_list_first(tp); !tcpfrag_list_end(q, tp); ++ q = tcpiphdr_next(q)) ++ if (SEQ_GT(q->ti_seq, ti->ti_seq)) ++ break; ++ ++ /* ++ * If there is a preceding segment, it may provide some of ++ * our data already. If so, drop the data from the incoming ++ * segment. If it provides all of our data, drop us. ++ */ ++ if (!tcpfrag_list_end(tcpiphdr_prev(q), tp)) { ++ register int i; ++ q = tcpiphdr_prev(q); ++ /* conversion to int (in i) handles seq wraparound */ ++ i = q->ti_seq + q->ti_len - ti->ti_seq; ++ if (i > 0) { ++ if (i >= ti->ti_len) { ++ m_free(m); ++ /* ++ * Try to present any queued data ++ * at the left window edge to the user. ++ * This is needed after the 3-WHS ++ * completes. ++ */ ++ goto present; /* ??? 
*/ ++ } ++ m_adj(m, i); ++ ti->ti_len -= i; ++ ti->ti_seq += i; ++ } ++ q = tcpiphdr_next(q); ++ } ++ ti->ti_mbuf = m; ++ ++ /* ++ * While we overlap succeeding segments trim them or, ++ * if they are completely covered, dequeue them. ++ */ ++ while (!tcpfrag_list_end(q, tp)) { ++ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq; ++ if (i <= 0) ++ break; ++ if (i < q->ti_len) { ++ q->ti_seq += i; ++ q->ti_len -= i; ++ m_adj(q->ti_mbuf, i); ++ break; ++ } ++ q = tcpiphdr_next(q); ++ m = tcpiphdr_prev(q)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(q))); ++ m_free(m); ++ } ++ ++ /* ++ * Stick new segment in its place. ++ */ ++ insque(tcpiphdr2qlink(ti), tcpiphdr2qlink(tcpiphdr_prev(q))); ++ ++present: ++ /* ++ * Present data to user, advancing rcv_nxt through ++ * completed sequence space. ++ */ ++ if (!TCPS_HAVEESTABLISHED(tp->t_state)) ++ return (0); ++ ti = tcpfrag_list_first(tp); ++ if (tcpfrag_list_end(ti, tp) || ti->ti_seq != tp->rcv_nxt) ++ return (0); ++ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len) ++ return (0); ++ do { ++ tp->rcv_nxt += ti->ti_len; ++ flags = ti->ti_flags & TH_FIN; ++ remque(tcpiphdr2qlink(ti)); ++ m = ti->ti_mbuf; ++ ti = tcpiphdr_next(ti); ++ if (so->so_state & SS_FCANTSENDMORE) ++ m_free(m); ++ else { ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ } ++ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt); ++ return (flags); ++} ++ ++/* ++ * TCP input routine, follows pages 65-76 of the ++ * protocol specification dated September, 1981 very closely. 
++ */ ++void tcp_input(struct mbuf *m, int iphlen, struct socket *inso, ++ unsigned short af) ++{ ++ struct ip save_ip, *ip; ++ struct ip6 save_ip6, *ip6; ++ register struct tcpiphdr *ti; ++ char *optp = NULL; ++ int optlen = 0; ++ int len, tlen, off; ++ register struct tcpcb *tp = NULL; ++ register int tiflags; ++ struct socket *so = NULL; ++ int todrop, acked, ourfinisacked, needoutput = 0; ++ int iss = 0; ++ uint32_t tiwin; ++ int ret; ++ struct sockaddr_storage lhost, fhost; ++ struct sockaddr_in *lhost4, *fhost4; ++ struct sockaddr_in6 *lhost6, *fhost6; ++ struct gfwd_list *ex_ptr; ++ Slirp *slirp; ++ ++ DEBUG_CALL("tcp_input"); ++ DEBUG_ARG("m = %p iphlen = %2d inso = %p", m, iphlen, inso); ++ ++ /* ++ * If called with m == 0, then we're continuing the connect ++ */ ++ if (m == NULL) { ++ so = inso; ++ slirp = so->slirp; ++ ++ /* Re-set a few variables */ ++ tp = sototcpcb(so); ++ m = so->so_m; ++ so->so_m = NULL; ++ ti = so->so_ti; ++ tiwin = ti->ti_win; ++ tiflags = ti->ti_flags; ++ ++ goto cont_conn; ++ } ++ slirp = m->slirp; ++ switch (af) { ++ case AF_INET: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr)); ++ break; ++ case AF_INET6: ++ M_DUP_DEBUG(slirp, m, 0, ++ sizeof(struct tcpiphdr) - sizeof(struct ip6) - sizeof(struct tcphdr)); ++ break; ++ } ++ ++ ip = mtod(m, struct ip *); ++ ip6 = mtod(m, struct ip6 *); ++ ++ switch (af) { ++ case AF_INET: ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ /* XXX Check if too short */ ++ ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; ++ ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. 
++ */ ++ m->m_data -= ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) - sizeof(struct ip) - sizeof(struct tcphdr); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ /* ++ * Checksum extended TCP header and data. ++ */ ++ tlen = ip->ip_len; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src = save_ip.ip_src; ++ ti->ti_dst = save_ip.ip_dst; ++ ti->ti_pr = save_ip.ip_p; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ case AF_INET6: ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip6 = *ip6; ++ /* ++ * Get IP and TCP header together in first mbuf. ++ * Note: IP leaves IP header in first mbuf. ++ */ ++ m->m_data -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ ti = mtod(m, struct tcpiphdr *); ++ ++ tlen = ip6->ip_pl; ++ tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL; ++ memset(&ti->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ memset(&ti->ti, 0, sizeof(ti->ti)); ++ ti->ti_x0 = 0; ++ ti->ti_src6 = save_ip6.ip_src; ++ ti->ti_dst6 = save_ip6.ip_dst; ++ ti->ti_nh6 = save_ip6.ip_nh; ++ ti->ti_len = htons((uint16_t)tlen); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ len = ((sizeof(struct tcpiphdr) - sizeof(struct tcphdr)) + tlen); ++ if (cksum(m, len)) { ++ goto drop; ++ } ++ ++ /* ++ * Check that TCP offset makes sense, ++ * pull out TCP options and adjust length. 
XXX ++ */ ++ off = ti->ti_off << 2; ++ if (off < sizeof(struct tcphdr) || off > tlen) { ++ goto drop; ++ } ++ tlen -= off; ++ ti->ti_len = tlen; ++ if (off > sizeof(struct tcphdr)) { ++ optlen = off - sizeof(struct tcphdr); ++ optp = mtod(m, char *) + sizeof(struct tcpiphdr); ++ } ++ tiflags = ti->ti_flags; ++ ++ /* ++ * Convert TCP protocol specific fields to host format. ++ */ ++ NTOHL(ti->ti_seq); ++ NTOHL(ti->ti_ack); ++ NTOHS(ti->ti_win); ++ NTOHS(ti->ti_urp); ++ ++ /* ++ * Drop TCP, IP headers and TCP options. ++ */ ++ m->m_data += sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ ++ /* ++ * Locate pcb for segment. ++ */ ++findso: ++ lhost.ss_family = af; ++ fhost.ss_family = af; ++ switch (af) { ++ case AF_INET: ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ti->ti_src; ++ lhost4->sin_port = ti->ti_sport; ++ fhost4 = (struct sockaddr_in *)&fhost; ++ fhost4->sin_addr = ti->ti_dst; ++ fhost4->sin_port = ti->ti_dport; ++ break; ++ case AF_INET6: ++ lhost6 = (struct sockaddr_in6 *)&lhost; ++ lhost6->sin6_addr = ti->ti_src6; ++ lhost6->sin6_port = ti->ti_sport; ++ fhost6 = (struct sockaddr_in6 *)&fhost; ++ fhost6->sin6_addr = ti->ti_dst6; ++ fhost6->sin6_port = ti->ti_dport; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ so = solookup(&slirp->tcp_last_so, &slirp->tcb, &lhost, &fhost); ++ ++ /* ++ * If the state is CLOSED (i.e., TCB does not exist) then ++ * all data in the incoming segment is discarded. ++ * If the TCB exists but is in CLOSED state, it is embryonic, ++ * but should either do a listen or a connect soon. ++ * ++ * state == CLOSED means we've done socreate() but haven't ++ * attached it to a protocol yet... ++ * ++ * XXX If a TCB does not exist, and the TH_SYN flag is ++ * the only flag set, then create a session, mark it ++ * as if it was LISTENING, and continue... 
++ */ ++ if (so == NULL) { ++ /* TODO: IPv6 */ ++ if (slirp->restricted) { ++ /* Any hostfwds will have an existing socket, so we only get here ++ * for non-hostfwd connections. These should be dropped, unless it ++ * happens to be a guestfwd. ++ */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == ti->ti_dport && ++ ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) { ++ break; ++ } ++ } ++ if (!ex_ptr) { ++ goto dropwithreset; ++ } ++ } ++ ++ if ((tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) != TH_SYN) ++ goto dropwithreset; ++ ++ so = socreate(slirp); ++ tcp_attach(so); ++ ++ sbreserve(&so->so_snd, TCP_SNDSPACE); ++ sbreserve(&so->so_rcv, TCP_RCVSPACE); ++ ++ so->lhost.ss = lhost; ++ so->fhost.ss = fhost; ++ ++ so->so_iptos = tcp_tos(so); ++ if (so->so_iptos == 0) { ++ switch (af) { ++ case AF_INET: ++ so->so_iptos = ((struct ip *)ti)->ip_tos; ++ break; ++ case AF_INET6: ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ ++ tp = sototcpcb(so); ++ tp->t_state = TCPS_LISTEN; ++ } ++ ++ /* ++ * If this is a still-connecting socket, this probably ++ * a retransmit of the SYN. Whether it's a retransmit SYN ++ * or something else, we nuke it. ++ */ ++ if (so->so_state & SS_ISFCONNECTING) ++ goto drop; ++ ++ tp = sototcpcb(so); ++ ++ /* XXX Should never fail */ ++ if (tp == NULL) ++ goto dropwithreset; ++ if (tp->t_state == TCPS_CLOSED) ++ goto drop; ++ ++ tiwin = ti->ti_win; ++ ++ /* ++ * Segment received on connection. ++ * Reset idle time and keep-alive timer. ++ */ ++ tp->t_idle = 0; ++ if (slirp_do_keepalive) ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ ++ /* ++ * Process options if not in LISTEN state, ++ * else do it below (after getting remote address). 
++ */ ++ if (optp && tp->t_state != TCPS_LISTEN) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ /* ++ * Header prediction: check for the two common cases ++ * of a uni-directional data xfer. If the packet has ++ * no control flags, is in-sequence, the window didn't ++ * change and we're not retransmitting, it's a ++ * candidate. If the length is zero and the ack moved ++ * forward, we're the sender side of the xfer. Just ++ * free the data acked & wake any higher level process ++ * that was blocked waiting for space. If the length ++ * is non-zero and the ack didn't move, we're the ++ * receiver side. If we're getting packets in-order ++ * (the reassembly queue is empty), add the data to ++ * the socket buffer and note that we need a delayed ack. ++ * ++ * XXX Some of these tests are not needed ++ * eg: the tiwin == tp->snd_wnd prevents many more ++ * predictions.. with no *real* advantage.. ++ */ ++ if (tp->t_state == TCPS_ESTABLISHED && ++ (tiflags & (TH_SYN | TH_FIN | TH_RST | TH_URG | TH_ACK)) == TH_ACK && ++ ti->ti_seq == tp->rcv_nxt && tiwin && tiwin == tp->snd_wnd && ++ tp->snd_nxt == tp->snd_max) { ++ if (ti->ti_len == 0) { ++ if (SEQ_GT(ti->ti_ack, tp->snd_una) && ++ SEQ_LEQ(ti->ti_ack, tp->snd_max) && ++ tp->snd_cwnd >= tp->snd_wnd) { ++ /* ++ * this is a pure ack for outstanding data. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ acked = ti->ti_ack - tp->snd_una; ++ sodrop(so, acked); ++ tp->snd_una = ti->ti_ack; ++ m_free(m); ++ ++ /* ++ * If all outstanding data are acked, stop ++ * retransmit timer, otherwise restart timer ++ * using current (possibly backed-off) value. ++ * If process is waiting for space, ++ * wakeup/selwakeup/signal. If data ++ * are ready to send, let tcp_output ++ * decide between more output or persist. 
++ */ ++ if (tp->snd_una == tp->snd_max) ++ tp->t_timer[TCPT_REXMT] = 0; ++ else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ ++ /* ++ * This is called because sowwakeup might have ++ * put data into so_snd. Since we don't so sowwakeup, ++ * we don't need this.. XXX??? ++ */ ++ if (so->so_snd.sb_cc) ++ tcp_output(tp); ++ ++ return; ++ } ++ } else if (ti->ti_ack == tp->snd_una && tcpfrag_list_empty(tp) && ++ ti->ti_len <= sbspace(&so->so_rcv)) { ++ /* ++ * this is a pure, in-sequence data packet ++ * with nothing on the reassembly queue and ++ * we have enough buffer space to take it. ++ */ ++ tp->rcv_nxt += ti->ti_len; ++ /* ++ * Add data to socket buffer. ++ */ ++ if (so->so_emu) { ++ if (tcp_emu(so, m)) ++ sbappend(so, m); ++ } else ++ sbappend(so, m); ++ ++ /* ++ * If this is a short packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ * ++ * It is better to not delay acks at all to maximize ++ * TCP throughput. See RFC 2581. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ } ++ } /* header prediction */ ++ /* ++ * Calculate amount of space in receive window, ++ * and then do TCP input processing. ++ * Receive window is amount of space in rcv queue, ++ * but not less than advertised window. ++ */ ++ { ++ int win; ++ win = sbspace(&so->so_rcv); ++ if (win < 0) ++ win = 0; ++ tp->rcv_wnd = MAX(win, (int)(tp->rcv_adv - tp->rcv_nxt)); ++ } ++ ++ switch (tp->t_state) { ++ /* ++ * If the state is LISTEN then ignore segment if it contains an RST. ++ * If the segment contains an ACK then it is bad and send a RST. ++ * If it does not contain a SYN then it is not interesting; drop it. ++ * Don't bother responding if the destination was a broadcast. ++ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial ++ * tp->iss, and send a segment: ++ * ++ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 
++ * Fill in remote peer address fields if not previously specified. ++ * Enter SYN_RECEIVED state, and process any other fields of this ++ * segment in this state. ++ */ ++ case TCPS_LISTEN: { ++ if (tiflags & TH_RST) ++ goto drop; ++ if (tiflags & TH_ACK) ++ goto dropwithreset; ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ ++ /* ++ * This has way too many gotos... ++ * But a bit of spaghetti code never hurt anybody :) ++ */ ++ ++ /* ++ * If this is destined for the control address, then flag to ++ * tcp_ctl once connected, otherwise connect ++ */ ++ /* TODO: IPv6 */ ++ if (af == AF_INET && ++ (so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) == ++ slirp->vnetwork_addr.s_addr) { ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr && ++ so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) { ++ /* May be an add exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ++ ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ so->so_state |= SS_CTL; ++ break; ++ } ++ } ++ if (so->so_state & SS_CTL) { ++ goto cont_input; ++ } ++ } ++ /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */ ++ } ++ ++ if (so->so_emu & EMU_NOCONNECT) { ++ so->so_emu &= ~EMU_NOCONNECT; ++ goto cont_input; ++ } ++ ++ if ((tcp_fconnect(so, so->so_ffamily) == -1) && (errno != EAGAIN) && ++ (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) { ++ uint8_t code; ++ DEBUG_MISC(" tcp fconnect errno = %d-%s", errno, strerror(errno)); ++ if (errno == ECONNREFUSED) { ++ /* ACK the SYN, send RST to refuse the connection */ ++ tcp_respond(tp, ti, m, ti->ti_seq + 1, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } else { ++ switch (af) { ++ case AF_INET: ++ code = ICMP_UNREACH_NET; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP_UNREACH_HOST; ++ } ++ break; ++ case AF_INET6: ++ code = ICMP6_UNREACH_NO_ROUTE; ++ if (errno == EHOSTUNREACH) { ++ code = ICMP6_UNREACH_ADDRESS; ++ } ++ break; ++ default: ++ g_assert_not_reached(); ++ 
} ++ HTONL(ti->ti_seq); /* restore tcp header */ ++ HTONL(ti->ti_ack); ++ HTONS(ti->ti_win); ++ HTONS(ti->ti_urp); ++ m->m_data -= ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ m->m_len += ++ sizeof(struct tcpiphdr) + off - sizeof(struct tcphdr); ++ switch (af) { ++ case AF_INET: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct ip) - ++ sizeof(struct tcphdr); ++ *ip = save_ip; ++ icmp_send_error(m, ICMP_UNREACH, code, 0, strerror(errno)); ++ break; ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ m->m_len -= sizeof(struct tcpiphdr) - ++ (sizeof(struct ip6) + sizeof(struct tcphdr)); ++ *ip6 = save_ip6; ++ icmp6_send_error(m, ICMP6_UNREACH, code); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++ tcp_close(tp); ++ m_free(m); ++ } else { ++ /* ++ * Haven't connected yet, save the current mbuf ++ * and ti, and return ++ * XXX Some OS's don't tell us whether the connect() ++ * succeeded or not. So we must time it out. ++ */ ++ so->so_m = m; ++ so->so_ti = ti; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ /* ++ * Initialize receive sequence numbers now so that we can send a ++ * valid RST if the remote end rejects our connection. 
++ */ ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tcp_template(tp); ++ } ++ return; ++ ++ cont_conn: ++ /* m==NULL ++ * Check if the connect succeeded ++ */ ++ if (so->so_state & SS_NOFDREF) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ cont_input: ++ tcp_template(tp); ++ ++ if (optp) ++ tcp_dooptions(tp, (uint8_t *)optp, optlen, ti); ++ ++ if (iss) ++ tp->iss = iss; ++ else ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tp->irs = ti->ti_seq; ++ tcp_sendseqinit(tp); ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ tp->t_state = TCPS_SYN_RECEIVED; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ goto trimthenstep6; ++ } /* case TCPS_LISTEN */ ++ ++ /* ++ * If the state is SYN_SENT: ++ * if seg contains an ACK, but not for our SYN, drop the input. ++ * if seg contains a RST, then drop the connection. ++ * if seg does not contain SYN, then drop it. ++ * Otherwise this is an acceptable SYN segment ++ * initialize tp->rcv_nxt and tp->irs ++ * if seg contains ack then advance tp->snd_una ++ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state ++ * arrange for segment to be acked (eventually) ++ * continue processing rest of data/controls, beginning with URG ++ */ ++ case TCPS_SYN_SENT: ++ if ((tiflags & TH_ACK) && ++ (SEQ_LEQ(ti->ti_ack, tp->iss) || SEQ_GT(ti->ti_ack, tp->snd_max))) ++ goto dropwithreset; ++ ++ if (tiflags & TH_RST) { ++ if (tiflags & TH_ACK) { ++ tcp_drop(tp, 0); /* XXX Check t_softerror! 
*/ ++ } ++ goto drop; ++ } ++ ++ if ((tiflags & TH_SYN) == 0) ++ goto drop; ++ if (tiflags & TH_ACK) { ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ } ++ ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->irs = ti->ti_seq; ++ tcp_rcvseqinit(tp); ++ tp->t_flags |= TF_ACKNOW; ++ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { ++ soisfconnected(so); ++ tp->t_state = TCPS_ESTABLISHED; ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ /* ++ * if we didn't have to retransmit the SYN, ++ * use its rtt as our initial srtt & rtt var. ++ */ ++ if (tp->t_rtt) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ } else ++ tp->t_state = TCPS_SYN_RECEIVED; ++ ++ trimthenstep6: ++ /* ++ * Advance ti->ti_seq to correspond to first data byte. ++ * If data, trim to stay within window, ++ * dropping FIN if necessary. ++ */ ++ ti->ti_seq++; ++ if (ti->ti_len > tp->rcv_wnd) { ++ todrop = ti->ti_len - tp->rcv_wnd; ++ m_adj(m, -todrop); ++ ti->ti_len = tp->rcv_wnd; ++ tiflags &= ~TH_FIN; ++ } ++ tp->snd_wl1 = ti->ti_seq - 1; ++ tp->rcv_up = ti->ti_seq; ++ goto step6; ++ } /* switch tp->t_state */ ++ /* ++ * States other than LISTEN or SYN_SENT. ++ * Check that at least some bytes of segment are within ++ * receive window. If segment begins before rcv_nxt, ++ * drop leading data (and SYN); if nothing left, just ack. ++ */ ++ todrop = tp->rcv_nxt - ti->ti_seq; ++ if (todrop > 0) { ++ if (tiflags & TH_SYN) { ++ tiflags &= ~TH_SYN; ++ ti->ti_seq++; ++ if (ti->ti_urp > 1) ++ ti->ti_urp--; ++ else ++ tiflags &= ~TH_URG; ++ todrop--; ++ } ++ /* ++ * Following if statement from Stevens, vol. 2, p. 960. ++ */ ++ if (todrop > ti->ti_len || ++ (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) { ++ /* ++ * Any valid FIN must be to the left of the window. ++ * At this point the FIN must be a duplicate or out ++ * of sequence; drop it. ++ */ ++ tiflags &= ~TH_FIN; ++ ++ /* ++ * Send an ACK to resynchronize and drop any data. 
++ * But keep on processing for RST or ACK. ++ */ ++ tp->t_flags |= TF_ACKNOW; ++ todrop = ti->ti_len; ++ } ++ m_adj(m, todrop); ++ ti->ti_seq += todrop; ++ ti->ti_len -= todrop; ++ if (ti->ti_urp > todrop) ++ ti->ti_urp -= todrop; ++ else { ++ tiflags &= ~TH_URG; ++ ti->ti_urp = 0; ++ } ++ } ++ /* ++ * If new data are received on a connection after the ++ * user processes are gone, then RST the other end. ++ */ ++ if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && ++ ti->ti_len) { ++ tp = tcp_close(tp); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If segment ends after window, drop trailing data ++ * (and PUSH and FIN); if nothing left, just ACK. ++ */ ++ todrop = (ti->ti_seq + ti->ti_len) - (tp->rcv_nxt + tp->rcv_wnd); ++ if (todrop > 0) { ++ if (todrop >= ti->ti_len) { ++ /* ++ * If a new connection request is received ++ * while in TIME_WAIT, drop the old connection ++ * and start over if the sequence numbers ++ * are above the previous ones. ++ */ ++ if (tiflags & TH_SYN && tp->t_state == TCPS_TIME_WAIT && ++ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) { ++ iss = tp->rcv_nxt + TCP_ISSINCR; ++ tp = tcp_close(tp); ++ goto findso; ++ } ++ /* ++ * If window is closed can only take segments at ++ * window edge, and have to drop data and PUSH from ++ * incoming segments. Continue processing, but ++ * remember to ack. Otherwise, drop segment ++ * and ack. ++ */ ++ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) { ++ tp->t_flags |= TF_ACKNOW; ++ } else { ++ goto dropafterack; ++ } ++ } ++ m_adj(m, -todrop); ++ ti->ti_len -= todrop; ++ tiflags &= ~(TH_PUSH | TH_FIN); ++ } ++ ++ /* ++ * If the RST bit is set examine the state: ++ * SYN_RECEIVED STATE: ++ * If passive open, return to LISTEN state. ++ * If active open, inform user that connection was refused. ++ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: ++ * Inform user that connection was reset, and close tcb. ++ * CLOSING, LAST_ACK, TIME_WAIT STATES ++ * Close the tcb. 
++ */ ++ if (tiflags & TH_RST) ++ switch (tp->t_state) { ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ goto drop; ++ ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ tcp_close(tp); ++ goto drop; ++ } ++ ++ /* ++ * If a SYN is in the window, then this is an ++ * error and we send an RST and drop the connection. ++ */ ++ if (tiflags & TH_SYN) { ++ tp = tcp_drop(tp, 0); ++ goto dropwithreset; ++ } ++ ++ /* ++ * If the ACK bit is off we drop the segment and return. ++ */ ++ if ((tiflags & TH_ACK) == 0) ++ goto drop; ++ ++ /* ++ * Ack processing. ++ */ ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED state if the ack ACKs our SYN then enter ++ * ESTABLISHED state and continue processing, otherwise ++ * send an RST. una<=ack<=max ++ */ ++ case TCPS_SYN_RECEIVED: ++ ++ if (SEQ_GT(tp->snd_una, ti->ti_ack) || SEQ_GT(ti->ti_ack, tp->snd_max)) ++ goto dropwithreset; ++ tp->t_state = TCPS_ESTABLISHED; ++ /* ++ * The sent SYN is ack'ed with our sequence number +1 ++ * The first data byte already in the buffer will get ++ * lost if no correction is made. This is only needed for ++ * SS_CTL since the buffer is empty otherwise. ++ * tp->snd_una++; or: ++ */ ++ tp->snd_una = ti->ti_ack; ++ if (so->so_state & SS_CTL) { ++ /* So tcp_ctl reports the right state */ ++ ret = tcp_ctl(so); ++ if (ret == 1) { ++ soisfconnected(so); ++ so->so_state &= ~SS_CTL; /* success XXX */ ++ } else if (ret == 2) { ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_NOFDREF; /* CTL_CMD */ ++ } else { ++ needoutput = 1; ++ tp->t_state = TCPS_FIN_WAIT_1; ++ } ++ } else { ++ soisfconnected(so); ++ } ++ ++ tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0); ++ tp->snd_wl1 = ti->ti_seq - 1; ++ /* Avoid ack processing; snd_una==ti_ack => dup ack */ ++ goto synrx_to_est; ++ /* fall into ... 
*/ ++ ++ /* ++ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range ++ * ACKs. If the ack is in the range ++ * tp->snd_una < ti->ti_ack <= tp->snd_max ++ * then advance tp->snd_una to ti->ti_ack and drop ++ * data from the retransmission queue. If this ACK reflects ++ * more up to date window information we update our window information. ++ */ ++ case TCPS_ESTABLISHED: ++ case TCPS_FIN_WAIT_1: ++ case TCPS_FIN_WAIT_2: ++ case TCPS_CLOSE_WAIT: ++ case TCPS_CLOSING: ++ case TCPS_LAST_ACK: ++ case TCPS_TIME_WAIT: ++ ++ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) { ++ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) { ++ DEBUG_MISC(" dup ack m = %p so = %p", m, so); ++ /* ++ * If we have outstanding data (other than ++ * a window probe), this is a completely ++ * duplicate ack (ie, window info didn't ++ * change), the ack is the biggest we've ++ * seen and we've seen exactly our rexmt ++ * threshold of them, assume a packet ++ * has been dropped and retransmit it. ++ * Kludge snd_nxt & the congestion ++ * window so we send only this one ++ * packet. ++ * ++ * We know we're losing at the current ++ * window size so do congestion avoidance ++ * (set ssthresh to half the current window ++ * and pull our congestion window back to ++ * the new ssthresh). ++ * ++ * Dup acks mean that packets have left the ++ * network (they're now cached at the receiver) ++ * so bump cwnd by the amount in the receiver ++ * to keep a constant cwnd packets in the ++ * network. 
++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 || ti->ti_ack != tp->snd_una) ++ tp->t_dupacks = 0; ++ else if (++tp->t_dupacks == TCPREXMTTHRESH) { ++ tcp_seq onxt = tp->snd_nxt; ++ unsigned win = ++ MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ ++ if (win < 2) ++ win = 2; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->t_rtt = 0; ++ tp->snd_nxt = ti->ti_ack; ++ tp->snd_cwnd = tp->t_maxseg; ++ tcp_output(tp); ++ tp->snd_cwnd = ++ tp->snd_ssthresh + tp->t_maxseg * tp->t_dupacks; ++ if (SEQ_GT(onxt, tp->snd_nxt)) ++ tp->snd_nxt = onxt; ++ goto drop; ++ } else if (tp->t_dupacks > TCPREXMTTHRESH) { ++ tp->snd_cwnd += tp->t_maxseg; ++ tcp_output(tp); ++ goto drop; ++ } ++ } else ++ tp->t_dupacks = 0; ++ break; ++ } ++ synrx_to_est: ++ /* ++ * If the congestion window was inflated to account ++ * for the other side's cached packets, retract it. ++ */ ++ if (tp->t_dupacks > TCPREXMTTHRESH && tp->snd_cwnd > tp->snd_ssthresh) ++ tp->snd_cwnd = tp->snd_ssthresh; ++ tp->t_dupacks = 0; ++ if (SEQ_GT(ti->ti_ack, tp->snd_max)) { ++ goto dropafterack; ++ } ++ acked = ti->ti_ack - tp->snd_una; ++ ++ /* ++ * If transmit timer is running and timed sequence ++ * number was acked, update smoothed round trip time. ++ * Since we now have an rtt measurement, cancel the ++ * timer backoff (cf., Phil Karn's retransmit alg.). ++ * Recompute the initial retransmit timer. ++ */ ++ if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq)) ++ tcp_xmit_timer(tp, tp->t_rtt); ++ ++ /* ++ * If all outstanding data is acked, stop retransmit ++ * timer and remember to restart (more output or persist). ++ * If there is more data to be acked, restart retransmit ++ * timer, using current (possibly backed-off) value. ++ */ ++ if (ti->ti_ack == tp->snd_max) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ needoutput = 1; ++ } else if (tp->t_timer[TCPT_PERSIST] == 0) ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * When new data is acked, open the congestion window. 
++ * If the window gives us less than ssthresh packets ++ * in flight, open exponentially (maxseg per packet). ++ * Otherwise open linearly: maxseg per window ++ * (maxseg^2 / cwnd per packet). ++ */ ++ { ++ register unsigned cw = tp->snd_cwnd; ++ register unsigned incr = tp->t_maxseg; ++ ++ if (cw > tp->snd_ssthresh) ++ incr = incr * incr / cw; ++ tp->snd_cwnd = MIN(cw + incr, TCP_MAXWIN << tp->snd_scale); ++ } ++ if (acked > so->so_snd.sb_cc) { ++ tp->snd_wnd -= so->so_snd.sb_cc; ++ sodrop(so, (int)so->so_snd.sb_cc); ++ ourfinisacked = 1; ++ } else { ++ sodrop(so, acked); ++ tp->snd_wnd -= acked; ++ ourfinisacked = 0; ++ } ++ tp->snd_una = ti->ti_ack; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_una)) ++ tp->snd_nxt = tp->snd_una; ++ ++ switch (tp->t_state) { ++ /* ++ * In FIN_WAIT_1 STATE in addition to the processing ++ * for the ESTABLISHED state if our FIN is now acknowledged ++ * then enter FIN_WAIT_2. ++ */ ++ case TCPS_FIN_WAIT_1: ++ if (ourfinisacked) { ++ /* ++ * If we can't receive any more ++ * data, then closing user can proceed. ++ * Starting the timer is contrary to the ++ * specification, but if we don't get a FIN ++ * we'll hang forever. ++ */ ++ if (so->so_state & SS_FCANTRCVMORE) { ++ tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE; ++ } ++ tp->t_state = TCPS_FIN_WAIT_2; ++ } ++ break; ++ ++ /* ++ * In CLOSING STATE in addition to the processing for ++ * the ESTABLISHED state if the ACK acknowledges our FIN ++ * then enter the TIME-WAIT state, otherwise ignore ++ * the segment. ++ */ ++ case TCPS_CLOSING: ++ if (ourfinisacked) { ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ } ++ break; ++ ++ /* ++ * In LAST_ACK, we may still be waiting for data to drain ++ * and/or to be acked, as well as for the ack of our FIN. ++ * If our FIN is now acknowledged, delete the TCB, ++ * enter the closed state and return. 
++ */ ++ case TCPS_LAST_ACK: ++ if (ourfinisacked) { ++ tcp_close(tp); ++ goto drop; ++ } ++ break; ++ ++ /* ++ * In TIME_WAIT state the only thing that should arrive ++ * is a retransmission of the remote FIN. Acknowledge ++ * it and restart the finack timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ goto dropafterack; ++ } ++ } /* switch(tp->t_state) */ ++ ++step6: ++ /* ++ * Update window information. ++ * Don't look at window if no ACK: TAC's send garbage on first SYN. ++ */ ++ if ((tiflags & TH_ACK) && ++ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || ++ (tp->snd_wl1 == ti->ti_seq && ++ (SEQ_LT(tp->snd_wl2, ti->ti_ack) || ++ (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) { ++ tp->snd_wnd = tiwin; ++ tp->snd_wl1 = ti->ti_seq; ++ tp->snd_wl2 = ti->ti_ack; ++ if (tp->snd_wnd > tp->max_sndwnd) ++ tp->max_sndwnd = tp->snd_wnd; ++ needoutput = 1; ++ } ++ ++ /* ++ * Process segments with URG. ++ */ ++ if ((tiflags & TH_URG) && ti->ti_urp && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * This is a kludge, but if we receive and accept ++ * random urgent pointers, we'll crash in ++ * soreceive. It's hard to imagine someone ++ * actually wanting to send this much urgent data. ++ */ ++ if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) { ++ ti->ti_urp = 0; ++ tiflags &= ~TH_URG; ++ goto dodata; ++ } ++ /* ++ * If this segment advances the known urgent pointer, ++ * then mark the data stream. This should not happen ++ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since ++ * a FIN has been received from the remote side. ++ * In these states we ignore the URG. ++ * ++ * According to RFC961 (Assigned Protocols), ++ * the urgent pointer points to the last octet ++ * of urgent data. We continue, however, ++ * to consider it to indicate the first octet ++ * of data past the urgent section as the original ++ * spec states (in one of two places). 
++ */ ++ if (SEQ_GT(ti->ti_seq + ti->ti_urp, tp->rcv_up)) { ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ so->so_urgc = ++ so->so_rcv.sb_cc + (tp->rcv_up - tp->rcv_nxt); /* -1; */ ++ tp->rcv_up = ti->ti_seq + ti->ti_urp; ++ } ++ } else ++ /* ++ * If no out of band data is expected, ++ * pull receive urgent pointer along ++ * with the receive window. ++ */ ++ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up)) ++ tp->rcv_up = tp->rcv_nxt; ++dodata: ++ ++ /* ++ * If this is a small packet, then ACK now - with Nagel ++ * congestion avoidance sender won't send more until ++ * he gets an ACK. ++ */ ++ if (ti->ti_len && (unsigned)ti->ti_len <= 5 && ++ ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { ++ tp->t_flags |= TF_ACKNOW; ++ } ++ ++ /* ++ * Process the segment text, merging it into the TCP sequencing queue, ++ * and arranging for acknowledgment of receipt if necessary. ++ * This process logically involves adjusting tp->rcv_wnd as data ++ * is presented to the user (this happens in tcp_usrreq.c, ++ * case PRU_RCVD). If a FIN has already been received on this ++ * connection then we just ignore the text. ++ */ ++ if ((ti->ti_len || (tiflags & TH_FIN)) && ++ TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ TCP_REASS(tp, ti, m, so, tiflags); ++ } else { ++ m_free(m); ++ tiflags &= ~TH_FIN; ++ } ++ ++ /* ++ * If FIN is received ACK the FIN and let the user know ++ * that the connection is closing. ++ */ ++ if (tiflags & TH_FIN) { ++ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { ++ /* ++ * If we receive a FIN we can't send more data, ++ * set it SS_FDRAIN ++ * Shutdown the socket if there is no rx data in the ++ * buffer. ++ * soread() is called on completion of shutdown() and ++ * will got to TCPS_LAST_ACK, and use tcp_output() ++ * to send the FIN. ++ */ ++ sofwdrain(so); ++ ++ tp->t_flags |= TF_ACKNOW; ++ tp->rcv_nxt++; ++ } ++ switch (tp->t_state) { ++ /* ++ * In SYN_RECEIVED and ESTABLISHED STATES ++ * enter the CLOSE_WAIT state. 
++ */ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ if (so->so_emu == EMU_CTL) /* no shutdown on socket */ ++ tp->t_state = TCPS_LAST_ACK; ++ else ++ tp->t_state = TCPS_CLOSE_WAIT; ++ break; ++ ++ /* ++ * If still in FIN_WAIT_1 STATE FIN has not been acked so ++ * enter the CLOSING state. ++ */ ++ case TCPS_FIN_WAIT_1: ++ tp->t_state = TCPS_CLOSING; ++ break; ++ ++ /* ++ * In FIN_WAIT_2 state enter the TIME_WAIT state, ++ * starting the time-wait timer, turning off the other ++ * standard timers. ++ */ ++ case TCPS_FIN_WAIT_2: ++ tp->t_state = TCPS_TIME_WAIT; ++ tcp_canceltimers(tp); ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ ++ /* ++ * In TIME_WAIT state restart the 2 MSL time_wait timer. ++ */ ++ case TCPS_TIME_WAIT: ++ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL; ++ break; ++ } ++ } ++ ++ /* ++ * Return any desired output. ++ */ ++ if (needoutput || (tp->t_flags & TF_ACKNOW)) { ++ tcp_output(tp); ++ } ++ return; ++ ++dropafterack: ++ /* ++ * Generate an ACK dropping incoming segment if it occupies ++ * sequence space, where the ACK reflects our state. ++ */ ++ if (tiflags & TH_RST) ++ goto drop; ++ m_free(m); ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ return; ++ ++dropwithreset: ++ /* reuses m if m!=NULL, m_free() unnecessary */ ++ if (tiflags & TH_ACK) ++ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST, af); ++ else { ++ if (tiflags & TH_SYN) ++ ti->ti_len++; ++ tcp_respond(tp, ti, m, ti->ti_seq + ti->ti_len, (tcp_seq)0, ++ TH_RST | TH_ACK, af); ++ } ++ ++ return; ++ ++drop: ++ /* ++ * Drop space held by incoming segment and return. 
++ */ ++ m_free(m); ++} ++ ++static void tcp_dooptions(struct tcpcb *tp, uint8_t *cp, int cnt, ++ struct tcpiphdr *ti) ++{ ++ uint16_t mss; ++ int opt, optlen; ++ ++ DEBUG_CALL("tcp_dooptions"); ++ DEBUG_ARG("tp = %p cnt=%i", tp, cnt); ++ ++ for (; cnt > 0; cnt -= optlen, cp += optlen) { ++ opt = cp[0]; ++ if (opt == TCPOPT_EOL) ++ break; ++ if (opt == TCPOPT_NOP) ++ optlen = 1; ++ else { ++ optlen = cp[1]; ++ if (optlen <= 0) ++ break; ++ } ++ switch (opt) { ++ default: ++ continue; ++ ++ case TCPOPT_MAXSEG: ++ if (optlen != TCPOLEN_MAXSEG) ++ continue; ++ if (!(ti->ti_flags & TH_SYN)) ++ continue; ++ memcpy((char *)&mss, (char *)cp + 2, sizeof(mss)); ++ NTOHS(mss); ++ tcp_mss(tp, mss); /* sets t_maxseg */ ++ break; ++ } ++ } ++} ++ ++/* ++ * Collect new round-trip time estimate ++ * and update averages and current timeout. ++ */ ++ ++static void tcp_xmit_timer(register struct tcpcb *tp, int rtt) ++{ ++ register short delta; ++ ++ DEBUG_CALL("tcp_xmit_timer"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("rtt = %d", rtt); ++ ++ if (tp->t_srtt != 0) { ++ /* ++ * srtt is stored as fixed point with 3 bits after the ++ * binary point (i.e., scaled by 8). The following magic ++ * is equivalent to the smoothing algorithm in rfc793 with ++ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed ++ * point). Adjust rtt to origin 0. ++ */ ++ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT); ++ if ((tp->t_srtt += delta) <= 0) ++ tp->t_srtt = 1; ++ /* ++ * We accumulate a smoothed rtt variance (actually, a ++ * smoothed mean difference), then set the retransmit ++ * timer to smoothed rtt + 4 times the smoothed variance. ++ * rttvar is stored as fixed point with 2 bits after the ++ * binary point (scaled by 4). The following is ++ * equivalent to rfc793 smoothing with an alpha of .75 ++ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces ++ * rfc793's wired-in beta. 
++ */ ++ if (delta < 0) ++ delta = -delta; ++ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT); ++ if ((tp->t_rttvar += delta) <= 0) ++ tp->t_rttvar = 1; ++ } else { ++ /* ++ * No rtt measurement yet - use the unsmoothed rtt. ++ * Set the variance to half the rtt (so our first ++ * retransmit happens at 3*rtt). ++ */ ++ tp->t_srtt = rtt << TCP_RTT_SHIFT; ++ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1); ++ } ++ tp->t_rtt = 0; ++ tp->t_rxtshift = 0; ++ ++ /* ++ * the retransmit should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 tick we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ */ ++ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp), (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ ++ /* ++ * We received an ack for a packet that wasn't retransmitted; ++ * it is probably safe to discard any error indications we've ++ * received recently. This isn't quite right, but close enough ++ * for now (a route might have failed after we sent a segment, ++ * and the return path might not be symmetrical). ++ */ ++ tp->t_softerror = 0; ++} ++ ++/* ++ * Determine a reasonable value for maxseg size. ++ * If the route is known, check route for mtu. ++ * If none, use an mss that can be handled on the outgoing ++ * interface without forcing IP to fragment; if bigger than ++ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES ++ * to utilize large mbufs. 
If no route is found, route has no mtu, ++ * or the destination isn't local, use a default, hopefully conservative ++ * size (usually 512 or the default IP max size, but no more than the mtu ++ * of the interface), as we can't discover anything about intervening ++ * gateways or networks. We also initialize the congestion/slow start ++ * window to be a single segment if the destination isn't local. ++ * While looking at the routing entry, we also initialize other path-dependent ++ * parameters from pre-set or cached values in the routing entry. ++ */ ++ ++int tcp_mss(struct tcpcb *tp, unsigned offer) ++{ ++ struct socket *so = tp->t_socket; ++ int mss; ++ ++ DEBUG_CALL("tcp_mss"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("offer = %d", offer); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip); ++ break; ++ case AF_INET6: ++ mss = MIN(so->slirp->if_mtu, so->slirp->if_mru) - ++ sizeof(struct tcphdr) - sizeof(struct ip6); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (offer) ++ mss = MIN(mss, offer); ++ mss = MAX(mss, 32); ++ if (mss < tp->t_maxseg || offer != 0) ++ tp->t_maxseg = MIN(mss, TCP_MAXSEG_MAX); ++ ++ tp->snd_cwnd = mss; ++ ++ sbreserve(&so->so_snd, ++ TCP_SNDSPACE + ++ ((TCP_SNDSPACE % mss) ? (mss - (TCP_SNDSPACE % mss)) : 0)); ++ sbreserve(&so->so_rcv, ++ TCP_RCVSPACE + ++ ((TCP_RCVSPACE % mss) ? (mss - (TCP_RCVSPACE % mss)) : 0)); ++ ++ DEBUG_MISC(" returning mss = %d", mss); ++ ++ return mss; ++} +diff --git a/slirp/src/tcp_output.c b/slirp/src/tcp_output.c +new file mode 100644 +index 0000000000..383fe31dcf +--- /dev/null ++++ b/slirp/src/tcp_output.c +@@ -0,0 +1,516 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_output.c 8.3 (Berkeley) 12/30/93 ++ * tcp_output.c,v 1.3 1994/09/15 10:36:55 davidg Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. 
++ */ ++ ++#include "slirp.h" ++ ++static const uint8_t tcp_outflags[TCP_NSTATES] = { ++ TH_RST | TH_ACK, 0, TH_SYN, TH_SYN | TH_ACK, ++ TH_ACK, TH_ACK, TH_FIN | TH_ACK, TH_FIN | TH_ACK, ++ TH_FIN | TH_ACK, TH_ACK, TH_ACK, ++}; ++ ++ ++#undef MAX_TCPOPTLEN ++#define MAX_TCPOPTLEN 32 /* max # bytes that go in options */ ++ ++/* ++ * Tcp output routine: figure out what should be sent and send it. ++ */ ++int tcp_output(struct tcpcb *tp) ++{ ++ register struct socket *so = tp->t_socket; ++ register long len, win; ++ int off, flags, error; ++ register struct mbuf *m; ++ register struct tcpiphdr *ti, tcpiph_save; ++ struct ip *ip; ++ struct ip6 *ip6; ++ uint8_t opt[MAX_TCPOPTLEN]; ++ unsigned optlen, hdrlen; ++ int idle, sendalot; ++ ++ DEBUG_CALL("tcp_output"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* ++ * Determine length of data that should be transmitted, ++ * and flags that will be used. ++ * If there is some data or critical controls (SYN, RST) ++ * to send, then transmit; otherwise, investigate further. ++ */ ++ idle = (tp->snd_max == tp->snd_una); ++ if (idle && tp->t_idle >= tp->t_rxtcur) ++ /* ++ * We have been idle for "a while" and no acks are ++ * expected to clock out any data we send -- ++ * slow start to get ack "clock" running again. ++ */ ++ tp->snd_cwnd = tp->t_maxseg; ++again: ++ sendalot = 0; ++ off = tp->snd_nxt - tp->snd_una; ++ win = MIN(tp->snd_wnd, tp->snd_cwnd); ++ ++ flags = tcp_outflags[tp->t_state]; ++ ++ DEBUG_MISC(" --- tcp_output flags = 0x%x", flags); ++ ++ /* ++ * If in persist timeout with window of 0, send 1 byte. ++ * Otherwise, if window is small but nonzero ++ * and timer expired, we will send what we can ++ * and go to transmit state. ++ */ ++ if (tp->t_force) { ++ if (win == 0) { ++ /* ++ * If we still have some data to send, then ++ * clear the FIN bit. Usually this would ++ * happen below when it realizes that we ++ * aren't sending all the data. 
However, ++ * if we have exactly 1 byte of unset data, ++ * then it won't clear the FIN bit below, ++ * and if we are in persist state, we wind ++ * up sending the packet without recording ++ * that we sent the FIN bit. ++ * ++ * We can't just blindly clear the FIN bit, ++ * because if we don't have any more data ++ * to send then the probe will be the FIN ++ * itself. ++ */ ++ if (off < so->so_snd.sb_cc) ++ flags &= ~TH_FIN; ++ win = 1; ++ } else { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ ++ len = MIN(so->so_snd.sb_cc, win) - off; ++ ++ if (len < 0) { ++ /* ++ * If FIN has been sent but not acked, ++ * but we haven't been called to retransmit, ++ * len will be -1. Otherwise, window shrank ++ * after we sent into it. If window shrank to 0, ++ * cancel pending retransmit and pull snd_nxt ++ * back to (closed) window. We will enter persist ++ * state below. If the window didn't close completely, ++ * just wait for an ACK. ++ */ ++ len = 0; ++ if (win == 0) { ++ tp->t_timer[TCPT_REXMT] = 0; ++ tp->snd_nxt = tp->snd_una; ++ } ++ } ++ ++ if (len > tp->t_maxseg) { ++ len = tp->t_maxseg; ++ sendalot = 1; ++ } ++ if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc)) ++ flags &= ~TH_FIN; ++ ++ win = sbspace(&so->so_rcv); ++ ++ /* ++ * Sender silly window avoidance. If connection is idle ++ * and can send all data, a maximum segment, ++ * at least a maximum default-size segment do it, ++ * or are forced, do it; otherwise don't bother. ++ * If peer's buffer is tiny, then send ++ * when window is at least half open. ++ * If retransmitting (possibly after persist timer forced us ++ * to send into a small window), then must resend. 
++ */ ++ if (len) { ++ if (len == tp->t_maxseg) ++ goto send; ++ if ((1 || idle || tp->t_flags & TF_NODELAY) && ++ len + off >= so->so_snd.sb_cc) ++ goto send; ++ if (tp->t_force) ++ goto send; ++ if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0) ++ goto send; ++ if (SEQ_LT(tp->snd_nxt, tp->snd_max)) ++ goto send; ++ } ++ ++ /* ++ * Compare available window to amount of window ++ * known to peer (as advertised window less ++ * next expected input). If the difference is at least two ++ * max size segments, or at least 50% of the maximum possible ++ * window, then want to send a window update to peer. ++ */ ++ if (win > 0) { ++ /* ++ * "adv" is the amount we can increase the window, ++ * taking into account that we are limited by ++ * TCP_MAXWIN << tp->rcv_scale. ++ */ ++ long adv = MIN(win, (long)TCP_MAXWIN << tp->rcv_scale) - ++ (tp->rcv_adv - tp->rcv_nxt); ++ ++ if (adv >= (long)(2 * tp->t_maxseg)) ++ goto send; ++ if (2 * adv >= (long)so->so_rcv.sb_datalen) ++ goto send; ++ } ++ ++ /* ++ * Send if we owe peer an ACK. ++ */ ++ if (tp->t_flags & TF_ACKNOW) ++ goto send; ++ if (flags & (TH_SYN | TH_RST)) ++ goto send; ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) ++ goto send; ++ /* ++ * If our state indicates that FIN should be sent ++ * and we have not yet done so, or we're retransmitting the FIN, ++ * then we need to send. ++ */ ++ if (flags & TH_FIN && ++ ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una)) ++ goto send; ++ ++ /* ++ * TCP window updates are not reliable, rather a polling protocol ++ * using ``persist'' packets is used to insure receipt of window ++ * updates. The three ``states'' for the output side are: ++ * idle not doing retransmits or persists ++ * persisting to move a small or zero window ++ * (re)transmitting and thereby not persisting ++ * ++ * tp->t_timer[TCPT_PERSIST] ++ * is set when we are in persist state. ++ * tp->t_force ++ * is set when we are called to send a persist packet. 
++ * tp->t_timer[TCPT_REXMT] ++ * is set when we are retransmitting ++ * The output side is idle when both timers are zero. ++ * ++ * If send window is too small, there is data to transmit, and no ++ * retransmit or persist is pending, then go to persist state. ++ * If nothing happens soon, send when timer expires: ++ * if window is nonzero, transmit what we can, ++ * otherwise force out a byte. ++ */ ++ if (so->so_snd.sb_cc && tp->t_timer[TCPT_REXMT] == 0 && ++ tp->t_timer[TCPT_PERSIST] == 0) { ++ tp->t_rxtshift = 0; ++ tcp_setpersist(tp); ++ } ++ ++ /* ++ * No reason to send a segment, just return. ++ */ ++ return (0); ++ ++send: ++ /* ++ * Before ESTABLISHED, force sending of initial options ++ * unless TCP set not to do any options. ++ * NOTE: we assume that the IP/TCP header plus TCP options ++ * always fit in a single mbuf, leaving room for a maximum ++ * link header, i.e. ++ * max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MHLEN ++ */ ++ optlen = 0; ++ hdrlen = sizeof(struct tcpiphdr); ++ if (flags & TH_SYN) { ++ tp->snd_nxt = tp->iss; ++ if ((tp->t_flags & TF_NOOPT) == 0) { ++ uint16_t mss; ++ ++ opt[0] = TCPOPT_MAXSEG; ++ opt[1] = 4; ++ mss = htons((uint16_t)tcp_mss(tp, 0)); ++ memcpy((char *)(opt + 2), (char *)&mss, sizeof(mss)); ++ optlen = 4; ++ } ++ } ++ ++ hdrlen += optlen; ++ ++ /* ++ * Adjust data length if insertion of options will ++ * bump the packet length beyond the t_maxseg length. ++ */ ++ if (len > tp->t_maxseg - optlen) { ++ len = tp->t_maxseg - optlen; ++ sendalot = 1; ++ } ++ ++ /* ++ * Grab a header mbuf, attaching a copy of data to ++ * be transmitted, and initialize the header from ++ * the template for sends on this connection. ++ */ ++ if (len) { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ ++ sbcopy(&so->so_snd, off, (int)len, mtod(m, char *) + hdrlen); ++ m->m_len += len; ++ ++ /* ++ * If we're sending everything we've got, set PUSH. 
++ * (This will keep happy those implementations which only ++ * give data to the user when a buffer fills or ++ * a PUSH comes in.) ++ */ ++ if (off + len == so->so_snd.sb_cc) ++ flags |= TH_PUSH; ++ } else { ++ m = m_get(so->slirp); ++ if (m == NULL) { ++ error = 1; ++ goto out; ++ } ++ m->m_data += IF_MAXLINKHDR; ++ m->m_len = hdrlen; ++ } ++ ++ ti = mtod(m, struct tcpiphdr *); ++ ++ memcpy((char *)ti, &tp->t_template, sizeof(struct tcpiphdr)); ++ ++ /* ++ * Fill in fields, remembering maximum advertised ++ * window for use in delaying messages about window sizes. ++ * If resending a FIN, be sure not to use a new sequence number. ++ */ ++ if (flags & TH_FIN && tp->t_flags & TF_SENTFIN && ++ tp->snd_nxt == tp->snd_max) ++ tp->snd_nxt--; ++ /* ++ * If we are doing retransmissions, then snd_nxt will ++ * not reflect the first unsent octet. For ACK only ++ * packets, we do not want the sequence number of the ++ * retransmitted packet, we want the sequence number ++ * of the next unsent octet. So, if there is no data ++ * (and no SYN or FIN), use snd_max instead of snd_nxt ++ * when filling in ti_seq. But if we are in persist ++ * state, snd_max might reflect one byte beyond the ++ * right edge of the window, so use snd_nxt in that ++ * case, since we know we aren't doing a retransmission. ++ * (retransmit and persist are mutually exclusive...) ++ */ ++ if (len || (flags & (TH_SYN | TH_FIN)) || tp->t_timer[TCPT_PERSIST]) ++ ti->ti_seq = htonl(tp->snd_nxt); ++ else ++ ti->ti_seq = htonl(tp->snd_max); ++ ti->ti_ack = htonl(tp->rcv_nxt); ++ if (optlen) { ++ memcpy((char *)(ti + 1), (char *)opt, optlen); ++ ti->ti_off = (sizeof(struct tcphdr) + optlen) >> 2; ++ } ++ ti->ti_flags = flags; ++ /* ++ * Calculate receive window. Don't shrink window, ++ * but avoid silly window syndrome. 
++ */ ++ if (win < (long)(so->so_rcv.sb_datalen / 4) && win < (long)tp->t_maxseg) ++ win = 0; ++ if (win > (long)TCP_MAXWIN << tp->rcv_scale) ++ win = (long)TCP_MAXWIN << tp->rcv_scale; ++ if (win < (long)(tp->rcv_adv - tp->rcv_nxt)) ++ win = (long)(tp->rcv_adv - tp->rcv_nxt); ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ ++ if (SEQ_GT(tp->snd_up, tp->snd_una)) { ++ ti->ti_urp = htons((uint16_t)(tp->snd_up - ntohl(ti->ti_seq))); ++ ti->ti_flags |= TH_URG; ++ } else ++ /* ++ * If no urgent pointer to send, then we pull ++ * the urgent pointer to the left edge of the send window ++ * so that it doesn't drift into the send window on sequence ++ * number wraparound. ++ */ ++ tp->snd_up = tp->snd_una; /* drag it along */ ++ ++ /* ++ * Put TCP length in extended header, and then ++ * checksum extended header and data. ++ */ ++ if (len + optlen) ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + optlen + len)); ++ ti->ti_sum = cksum(m, (int)(hdrlen + len)); ++ ++ /* ++ * In transmit state, time the transmission and arrange for ++ * the retransmit. In persist state, just set snd_max. ++ */ ++ if (tp->t_force == 0 || tp->t_timer[TCPT_PERSIST] == 0) { ++ tcp_seq startseq = tp->snd_nxt; ++ ++ /* ++ * Advance snd_nxt over sequence space of this segment. ++ */ ++ if (flags & (TH_SYN | TH_FIN)) { ++ if (flags & TH_SYN) ++ tp->snd_nxt++; ++ if (flags & TH_FIN) { ++ tp->snd_nxt++; ++ tp->t_flags |= TF_SENTFIN; ++ } ++ } ++ tp->snd_nxt += len; ++ if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { ++ tp->snd_max = tp->snd_nxt; ++ /* ++ * Time this transmission if not a retransmission and ++ * not currently timing anything. ++ */ ++ if (tp->t_rtt == 0) { ++ tp->t_rtt = 1; ++ tp->t_rtseq = startseq; ++ } ++ } ++ ++ /* ++ * Set retransmit timer if not currently set, ++ * and not doing an ack or a keep-alive probe. ++ * Initial value for retransmit timer is smoothed ++ * round-trip time + 2 * round-trip time variance. 
++ * Initialize shift counter which is used for backoff ++ * of retransmit time. ++ */ ++ if (tp->t_timer[TCPT_REXMT] == 0 && tp->snd_nxt != tp->snd_una) { ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ if (tp->t_timer[TCPT_PERSIST]) { ++ tp->t_timer[TCPT_PERSIST] = 0; ++ tp->t_rxtshift = 0; ++ } ++ } ++ } else if (SEQ_GT(tp->snd_nxt + len, tp->snd_max)) ++ tp->snd_max = tp->snd_nxt + len; ++ ++ /* ++ * Fill in IP length and desired time to live and ++ * send to IP level. There should be a better way ++ * to handle ttl and tos; we could keep them in ++ * the template, but need a way to checksum without them. ++ */ ++ m->m_len = hdrlen + len; /* XXX Needed? m_len should be correct */ ++ tcpiph_save = *mtod(m, struct tcpiphdr *); ++ ++ switch (so->so_ffamily) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ ip->ip_ttl = IPDEFTTL; ++ ip->ip_tos = so->so_iptos; ++ error = ip_output(so, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ error = ip6_output(so, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ if (error) { ++ out: ++ return (error); ++ } ++ ++ /* ++ * Data sent (as far as we can tell). ++ * If this advertises a larger window than any other segment, ++ * then remember the size of the advertised window. ++ * Any pending ACK has now been sent. 
++ */ ++ if (win > 0 && SEQ_GT(tp->rcv_nxt + win, tp->rcv_adv)) ++ tp->rcv_adv = tp->rcv_nxt + win; ++ tp->last_ack_sent = tp->rcv_nxt; ++ tp->t_flags &= ~(TF_ACKNOW | TF_DELACK); ++ if (sendalot) ++ goto again; ++ ++ return (0); ++} ++ ++void tcp_setpersist(struct tcpcb *tp) ++{ ++ int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; ++ ++ /* ++ * Start/restart persistence timer. ++ */ ++ TCPT_RANGESET(tp->t_timer[TCPT_PERSIST], t * tcp_backoff[tp->t_rxtshift], ++ TCPTV_PERSMIN, TCPTV_PERSMAX); ++ if (tp->t_rxtshift < TCP_MAXRXTSHIFT) ++ tp->t_rxtshift++; ++} +diff --git a/slirp/src/tcp_subr.c b/slirp/src/tcp_subr.c +new file mode 100644 +index 0000000000..600cfa1456 +--- /dev/null ++++ b/slirp/src/tcp_subr.c +@@ -0,0 +1,1011 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93 ++ * tcp_subr.c,v 1.5 1994/10/08 22:39:58 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ */ ++ ++#include "slirp.h" ++ ++/* patchable/settable parameters for tcp */ ++/* Don't do rfc1323 performance enhancements */ ++#define TCP_DO_RFC1323 0 ++ ++/* ++ * Tcp initialization ++ */ ++void tcp_init(Slirp *slirp) ++{ ++ slirp->tcp_iss = 1; /* wrong */ ++ slirp->tcb.so_next = slirp->tcb.so_prev = &slirp->tcb; ++ slirp->tcp_last_so = &slirp->tcb; ++} ++ ++void tcp_cleanup(Slirp *slirp) ++{ ++ while (slirp->tcb.so_next != &slirp->tcb) { ++ tcp_close(sototcpcb(slirp->tcb.so_next)); ++ } ++} ++ ++/* ++ * Create template to be used to send tcp packets on a connection. ++ * Call after host entry created, fills ++ * in a skeletal tcp/ip header, minimizing the amount of work ++ * necessary when the connection is used. 
++ */ ++void tcp_template(struct tcpcb *tp) ++{ ++ struct socket *so = tp->t_socket; ++ register struct tcpiphdr *n = &tp->t_template; ++ ++ n->ti_mbuf = NULL; ++ memset(&n->ti, 0, sizeof(n->ti)); ++ n->ti_x0 = 0; ++ switch (so->so_ffamily) { ++ case AF_INET: ++ n->ti_pr = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src = so->so_faddr; ++ n->ti_dst = so->so_laddr; ++ n->ti_sport = so->so_fport; ++ n->ti_dport = so->so_lport; ++ break; ++ ++ case AF_INET6: ++ n->ti_nh6 = IPPROTO_TCP; ++ n->ti_len = htons(sizeof(struct tcphdr)); ++ n->ti_src6 = so->so_faddr6; ++ n->ti_dst6 = so->so_laddr6; ++ n->ti_sport = so->so_fport6; ++ n->ti_dport = so->so_lport6; ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++ ++ n->ti_seq = 0; ++ n->ti_ack = 0; ++ n->ti_x2 = 0; ++ n->ti_off = 5; ++ n->ti_flags = 0; ++ n->ti_win = 0; ++ n->ti_sum = 0; ++ n->ti_urp = 0; ++} ++ ++/* ++ * Send a single message to the TCP at address specified by ++ * the given TCP/IP header. If m == 0, then we make a copy ++ * of the tcpiphdr at ti and send directly to the addressed host. ++ * This is used to force keep alive messages out using the TCP ++ * template for a connection tp->t_template. If flags are given ++ * then we send a message back to the TCP which originated the ++ * segment ti, and discard the mbuf containing it and any other ++ * attached mbufs. ++ * ++ * In any case the ack and sequence number of the transmitted ++ * segment are as specified by the parameters. 
++ */ ++void tcp_respond(struct tcpcb *tp, struct tcpiphdr *ti, struct mbuf *m, ++ tcp_seq ack, tcp_seq seq, int flags, unsigned short af) ++{ ++ register int tlen; ++ int win = 0; ++ ++ DEBUG_CALL("tcp_respond"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("ti = %p", ti); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("ack = %u", ack); ++ DEBUG_ARG("seq = %u", seq); ++ DEBUG_ARG("flags = %x", flags); ++ ++ if (tp) ++ win = sbspace(&tp->t_socket->so_rcv); ++ if (m == NULL) { ++ if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL) ++ return; ++ tlen = 0; ++ m->m_data += IF_MAXLINKHDR; ++ *mtod(m, struct tcpiphdr *) = *ti; ++ ti = mtod(m, struct tcpiphdr *); ++ switch (af) { ++ case AF_INET: ++ ti->ti.ti_i4.ih_x1 = 0; ++ break; ++ case AF_INET6: ++ ti->ti.ti_i6.ih_x1 = 0; ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ flags = TH_ACK; ++ } else { ++ /* ++ * ti points into m so the next line is just making ++ * the mbuf point to ti ++ */ ++ m->m_data = (char *)ti; ++ ++ m->m_len = sizeof(struct tcpiphdr); ++ tlen = 0; ++#define xchg(a, b, type) \ ++ { \ ++ type t; \ ++ t = a; \ ++ a = b; \ ++ b = t; \ ++ } ++ switch (af) { ++ case AF_INET: ++ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, uint32_t); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ case AF_INET6: ++ xchg(ti->ti_dst6, ti->ti_src6, struct in6_addr); ++ xchg(ti->ti_dport, ti->ti_sport, uint16_t); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++#undef xchg ++ } ++ ti->ti_len = htons((uint16_t)(sizeof(struct tcphdr) + tlen)); ++ tlen += sizeof(struct tcpiphdr); ++ m->m_len = tlen; ++ ++ ti->ti_mbuf = NULL; ++ ti->ti_x0 = 0; ++ ti->ti_seq = htonl(seq); ++ ti->ti_ack = htonl(ack); ++ ti->ti_x2 = 0; ++ ti->ti_off = sizeof(struct tcphdr) >> 2; ++ ti->ti_flags = flags; ++ if (tp) ++ ti->ti_win = htons((uint16_t)(win >> tp->rcv_scale)); ++ else ++ ti->ti_win = htons((uint16_t)win); ++ ti->ti_urp = 0; ++ ti->ti_sum = 0; ++ ti->ti_sum = cksum(m, tlen); ++ ++ struct tcpiphdr tcpiph_save = *(mtod(m, 
struct tcpiphdr *)); ++ struct ip *ip; ++ struct ip6 *ip6; ++ ++ switch (af) { ++ case AF_INET: ++ m->m_data += ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ m->m_len -= ++ sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - sizeof(struct ip); ++ ip = mtod(m, struct ip *); ++ ip->ip_len = m->m_len; ++ ip->ip_dst = tcpiph_save.ti_dst; ++ ip->ip_src = tcpiph_save.ti_src; ++ ip->ip_p = tcpiph_save.ti_pr; ++ ++ if (flags & TH_RST) { ++ ip->ip_ttl = MAXTTL; ++ } else { ++ ip->ip_ttl = IPDEFTTL; ++ } ++ ++ ip_output(NULL, m); ++ break; ++ ++ case AF_INET6: ++ m->m_data += sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ m->m_len -= sizeof(struct tcpiphdr) - sizeof(struct tcphdr) - ++ sizeof(struct ip6); ++ ip6 = mtod(m, struct ip6 *); ++ ip6->ip_pl = tcpiph_save.ti_len; ++ ip6->ip_dst = tcpiph_save.ti_dst6; ++ ip6->ip_src = tcpiph_save.ti_src6; ++ ip6->ip_nh = tcpiph_save.ti_nh6; ++ ++ ip6_output(NULL, m, 0); ++ break; ++ ++ default: ++ g_assert_not_reached(); ++ } ++} ++ ++/* ++ * Create a new TCP control block, making an ++ * empty reassembly queue and hooking it to the argument ++ * protocol control block. ++ */ ++struct tcpcb *tcp_newtcpcb(struct socket *so) ++{ ++ register struct tcpcb *tp; ++ ++ tp = g_new0(struct tcpcb, 1); ++ tp->seg_next = tp->seg_prev = (struct tcpiphdr *)tp; ++ /* ++ * 40: length of IPv4 header (20) + TCP header (20) ++ * 60: length of IPv6 header (40) + TCP header (20) ++ */ ++ tp->t_maxseg = ++ MIN(so->slirp->if_mtu - ((so->so_ffamily == AF_INET) ? 40 : 60), ++ TCP_MAXSEG_MAX); ++ ++ tp->t_flags = TCP_DO_RFC1323 ? (TF_REQ_SCALE | TF_REQ_TSTMP) : 0; ++ tp->t_socket = so; ++ ++ /* ++ * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ++ * rtt estimate. Set rttvar so that srtt + 2 * rttvar gives ++ * reasonable initial retransmit time. 
++ */ ++ tp->t_srtt = TCPTV_SRTTBASE; ++ tp->t_rttvar = TCPTV_SRTTDFLT << 2; ++ tp->t_rttmin = TCPTV_MIN; ++ ++ TCPT_RANGESET(tp->t_rxtcur, ++ ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1, ++ TCPTV_MIN, TCPTV_REXMTMAX); ++ ++ tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT; ++ tp->t_state = TCPS_CLOSED; ++ ++ so->so_tcpcb = tp; ++ ++ return (tp); ++} ++ ++/* ++ * Drop a TCP connection, reporting ++ * the specified error. If connection is synchronized, ++ * then send a RST to peer. ++ */ ++struct tcpcb *tcp_drop(struct tcpcb *tp, int err) ++{ ++ DEBUG_CALL("tcp_drop"); ++ DEBUG_ARG("tp = %p", tp); ++ DEBUG_ARG("errno = %d", errno); ++ ++ if (TCPS_HAVERCVDSYN(tp->t_state)) { ++ tp->t_state = TCPS_CLOSED; ++ tcp_output(tp); ++ } ++ return (tcp_close(tp)); ++} ++ ++/* ++ * Close a TCP control block: ++ * discard all space held by the tcp ++ * discard internet protocol block ++ * wake up any sleepers ++ */ ++struct tcpcb *tcp_close(struct tcpcb *tp) ++{ ++ register struct tcpiphdr *t; ++ struct socket *so = tp->t_socket; ++ Slirp *slirp = so->slirp; ++ register struct mbuf *m; ++ ++ DEBUG_CALL("tcp_close"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ /* free the reassembly queue, if any */ ++ t = tcpfrag_list_first(tp); ++ while (!tcpfrag_list_end(t, tp)) { ++ t = tcpiphdr_next(t); ++ m = tcpiphdr_prev(t)->ti_mbuf; ++ remque(tcpiphdr2qlink(tcpiphdr_prev(t))); ++ m_free(m); ++ } ++ g_free(tp); ++ so->so_tcpcb = NULL; ++ /* clobber input socket cache if we're closing the cached connection */ ++ if (so == slirp->tcp_last_so) ++ slirp->tcp_last_so = &slirp->tcb; ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sbfree(&so->so_rcv); ++ sbfree(&so->so_snd); ++ sofree(so); ++ return ((struct tcpcb *)0); ++} ++ ++/* ++ * TCP protocol interface to socket abstraction. ++ */ ++ ++/* ++ * User issued close, and wish to trail through shutdown states: ++ * if never received SYN, just forget it. 
If got a SYN from peer, ++ * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. ++ * If already got a FIN from peer, then almost done; go to LAST_ACK ++ * state. In all other cases, have already sent FIN to peer (e.g. ++ * after PRU_SHUTDOWN), and just have to play tedious game waiting ++ * for peer to send FIN or not respond to keep-alives, etc. ++ * We can let the user exit from the close as soon as the FIN is acked. ++ */ ++void tcp_sockclosed(struct tcpcb *tp) ++{ ++ DEBUG_CALL("tcp_sockclosed"); ++ DEBUG_ARG("tp = %p", tp); ++ ++ if (!tp) { ++ return; ++ } ++ ++ switch (tp->t_state) { ++ case TCPS_CLOSED: ++ case TCPS_LISTEN: ++ case TCPS_SYN_SENT: ++ tp->t_state = TCPS_CLOSED; ++ tcp_close(tp); ++ return; ++ ++ case TCPS_SYN_RECEIVED: ++ case TCPS_ESTABLISHED: ++ tp->t_state = TCPS_FIN_WAIT_1; ++ break; ++ ++ case TCPS_CLOSE_WAIT: ++ tp->t_state = TCPS_LAST_ACK; ++ break; ++ } ++ tcp_output(tp); ++} ++ ++/* ++ * Connect to a host on the Internet ++ * Called by tcp_input ++ * Only do a connect, the tcp fields will be set in tcp_input ++ * return 0 if there's a result of the connect, ++ * else return -1 means we're still connecting ++ * The return value is almost always -1 since the socket is ++ * nonblocking. Connect returns after the SYN is sent, and does ++ * not wait for ACK+SYN. 
++ */ ++int tcp_fconnect(struct socket *so, unsigned short af) ++{ ++ int ret = 0; ++ ++ DEBUG_CALL("tcp_fconnect"); ++ DEBUG_ARG("so = %p", so); ++ ++ ret = so->s = slirp_socket(af, SOCK_STREAM, 0); ++ if (ret >= 0) { ++ ret = slirp_bind_outbound(so, af); ++ if (ret < 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return (ret); ++ } ++ } ++ ++ if (ret >= 0) { ++ int opt, s = so->s; ++ struct sockaddr_storage addr; ++ ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(opt)); ++ opt = 1; ++ setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt)); ++ ++ addr = so->fhost.ss; ++ DEBUG_CALL(" connect()ing"); ++ if (sotranslate_out(so, &addr) < 0) { ++ return -1; ++ } ++ ++ /* We don't care what port we get */ ++ ret = connect(s, (struct sockaddr *)&addr, sockaddr_size(&addr)); ++ ++ /* ++ * If it's not in progress, it failed, so we just return 0, ++ * without clearing SS_NOFDREF ++ */ ++ soisfconnecting(so); ++ } ++ ++ return (ret); ++} ++ ++/* ++ * Accept the socket and connect to the local-host ++ * ++ * We have a problem. The correct thing to do would be ++ * to first connect to the local-host, and only if the ++ * connection is accepted, then do an accept() here. ++ * But, a) we need to know who's trying to connect ++ * to the socket to be able to SYN the local-host, and ++ * b) we are already connected to the foreign host by ++ * the time it gets to accept(), so... We simply accept ++ * here and SYN the local-host. ++ */ ++void tcp_connect(struct socket *inso) ++{ ++ Slirp *slirp = inso->slirp; ++ struct socket *so; ++ struct sockaddr_storage addr; ++ socklen_t addrlen = sizeof(struct sockaddr_storage); ++ struct tcpcb *tp; ++ int s, opt, ret; ++ /* AF_INET6 addresses are bigger than AF_INET, so this is big enough. 
*/ ++ char addrstr[INET6_ADDRSTRLEN]; ++ char portstr[6]; ++ ++ DEBUG_CALL("tcp_connect"); ++ DEBUG_ARG("inso = %p", inso); ++ ret = getnameinfo((const struct sockaddr *) &inso->lhost.ss, sizeof(inso->lhost.ss), addrstr, sizeof(addrstr), portstr, sizeof(portstr), NI_NUMERICHOST|NI_NUMERICSERV); ++ g_assert(ret == 0); ++ DEBUG_ARG("ip = [%s]:%s", addrstr, portstr); ++ DEBUG_ARG("so_state = 0x%x", inso->so_state); ++ ++ /* Perform lazy guest IP address resolution if needed. */ ++ if (inso->so_state & SS_HOSTFWD) { ++ /* ++ * We can only reject the connection request by accepting it and ++ * then immediately closing it. Note that SS_FACCEPTONCE sockets can't ++ * get here. ++ */ ++ if (soassign_guest_addr_if_needed(inso) < 0) { ++ /* ++ * Guest address isn't available yet. We could either try to defer ++ * completing this connection request until the guest address is ++ * available, or punt. It's easier to punt. Otherwise we need to ++ * complicate the mechanism by which we're called to defer calling ++ * us again until the guest address is available. ++ */ ++ DEBUG_MISC(" guest address not available yet"); ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s >= 0) { ++ close(s); ++ } ++ return; ++ } ++ } ++ ++ /* ++ * If it's an SS_ACCEPTONCE socket, no need to socreate() ++ * another socket, just use the accept() socket. 
++ */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* FACCEPTONCE already have a tcpcb */ ++ so = inso; ++ } else { ++ so = socreate(slirp); ++ tcp_attach(so); ++ so->lhost = inso->lhost; ++ so->so_ffamily = inso->so_ffamily; ++ } ++ ++ tcp_mss(sototcpcb(so), 0); ++ ++ s = accept(inso->s, (struct sockaddr *)&addr, &addrlen); ++ if (s < 0) { ++ tcp_close(sototcpcb(so)); /* This will sofree() as well */ ++ return; ++ } ++ slirp_set_nonblock(s); ++ so->slirp->cb->register_poll_fd(s, so->slirp->opaque); ++ slirp_socket_set_fast_reuse(s); ++ opt = 1; ++ setsockopt(s, SOL_SOCKET, SO_OOBINLINE, &opt, sizeof(int)); ++ slirp_socket_set_nodelay(s); ++ ++ so->fhost.ss = addr; ++ sotranslate_accept(so); ++ ++ /* Close the accept() socket, set right state */ ++ if (inso->so_state & SS_FACCEPTONCE) { ++ /* If we only accept once, close the accept() socket */ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ ++ /* Don't select it yet, even though we have an FD */ ++ /* if it's not FACCEPTONCE, it's already NOFDREF */ ++ so->so_state = SS_NOFDREF; ++ } ++ so->s = s; ++ so->so_state |= SS_INCOMING; ++ ++ so->so_iptos = tcp_tos(so); ++ tp = sototcpcb(so); ++ ++ tcp_template(tp); ++ ++ tp->t_state = TCPS_SYN_SENT; ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; ++ tp->iss = slirp->tcp_iss; ++ slirp->tcp_iss += TCP_ISSINCR / 2; ++ tcp_sendseqinit(tp); ++ tcp_output(tp); ++} ++ ++/* ++ * Attach a TCPCB to a socket. 
++ */ ++void tcp_attach(struct socket *so) ++{ ++ so->so_tcpcb = tcp_newtcpcb(so); ++ insque(so, &so->slirp->tcb); ++} ++ ++/* ++ * Set the socket's type of service field ++ */ ++static const struct tos_t tcptos[] = { ++ { 0, 20, IPTOS_THROUGHPUT, 0 }, /* ftp data */ ++ { 21, 21, IPTOS_LOWDELAY, EMU_FTP }, /* ftp control */ ++ { 0, 23, IPTOS_LOWDELAY, 0 }, /* telnet */ ++ { 0, 80, IPTOS_THROUGHPUT, 0 }, /* WWW */ ++ { 0, 513, IPTOS_LOWDELAY, EMU_RLOGIN | EMU_NOCONNECT }, /* rlogin */ ++ { 0, 544, IPTOS_LOWDELAY, EMU_KSH }, /* kshell */ ++ { 0, 543, IPTOS_LOWDELAY, 0 }, /* klogin */ ++ { 0, 6667, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC */ ++ { 0, 6668, IPTOS_THROUGHPUT, EMU_IRC }, /* IRC undernet */ ++ { 0, 7070, IPTOS_LOWDELAY, EMU_REALAUDIO }, /* RealAudio control */ ++ { 0, 113, IPTOS_LOWDELAY, EMU_IDENT }, /* identd protocol */ ++ { 0, 0, 0, 0 } ++}; ++ ++/* ++ * Return TOS according to the above table ++ */ ++uint8_t tcp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (tcptos[i].tos) { ++ if ((tcptos[i].fport && (ntohs(so->so_fport) == tcptos[i].fport)) || ++ (tcptos[i].lport && (ntohs(so->so_lport) == tcptos[i].lport))) { ++ if (so->slirp->enable_emu) ++ so->so_emu = tcptos[i].emu; ++ return tcptos[i].tos; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++/* ++ * Emulate programs that try and connect to us ++ * This includes ftp (the data connection is ++ * initiated by the server) and IRC (DCC CHAT and ++ * DCC SEND) for now ++ * ++ * NOTE: It's possible to crash SLiRP by sending it ++ * unstandard strings to emulate... if this is a problem, ++ * more checks are needed here ++ * ++ * XXX Assumes the whole command came in one packet ++ * XXX If there is more than one command in the packet, the others may ++ * be truncated. ++ * XXX If the command is too long, it may be truncated. ++ * ++ * XXX Some ftp clients will have their TOS set to ++ * LOWDELAY and so Nagel will kick in. 
Because of this, ++ * we'll get the first letter, followed by the rest, so ++ * we simply scan for ORT instead of PORT... ++ * DCC doesn't have this problem because there's other stuff ++ * in the packet before the DCC command. ++ * ++ * Return 1 if the mbuf m is still valid and should be ++ * sbappend()ed ++ * ++ * NOTE: if you return 0 you MUST m_free() the mbuf! ++ */ ++int tcp_emu(struct socket *so, struct mbuf *m) ++{ ++ Slirp *slirp = so->slirp; ++ unsigned n1, n2, n3, n4, n5, n6; ++ char buff[257]; ++ uint32_t laddr; ++ unsigned lport; ++ char *bptr; ++ ++ DEBUG_CALL("tcp_emu"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ switch (so->so_emu) { ++ int x, i; ++ ++ /* TODO: IPv6 */ ++ case EMU_IDENT: ++ /* ++ * Identification protocol as per rfc-1413 ++ */ ++ ++ { ++ struct socket *tmpso; ++ struct sockaddr_in addr; ++ socklen_t addrlen = sizeof(struct sockaddr_in); ++ char *eol = g_strstr_len(m->m_data, m->m_len, "\r\n"); ++ ++ if (!eol) { ++ return 1; ++ } ++ ++ *eol = '\0'; ++ if (sscanf(m->m_data, "%u%*[ ,]%u", &n1, &n2) == 2) { ++ HTONS(n1); ++ HTONS(n2); ++ /* n2 is the one on our host */ ++ for (tmpso = slirp->tcb.so_next; tmpso != &slirp->tcb; ++ tmpso = tmpso->so_next) { ++ if (tmpso->so_laddr.s_addr == so->so_laddr.s_addr && ++ tmpso->so_lport == n2 && ++ tmpso->so_faddr.s_addr == so->so_faddr.s_addr && ++ tmpso->so_fport == n1) { ++ if (getsockname(tmpso->s, (struct sockaddr *)&addr, ++ &addrlen) == 0) ++ n2 = addr.sin_port; ++ break; ++ } ++ } ++ NTOHS(n1); ++ NTOHS(n2); ++ m_inc(m, g_snprintf(NULL, 0, "%d,%d\r\n", n1, n2) + 1); ++ m->m_len = slirp_fmt(m->m_data, M_ROOM(m), "%d,%d\r\n", n1, n2); ++ } else { ++ *eol = '\r'; ++ } ++ ++ return 1; ++ } ++ ++ case EMU_FTP: /* ftp */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NUL terminate for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "ORT")) != NULL) { ++ /* ++ * Need to emulate the PORT command ++ */ ++ x = sscanf(bptr, "ORT 
%u,%u,%u,%u,%u,%u\r\n%256[^\177]", &n1, &n2, ++ &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "ORT %d,%d,%d,%d,%d,%d\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } else if ((bptr = (char *)strstr(m->m_data, "27 Entering")) != NULL) { ++ /* ++ * Need to emulate the PASV response ++ */ ++ x = sscanf( ++ bptr, ++ "27 Entering Passive Mode (%u,%u,%u,%u,%u,%u)\r\n%256[^\177]", ++ &n1, &n2, &n3, &n4, &n5, &n6, buff); ++ if (x < 6) ++ return 1; ++ ++ laddr = htonl((n1 << 24) | (n2 << 16) | (n3 << 8) | (n4)); ++ lport = htons((n5 << 8) | (n6)); ++ ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, laddr, lport, ++ SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ n6 = ntohs(so->so_fport); ++ ++ n5 = (n6 >> 8) & 0xff; ++ n6 &= 0xff; ++ ++ laddr = ntohl(so->so_faddr.s_addr); ++ ++ n1 = ((laddr >> 24) & 0xff); ++ n2 = ((laddr >> 16) & 0xff); ++ n3 = ((laddr >> 8) & 0xff); ++ n4 = (laddr & 0xff); ++ ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "27 Entering Passive Mode (%d,%d,%d,%d,%d,%d)\r\n%s", ++ n1, n2, n3, n4, n5, n6, x == 7 ? buff : ""); ++ return 1; ++ } ++ ++ return 1; ++ ++ case EMU_KSH: ++ /* ++ * The kshell (Kerberos rsh) and shell services both pass ++ * a local port port number to carry signals to the server ++ * and stderr to the client. It is passed at the beginning ++ * of the connection as a NUL-terminated decimal ASCII string. 
++ */ ++ so->so_emu = 0; ++ for (lport = 0, i = 0; i < m->m_len - 1; ++i) { ++ if (m->m_data[i] < '0' || m->m_data[i] > '9') ++ return 1; /* invalid number */ ++ lport *= 10; ++ lport += m->m_data[i] - '0'; ++ } ++ if (m->m_data[m->m_len - 1] == '\0' && lport != 0 && ++ (so = tcp_listen(slirp, INADDR_ANY, 0, so->so_laddr.s_addr, ++ htons(lport), SS_FACCEPTONCE)) != NULL) ++ m->m_len = slirp_fmt0(m->m_data, M_ROOM(m), ++ "%d", ntohs(so->so_fport)); ++ return 1; ++ ++ case EMU_IRC: ++ /* ++ * Need to emulate DCC CHAT, DCC SEND and DCC MOVE ++ */ ++ m_inc(m, m->m_len + 1); ++ *(m->m_data + m->m_len) = 0; /* NULL terminate the string for strstr */ ++ if ((bptr = (char *)strstr(m->m_data, "DCC")) == NULL) ++ return 1; ++ ++ /* The %256s is for the broken mIRC */ ++ if (sscanf(bptr, "DCC CHAT %256s %u %u", buff, &laddr, &lport) == 3) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC CHAT chat %lu %u%c\n", ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), 1); ++ } else if (sscanf(bptr, "DCC SEND %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC SEND %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ ntohs(so->so_fport), n1, 1); ++ } else if (sscanf(bptr, "DCC MOVE %256s %u %u %u", buff, &laddr, &lport, ++ &n1) == 4) { ++ if ((so = tcp_listen(slirp, INADDR_ANY, 0, htonl(laddr), ++ htons(lport), SS_FACCEPTONCE)) == NULL) { ++ return 1; ++ } ++ m->m_len = bptr - m->m_data; /* Adjust length */ ++ m->m_len += slirp_fmt(bptr, M_FREEROOM(m), ++ "DCC MOVE %s %lu %u %u%c\n", buff, ++ (unsigned long)ntohl(so->so_faddr.s_addr), ++ 
ntohs(so->so_fport), n1, 1); ++ } ++ return 1; ++ ++ case EMU_REALAUDIO: ++ /* ++ * RealAudio emulation - JP. We must try to parse the incoming ++ * data and try to find the two characters that contain the ++ * port number. Then we redirect an udp port and replace the ++ * number with the real port we got. ++ * ++ * The 1.0 beta versions of the player are not supported ++ * any more. ++ * ++ * A typical packet for player version 1.0 (release version): ++ * ++ * 0000:50 4E 41 00 05 ++ * 0000:00 01 00 02 1B D7 00 00 67 E6 6C DC 63 00 12 50 ........g.l.c..P ++ * 0010:4E 43 4C 49 45 4E 54 20 31 30 31 20 41 4C 50 48 NCLIENT 101 ALPH ++ * 0020:41 6C 00 00 52 00 17 72 61 66 69 6C 65 73 2F 76 Al..R..rafiles/v ++ * 0030:6F 61 2F 65 6E 67 6C 69 73 68 5F 2E 72 61 79 42 oa/english_.rayB ++ * ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * Now the port number 0x1BD7 is found at offset 0x04 of the ++ * second packet. This time we received five bytes first and ++ * then the rest. You never know how many bytes you get. ++ * ++ * A typical packet for player version 2.0 (beta): ++ * ++ * 0000:50 4E 41 00 06 00 02 00 00 00 01 00 02 1B C1 00 PNA............. ++ * 0010:00 67 75 78 F5 63 00 0A 57 69 6E 32 2E 30 2E 30 .gux.c..Win2.0.0 ++ * 0020:2E 35 6C 00 00 52 00 1C 72 61 66 69 6C 65 73 2F .5l..R..rafiles/ ++ * 0030:77 65 62 73 69 74 65 2F 32 30 72 65 6C 65 61 73 website/20releas ++ * 0040:65 2E 72 61 79 53 00 00 06 36 42 e.rayS...6B ++ * ++ * Port number 0x1BC1 is found at offset 0x0d. ++ * ++ * This is just a horrible switch statement. Variable ra tells ++ * us where we're going. 
++ */ ++ ++ bptr = m->m_data; ++ while (bptr < m->m_data + m->m_len) { ++ uint16_t p; ++ static int ra = 0; ++ char ra_tbl[4]; ++ ++ ra_tbl[0] = 0x50; ++ ra_tbl[1] = 0x4e; ++ ra_tbl[2] = 0x41; ++ ra_tbl[3] = 0; ++ ++ switch (ra) { ++ case 0: ++ case 2: ++ case 3: ++ if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 1: ++ /* ++ * We may get 0x50 several times, ignore them ++ */ ++ if (*bptr == 0x50) { ++ ra = 1; ++ bptr++; ++ continue; ++ } else if (*bptr++ != ra_tbl[ra]) { ++ ra = 0; ++ continue; ++ } ++ break; ++ ++ case 4: ++ /* ++ * skip version number ++ */ ++ bptr++; ++ break; ++ ++ case 5: ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ /* ++ * The difference between versions 1.0 and ++ * 2.0 is here. For future versions of ++ * the player this may need to be modified. ++ */ ++ if (*(bptr + 1) == 0x02) ++ bptr += 8; ++ else ++ bptr += 4; ++ break; ++ ++ case 6: ++ /* This is the field containing the port ++ * number that RA-player is listening to. ++ */ ++ ++ if (bptr == m->m_data + m->m_len - 1) ++ return 1; /* We need two bytes */ ++ ++ lport = (((uint8_t *)bptr)[0] << 8) + ((uint8_t *)bptr)[1]; ++ if (lport < 6970) ++ lport += 256; /* don't know why */ ++ if (lport < 6970 || lport > 7170) ++ return 1; /* failed */ ++ ++ /* try to get udp port between 6970 - 7170 */ ++ for (p = 6970; p < 7071; p++) { ++ if (udp_listen(slirp, INADDR_ANY, htons(p), ++ so->so_laddr.s_addr, htons(lport), ++ SS_FACCEPTONCE)) { ++ break; ++ } ++ } ++ if (p == 7071) ++ p = 0; ++ *(uint8_t *)bptr++ = (p >> 8) & 0xff; ++ *(uint8_t *)bptr = p & 0xff; ++ ra = 0; ++ return 1; /* port redirected, we're done */ ++ break; ++ ++ default: ++ ra = 0; ++ } ++ ra++; ++ } ++ return 1; ++ ++ default: ++ /* Ooops, not emulated, won't call tcp_emu again */ ++ so->so_emu = 0; ++ return 1; ++ } ++} ++ ++/* ++ * Do misc. config of SLiRP while its running. 
++ * Return 0 if this connections is to be closed, 1 otherwise, ++ * return 2 if this is a command-line connection ++ */ ++int tcp_ctl(struct socket *so) ++{ ++ Slirp *slirp = so->slirp; ++ struct sbuf *sb = &so->so_snd; ++ struct gfwd_list *ex_ptr; ++ ++ DEBUG_CALL("tcp_ctl"); ++ DEBUG_ARG("so = %p", so); ++ ++ /* TODO: IPv6 */ ++ if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr) { ++ /* Check if it's pty_exec */ ++ for (ex_ptr = slirp->guestfwd_list; ex_ptr; ex_ptr = ex_ptr->ex_next) { ++ if (ex_ptr->ex_fport == so->so_fport && ++ so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) { ++ if (ex_ptr->write_cb) { ++ so->s = -1; ++ so->guestfwd = ex_ptr; ++ return 1; ++ } ++ DEBUG_MISC(" executing %s", ex_ptr->ex_exec); ++ if (ex_ptr->ex_unix) ++ return open_unix(so, ex_ptr->ex_unix); ++ else ++ return fork_exec(so, ex_ptr->ex_exec); ++ } ++ } ++ } ++ sb->sb_cc = slirp_fmt(sb->sb_wptr, sb->sb_datalen - (sb->sb_wptr - sb->sb_data), ++ "Error: No application configured.\r\n"); ++ sb->sb_wptr += sb->sb_cc; ++ return 0; ++} +diff --git a/slirp/src/tcp_timer.c b/slirp/src/tcp_timer.c +new file mode 100644 +index 0000000000..bc4db2d15e +--- /dev/null ++++ b/slirp/src/tcp_timer.c +@@ -0,0 +1,286 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.c 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.c,v 1.2 1994/08/02 07:49:10 davidg Exp ++ */ ++ ++#include "slirp.h" ++ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer); ++ ++/* ++ * Fast timeout routine for processing delayed acks ++ */ ++void tcp_fasttimo(Slirp *slirp) ++{ ++ register struct socket *so; ++ register struct tcpcb *tp; ++ ++ DEBUG_CALL("tcp_fasttimo"); ++ ++ so = slirp->tcb.so_next; ++ if (so) ++ for (; so != &slirp->tcb; so = so->so_next) ++ if ((tp = (struct tcpcb *)so->so_tcpcb) && ++ (tp->t_flags & TF_DELACK)) { ++ tp->t_flags &= ~TF_DELACK; ++ tp->t_flags |= TF_ACKNOW; ++ tcp_output(tp); ++ } ++} ++ ++/* ++ * Tcp protocol timeout routine called every 500 ms. ++ * Updates the timers in all active tcb's and ++ * causes finite state machine actions if timers expire. 
++ */ ++void tcp_slowtimo(Slirp *slirp) ++{ ++ register struct socket *ip, *ipnxt; ++ register struct tcpcb *tp; ++ register int i; ++ ++ DEBUG_CALL("tcp_slowtimo"); ++ ++ /* ++ * Search through tcb's and update active timers. ++ */ ++ ip = slirp->tcb.so_next; ++ if (ip == NULL) { ++ return; ++ } ++ for (; ip != &slirp->tcb; ip = ipnxt) { ++ ipnxt = ip->so_next; ++ tp = sototcpcb(ip); ++ if (tp == NULL) { ++ continue; ++ } ++ for (i = 0; i < TCPT_NTIMERS; i++) { ++ if (tp->t_timer[i] && --tp->t_timer[i] == 0) { ++ tcp_timers(tp, i); ++ if (ipnxt->so_prev != ip) ++ goto tpgone; ++ } ++ } ++ tp->t_idle++; ++ if (tp->t_rtt) ++ tp->t_rtt++; ++ tpgone:; ++ } ++ slirp->tcp_iss += TCP_ISSINCR / PR_SLOWHZ; /* increment iss */ ++ slirp->tcp_now++; /* for timestamps */ ++} ++ ++/* ++ * Cancel all timers for TCP tp. ++ */ ++void tcp_canceltimers(struct tcpcb *tp) ++{ ++ register int i; ++ ++ for (i = 0; i < TCPT_NTIMERS; i++) ++ tp->t_timer[i] = 0; ++} ++ ++const int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, ++ 64, 64, 64, 64, 64, 64 }; ++ ++/* ++ * TCP timer processing. ++ */ ++static struct tcpcb *tcp_timers(register struct tcpcb *tp, int timer) ++{ ++ register int rexmt; ++ ++ DEBUG_CALL("tcp_timers"); ++ ++ switch (timer) { ++ /* ++ * 2 MSL timeout in shutdown went off. If we're closed but ++ * still waiting for peer to close and connection has been idle ++ * too long, or if 2MSL time is up from TIME_WAIT, delete connection ++ * control block. Otherwise, check again in a bit. ++ */ ++ case TCPT_2MSL: ++ if (tp->t_state != TCPS_TIME_WAIT && tp->t_idle <= TCP_MAXIDLE) ++ tp->t_timer[TCPT_2MSL] = TCPTV_KEEPINTVL; ++ else ++ tp = tcp_close(tp); ++ break; ++ ++ /* ++ * Retransmission timer went off. Message has not ++ * been acked within retransmit interval. Back off ++ * to a longer retransmit interval and retransmit one segment. 
++ */ ++ case TCPT_REXMT: ++ ++ /* ++ * XXXXX If a packet has timed out, then remove all the queued ++ * packets for that session. ++ */ ++ ++ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { ++ /* ++ * This is a hack to suit our terminal server here at the uni of ++ * canberra since they have trouble with zeroes... It usually lets ++ * them through unharmed, but under some conditions, it'll eat the ++ * zeros. If we keep retransmitting it, it'll keep eating the ++ * zeroes, so we keep retransmitting, and eventually the connection ++ * dies... (this only happens on incoming data) ++ * ++ * So, if we were gonna drop the connection from too many ++ * retransmits, don't... instead halve the t_maxseg, which might ++ * break up the NULLs and let them through ++ * ++ * *sigh* ++ */ ++ ++ tp->t_maxseg >>= 1; ++ if (tp->t_maxseg < 32) { ++ /* ++ * We tried our best, now the connection must die! ++ */ ++ tp->t_rxtshift = TCP_MAXRXTSHIFT; ++ tp = tcp_drop(tp, tp->t_softerror); ++ /* tp->t_softerror : ETIMEDOUT); */ /* XXX */ ++ return (tp); /* XXX */ ++ } ++ ++ /* ++ * Set rxtshift to 6, which is still at the maximum ++ * backoff time ++ */ ++ tp->t_rxtshift = 6; ++ } ++ rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; ++ TCPT_RANGESET(tp->t_rxtcur, rexmt, (short)tp->t_rttmin, ++ TCPTV_REXMTMAX); /* XXX */ ++ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; ++ /* ++ * If losing, let the lower level know and try for ++ * a better route. Also, if we backed off this far, ++ * our srtt estimate is probably bogus. Clobber it ++ * so we'll take the next rtt measurement as our srtt; ++ * move the current srtt into rttvar to keep the current ++ * retransmit times until then. ++ */ ++ if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) { ++ tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); ++ tp->t_srtt = 0; ++ } ++ tp->snd_nxt = tp->snd_una; ++ /* ++ * If timing a segment in this window, stop the timer. 
++ */ ++ tp->t_rtt = 0; ++ /* ++ * Close the congestion window down to one segment ++ * (we'll open it by one segment for each ack we get). ++ * Since we probably have a window's worth of unacked ++ * data accumulated, this "slow start" keeps us from ++ * dumping all that data as back-to-back packets (which ++ * might overwhelm an intermediate gateway). ++ * ++ * There are two phases to the opening: Initially we ++ * open by one mss on each ack. This makes the window ++ * size increase exponentially with time. If the ++ * window is larger than the path can handle, this ++ * exponential growth results in dropped packet(s) ++ * almost immediately. To get more time between ++ * drops but still "push" the network to take advantage ++ * of improving conditions, we switch from exponential ++ * to linear window opening at some threshold size. ++ * For a threshold, we use half the current window ++ * size, truncated to a multiple of the mss. ++ * ++ * (the minimum cwnd that will give us exponential ++ * growth is 2 mss. We don't allow the threshold ++ * to go below this.) ++ */ ++ { ++ unsigned win = MIN(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg; ++ if (win < 2) ++ win = 2; ++ tp->snd_cwnd = tp->t_maxseg; ++ tp->snd_ssthresh = win * tp->t_maxseg; ++ tp->t_dupacks = 0; ++ } ++ tcp_output(tp); ++ break; ++ ++ /* ++ * Persistence timer into zero window. ++ * Force a byte to be output, if possible. ++ */ ++ case TCPT_PERSIST: ++ tcp_setpersist(tp); ++ tp->t_force = 1; ++ tcp_output(tp); ++ tp->t_force = 0; ++ break; ++ ++ /* ++ * Keep-alive timer went off; send something ++ * or drop connection if idle for too long. 
++ */ ++ case TCPT_KEEP: ++ if (tp->t_state < TCPS_ESTABLISHED) ++ goto dropit; ++ ++ if (slirp_do_keepalive && tp->t_state <= TCPS_CLOSE_WAIT) { ++ if (tp->t_idle >= TCPTV_KEEP_IDLE + TCP_MAXIDLE) ++ goto dropit; ++ /* ++ * Send a packet designed to force a response ++ * if the peer is up and reachable: ++ * either an ACK if the connection is still alive, ++ * or an RST if the peer has closed the connection ++ * due to timeout or reboot. ++ * Using sequence number tp->snd_una-1 ++ * causes the transmitted zero-length segment ++ * to lie outside the receive window; ++ * by the protocol spec, this requires the ++ * correspondent TCP to respond. ++ */ ++ tcp_respond(tp, &tp->t_template, (struct mbuf *)NULL, tp->rcv_nxt, ++ tp->snd_una - 1, 0, tp->t_socket->so_ffamily); ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL; ++ } else ++ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE; ++ break; ++ ++ dropit: ++ tp = tcp_drop(tp, 0); ++ break; ++ } ++ ++ return (tp); ++} +diff --git a/slirp/src/tcp_timer.h b/slirp/src/tcp_timer.h +new file mode 100644 +index 0000000000..584a5594e4 +--- /dev/null ++++ b/slirp/src/tcp_timer.h +@@ -0,0 +1,130 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. 
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_timer.h 8.1 (Berkeley) 6/10/93 ++ * tcp_timer.h,v 1.4 1994/08/21 05:27:38 paul Exp ++ */ ++ ++#ifndef TCP_TIMER_H ++#define TCP_TIMER_H ++ ++/* ++ * Definitions of the TCP timers. These timers are counted ++ * down PR_SLOWHZ times a second. ++ */ ++#define TCPT_NTIMERS 4 ++ ++#define TCPT_REXMT 0 /* retransmit */ ++#define TCPT_PERSIST 1 /* retransmit persistence */ ++#define TCPT_KEEP 2 /* keep alive */ ++#define TCPT_2MSL 3 /* 2*msl quiet time timer */ ++ ++/* ++ * The TCPT_REXMT timer is used to force retransmissions. ++ * The TCP has the TCPT_REXMT timer set whenever segments ++ * have been sent for which ACKs are expected but not yet ++ * received. If an ACK is received which advances tp->snd_una, ++ * then the retransmit timer is cleared (if there are no more ++ * outstanding segments) or reset to the base value (if there ++ * are more ACKs expected). 
Whenever the retransmit timer goes off, ++ * we retransmit one unacknowledged segment, and do a backoff ++ * on the retransmit timer. ++ * ++ * The TCPT_PERSIST timer is used to keep window size information ++ * flowing even if the window goes shut. If all previous transmissions ++ * have been acknowledged (so that there are no retransmissions in progress), ++ * and the window is too small to bother sending anything, then we start ++ * the TCPT_PERSIST timer. When it expires, if the window is nonzero, ++ * we go to transmit state. Otherwise, at intervals send a single byte ++ * into the peer's window to force him to update our window information. ++ * We do this at most as often as TCPTV_PERSMIN time intervals, ++ * but no more frequently than the current estimate of round-trip ++ * packet time. The TCPT_PERSIST timer is cleared whenever we receive ++ * a window update from the peer. ++ * ++ * The TCPT_KEEP timer is used to keep connections alive. If a ++ * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, ++ * but not yet established, then we drop the connection. Once the connection ++ * is established, if the connection is idle for TCPTV_KEEP_IDLE time ++ * (and keepalives have been enabled on the socket), we begin to probe ++ * the connection. We force the peer to send us a segment by sending: ++ * <SEQ=SND.UNA-1><ACK=RCV.NXT><CTL=ACK> ++ * This segment is (deliberately) outside the window, and should elicit ++ * an ack segment in response from the peer. If, despite the TCPT_KEEP ++ * initiated segments we cannot elicit a response from a peer in TCP_MAXIDLE ++ * amount of time probing, then we drop the connection. ++ */ ++ ++/* ++ * Time constants. ++ */ ++#define TCPTV_MSL (5 * PR_SLOWHZ) /* max seg lifetime (hah!)
*/ ++ ++#define TCPTV_SRTTBASE \ ++ 0 /* base roundtrip time; \ ++ if 0, no idea yet */ ++#define TCPTV_SRTTDFLT (3 * PR_SLOWHZ) /* assumed RTT if no info */ ++ ++#define TCPTV_PERSMIN (5 * PR_SLOWHZ) /* minimum persist interval */ ++#define TCPTV_PERSMAX (60 * PR_SLOWHZ) /* maximum persist interval */ ++ ++#define TCPTV_KEEP_INIT (75 * PR_SLOWHZ) /* initial connect keep alive */ ++#define TCPTV_KEEP_IDLE (120 * 60 * PR_SLOWHZ) /* dflt time before probing */ ++#define TCPTV_KEEPINTVL (75 * PR_SLOWHZ) /* default probe interval */ ++#define TCPTV_KEEPCNT 8 /* max probes before drop */ ++ ++#define TCPTV_MIN (1 * PR_SLOWHZ) /* minimum allowable value */ ++#define TCPTV_REXMTMAX (12 * PR_SLOWHZ) /* max allowable REXMT value */ ++ ++#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ ++ ++#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ ++ ++ ++/* ++ * Set tv to value clamped to [tvmin, tvmax]; tv and the bounds are re-evaluated. ++ */ ++#define TCPT_RANGESET(tv, value, tvmin, tvmax) \ ++ { \ ++ (tv) = (value); \ ++ if ((tv) < (tvmin)) \ ++ (tv) = (tvmin); \ ++ else if ((tv) > (tvmax)) \ ++ (tv) = (tvmax); \ ++ } ++ ++extern const int tcp_backoff[]; ++ ++struct tcpcb; ++ ++void tcp_fasttimo(Slirp *); ++void tcp_slowtimo(Slirp *); ++void tcp_canceltimers(struct tcpcb *); ++ ++#endif +diff --git a/slirp/src/tcp_var.h b/slirp/src/tcp_var.h +new file mode 100644 +index 0000000000..c8da8cbd16 +--- /dev/null ++++ b/slirp/src/tcp_var.h +@@ -0,0 +1,161 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993, 1994 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2.
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcp_var.h 8.3 (Berkeley) 4/10/94 ++ * tcp_var.h,v 1.3 1994/08/21 05:27:39 paul Exp ++ */ ++ ++#ifndef TCP_VAR_H ++#define TCP_VAR_H ++ ++#include "tcpip.h" ++#include "tcp_timer.h" ++ ++/* ++ * TCP control block, one per TCP connection; fields: ++ */ ++struct tcpcb { ++ struct tcpiphdr *seg_next; /* sequencing queue */ ++ struct tcpiphdr *seg_prev; ++ short t_state; /* state of this connection */ ++ short t_timer[TCPT_NTIMERS]; /* tcp timers */ ++ short t_rxtshift; /* log(2) of rexmt exp.
backoff */ ++ short t_rxtcur; /* current retransmit value */ ++ short t_dupacks; /* consecutive dup acks recd */ ++ uint16_t t_maxseg; /* maximum segment size */ ++ uint8_t t_force; /* 1 if forcing out a byte */ ++ uint16_t t_flags; /* TF_* bits defined below */ ++#define TF_ACKNOW 0x0001 /* ack peer immediately */ ++#define TF_DELACK 0x0002 /* ack, but try to delay it */ ++#define TF_NODELAY 0x0004 /* don't delay packets to coalesce */ ++#define TF_NOOPT 0x0008 /* don't use tcp options */ ++#define TF_SENTFIN 0x0010 /* have sent FIN */ ++#define TF_REQ_SCALE 0x0020 /* have/will request window scaling */ ++#define TF_RCVD_SCALE 0x0040 /* other side has requested scaling */ ++#define TF_REQ_TSTMP 0x0080 /* have/will request timestamps */ ++#define TF_RCVD_TSTMP 0x0100 /* a timestamp was received in SYN */ ++#define TF_SACK_PERMIT 0x0200 /* other side said I could SACK */ ++ ++ struct tcpiphdr t_template; /* static skeletal packet for xmit */ ++ ++ struct socket *t_socket; /* back pointer to socket */ ++ /* ++ * The following fields are used as in the protocol specification. ++ * See RFC 793, Sept. 1981, page 21. ++ */ ++ /* send sequence variables */ ++ tcp_seq snd_una; /* send unacknowledged */ ++ tcp_seq snd_nxt; /* send next */ ++ tcp_seq snd_up; /* send urgent pointer */ ++ tcp_seq snd_wl1; /* window update seg seq number */ ++ tcp_seq snd_wl2; /* window update seg ack number */ ++ tcp_seq iss; /* initial send sequence number */ ++ uint32_t snd_wnd; /* send window */ ++ /* receive sequence variables */ ++ uint32_t rcv_wnd; /* receive window */ ++ tcp_seq rcv_nxt; /* receive next */ ++ tcp_seq rcv_up; /* receive urgent pointer */ ++ tcp_seq irs; /* initial receive sequence number */ ++ /* ++ * Additional variables for this implementation.
++ */ ++ /* receive variables */ ++ tcp_seq rcv_adv; /* advertised window */ ++ /* retransmit variables */ ++ tcp_seq snd_max; /* highest sequence number sent; ++ * used to recognize retransmits ++ */ ++ /* congestion control (for slow start, source quench, retransmit after loss) ++ */ ++ uint32_t snd_cwnd; /* congestion-controlled window */ ++ uint32_t snd_ssthresh; /* snd_cwnd size threshold for ++ * slow start exponential to ++ * linear switch ++ */ ++ /* ++ * transmit timing stuff. See below for scale of srtt and rttvar. ++ * "Variance" is actually smoothed difference. ++ */ ++ short t_idle; /* inactivity time */ ++ short t_rtt; /* round trip time */ ++ tcp_seq t_rtseq; /* sequence number being timed */ ++ short t_srtt; /* smoothed round-trip time */ ++ short t_rttvar; /* variance in round-trip time */ ++ uint16_t t_rttmin; /* minimum rtt allowed */ ++ uint32_t max_sndwnd; /* largest window peer has offered */ ++ ++ /* out-of-band data */ ++ uint8_t t_oobflags; /* have some */ ++ uint8_t t_iobc; /* input character */ ++#define TCPOOB_HAVEDATA 0x01 ++#define TCPOOB_HADDATA 0x02 ++ short t_softerror; /* possible error not yet reported */ ++ ++ /* RFC 1323 variables */ ++ uint8_t snd_scale; /* window scaling for send window */ ++ uint8_t rcv_scale; /* window scaling for recv window */ ++ uint8_t request_r_scale; /* pending window scaling */ ++ uint8_t requested_s_scale; ++ uint32_t ts_recent; /* timestamp echo data */ ++ uint32_t ts_recent_age; /* when last updated */ ++ tcp_seq last_ack_sent; ++}; ++ ++#define sototcpcb(so) ((so)->so_tcpcb) /* socket -> its tcpcb pointer */ ++ ++/* ++ * The smoothed round-trip time and estimated variance ++ * are stored as fixed point numbers scaled by the values below. ++ * For convenience, these scales are also used in smoothing the average ++ * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed). ++ * With these scales, srtt has 3 bits to the right of the binary point, ++ * and thus an "ALPHA" of 0.875.
rttvar has 2 bits to the right of the ++ * binary point, and is smoothed with an ALPHA of 0.75. ++ */ ++#define TCP_RTT_SCALE 8 /* multiplier for srtt; 3 bits frac. */ ++#define TCP_RTT_SHIFT 3 /* shift for srtt; 3 bits frac. */ ++#define TCP_RTTVAR_SCALE 4 /* multiplier for rttvar; 2 bits */ ++#define TCP_RTTVAR_SHIFT 2 /* shift for rttvar; 2 bits */ ++ ++/* ++ * The initial retransmission should happen at rtt + 4 * rttvar. ++ * Because of the way we do the smoothing, srtt and rttvar ++ * will each average +1/2 tick of bias. When we compute ++ * the retransmit timer, we want 1/2 tick of rounding and ++ * 1 extra tick because of +-1/2 tick uncertainty in the ++ * firing of the timer. The bias will give us exactly the ++ * 1.5 ticks we need. But, because the bias is ++ * statistical, we have to test that we don't drop below ++ * the minimum feasible timer (which is 2 ticks). ++ * This macro assumes that the value of TCP_RTTVAR_SCALE ++ * is the same as the multiplier for rttvar (the 4 in rtt + 4 * rttvar). ++ */ ++#define TCP_REXMTVAL(tp) (((tp)->t_srtt >> TCP_RTT_SHIFT) + (tp)->t_rttvar) ++ ++#endif +diff --git a/slirp/src/tcpip.h b/slirp/src/tcpip.h +new file mode 100644 +index 0000000000..a0fb2282f2 +--- /dev/null ++++ b/slirp/src/tcpip.h +@@ -0,0 +1,104 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3.
Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)tcpip.h 8.1 (Berkeley) 6/10/93 ++ * tcpip.h,v 1.3 1994/08/21 05:27:40 paul Exp ++ */ ++ ++#ifndef TCPIP_H ++#define TCPIP_H ++ ++/* ++ * Tcp+ip header, after ip options removed. 
++ */ ++struct tcpiphdr { ++ struct mbuf_ptr ih_mbuf; /* backpointer to mbuf */ ++ union { ++ struct { ++ struct in_addr ih_src; /* source internet address */ ++ struct in_addr ih_dst; /* destination internet address */ ++ uint8_t ih_x1; /* (unused) */ ++ uint8_t ih_pr; /* protocol */ ++ } ti_i4; /* IPv4 variant */ ++ struct { ++ struct in6_addr ih_src; ++ struct in6_addr ih_dst; ++ uint8_t ih_x1; ++ uint8_t ih_nh; ++ } ti_i6; /* IPv6 variant, same layout with in6_addr addresses */ ++ } ti; ++ uint16_t ti_x0; ++ uint16_t ti_len; /* protocol length */ ++ struct tcphdr ti_t; /* tcp header */ ++}; ++#define ti_mbuf ih_mbuf.mptr ++#define ti_pr ti.ti_i4.ih_pr ++#define ti_src ti.ti_i4.ih_src ++#define ti_dst ti.ti_i4.ih_dst ++#define ti_src6 ti.ti_i6.ih_src ++#define ti_dst6 ti.ti_i6.ih_dst ++#define ti_nh6 ti.ti_i6.ih_nh ++#define ti_sport ti_t.th_sport ++#define ti_dport ti_t.th_dport ++#define ti_seq ti_t.th_seq ++#define ti_ack ti_t.th_ack ++#define ti_x2 ti_t.th_x2 ++#define ti_off ti_t.th_off ++#define ti_flags ti_t.th_flags ++#define ti_win ti_t.th_win ++#define ti_sum ti_t.th_sum ++#define ti_urp ti_t.th_urp ++/* The macros below assume a struct qlink sits immediately before the tcpiphdr. */ ++#define tcpiphdr2qlink(T) \ ++ ((struct qlink *)(((char *)(T)) - sizeof(struct qlink))) ++#define qlink2tcpiphdr(Q) \ ++ ((struct tcpiphdr *)(((char *)(Q)) + sizeof(struct qlink))) ++#define tcpiphdr_next(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->next) ++#define tcpiphdr_prev(T) qlink2tcpiphdr(tcpiphdr2qlink(T)->prev) ++#define tcpfrag_list_first(T) qlink2tcpiphdr((T)->seg_next) ++#define tcpfrag_list_end(F, T) (tcpiphdr2qlink(F) == (struct qlink *)(T)) ++#define tcpfrag_list_empty(T) ((T)->seg_next == (struct tcpiphdr *)(T)) ++ ++/* Size of a tcpiphdr structure minus the size of the actual ip+tcp headers, ++ * rounded up to a multiple of 4 to keep data aligned, and clamped at 0.
*/ ++#define TCPIPHDR_DELTA \ ++ (MAX(0, ((int) sizeof(struct tcpiphdr) - (int) sizeof(struct ip) - \ ++ (int) sizeof(struct tcphdr) + 3) & \ ++ ~3)) ++ ++/* ++ * Just a clean way to get to the first byte ++ * of the packet ++ */ ++struct tcpiphdr_2 { ++ struct tcpiphdr dummy; ++ char first_char; /* first byte following the tcpiphdr */ ++}; ++ ++#endif +diff --git a/slirp/src/tftp.c b/slirp/src/tftp.c +new file mode 100644 +index 0000000000..a19c889d34 +--- /dev/null ++++ b/slirp/src/tftp.c +@@ -0,0 +1,470 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * tftp.c - a simple, read-only tftp server for qemu ++ * ++ * Copyright (c) 2004 Magnus Damm ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE.
++ */ ++ ++#include "slirp.h" ++ ++#include ++#include ++#include ++ ++static inline int tftp_session_in_use(struct tftp_session *spt) ++{ ++ return (spt->slirp != NULL); ++} ++ ++static inline void tftp_session_update(struct tftp_session *spt) ++{ ++ spt->timestamp = curtime; ++} ++ ++static void tftp_session_terminate(struct tftp_session *spt) ++{ ++ if (spt->fd >= 0) { ++ close(spt->fd); ++ spt->fd = -1; ++ } ++ g_free(spt->filename); ++ spt->slirp = NULL; ++} ++ ++static int tftp_session_allocate(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (!tftp_session_in_use(spt)) ++ goto found; ++ ++ /* sessions time out after 5 inactive seconds */ ++ if ((int)(curtime - spt->timestamp) > 5000) { ++ tftp_session_terminate(spt); ++ goto found; ++ } ++ } ++ ++ return -1; ++ ++found: ++ memset(spt, 0, sizeof(*spt)); ++ memcpy(&spt->client_addr, srcsas, sockaddr_size(srcsas)); ++ spt->fd = -1; ++ spt->block_size = 512; ++ spt->client_port = hdr->udp.uh_sport; ++ spt->slirp = slirp; ++ ++ tftp_session_update(spt); ++ ++ return k; ++} ++ ++static int tftp_session_find(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ struct tftp_session *spt; ++ int k; ++ ++ for (k = 0; k < TFTP_SESSIONS_MAX; k++) { ++ spt = &slirp->tftp_sessions[k]; ++ ++ if (tftp_session_in_use(spt)) { ++ if (sockaddr_equal(&spt->client_addr, srcsas)) { ++ if (spt->client_port == hdr->udp.uh_sport) { ++ return k; ++ } ++ } ++ } ++ } ++ ++ return -1; ++} ++ ++static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr, ++ uint8_t *buf, int len) ++{ ++ int bytes_read = 0; ++ ++ if (spt->fd < 0) { ++ spt->fd = open(spt->filename, O_RDONLY | O_BINARY); ++ } ++ ++ if (spt->fd < 0) { ++ return -1; ++ } ++ ++ if (len) { ++ if (lseek(spt->fd, block_nr * spt->block_size, SEEK_SET) == (off_t)-1) { ++ return -1; ++ } ++ ++ 
bytes_read = read(spt->fd, buf, len); ++ } ++ ++ return bytes_read; ++} ++ ++static struct tftp_t *tftp_prep_mbuf_data(struct tftp_session *spt, ++ struct mbuf *m) ++{ ++ struct tftp_t *tp; ++ ++ memset(m->m_data, 0, m->m_size); ++ ++ m->m_data += IF_MAXLINKHDR; ++ if (spt->client_addr.ss_family == AF_INET6) { ++ m->m_data += sizeof(struct ip6); ++ } else { ++ m->m_data += sizeof(struct ip); ++ } ++ tp = (void *)m->m_data; ++ m->m_data += sizeof(struct udphdr); ++ ++ return tp; ++} ++ ++static void tftp_udp_output(struct tftp_session *spt, struct mbuf *m, ++ struct tftphdr *hdr) ++{ ++ if (spt->client_addr.ss_family == AF_INET6) { ++ struct sockaddr_in6 sa6, da6; ++ ++ sa6.sin6_addr = spt->slirp->vhost_addr6; ++ sa6.sin6_port = hdr->udp.uh_dport; ++ da6.sin6_addr = ((struct sockaddr_in6 *)&spt->client_addr)->sin6_addr; ++ da6.sin6_port = spt->client_port; ++ ++ udp6_output(NULL, m, &sa6, &da6); ++ } else { ++ struct sockaddr_in sa4, da4; ++ ++ sa4.sin_addr = spt->slirp->vhost_addr; ++ sa4.sin_port = hdr->udp.uh_dport; ++ da4.sin_addr = ((struct sockaddr_in *)&spt->client_addr)->sin_addr; ++ da4.sin_port = spt->client_port; ++ ++ udp_output(NULL, m, &sa4, &da4, IPTOS_LOWDELAY); ++ } ++} ++ ++static int tftp_send_oack(struct tftp_session *spt, const char *keys[], ++ uint32_t values[], int nb, struct tftp_t *recv_tp) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int i, n = 0; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) ++ return -1; ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_OACK); ++ for (i = 0; i < nb; i++) { ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s", keys[i]); ++ n += slirp_fmt0(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u", values[i]); ++ } ++ ++ m->m_len = G_SIZEOF_MEMBER(struct tftp_t, hdr.tp_op) + n; ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++ return 0; ++} ++ ++static void tftp_send_error(struct tftp_session *spt, uint16_t errorcode, ++ const char *msg, struct tftp_t *recv_tp) ++{ ++ struct mbuf 
*m; ++ struct tftp_t *tp; ++ ++ DEBUG_TFTP("tftp error msg: %s", msg); ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ goto out; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_ERROR); ++ tp->x.tp_error.tp_error_code = htons(errorcode); ++ slirp_pstrcpy((char *)tp->x.tp_error.tp_msg, sizeof(tp->x.tp_error.tp_msg), ++ msg); ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX + 2) + 3 + ++ strlen(msg) - sizeof(struct udphdr); ++ tftp_udp_output(spt, m, &recv_tp->hdr); ++ ++out: ++ tftp_session_terminate(spt); ++} ++ ++static void tftp_send_next_block(struct tftp_session *spt, ++ struct tftphdr *hdr) ++{ ++ struct mbuf *m; ++ struct tftp_t *tp; ++ int nobytes; ++ ++ m = m_get(spt->slirp); ++ ++ if (!m) { ++ return; ++ } ++ ++ tp = tftp_prep_mbuf_data(spt, m); ++ ++ tp->hdr.tp_op = htons(TFTP_DATA); ++ tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff); ++ ++ nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, ++ spt->block_size); ++ ++ if (nobytes < 0) { ++ m_free(m); ++ ++ /* send "file not found" error back */ ++ ++ tftp_send_error(spt, 1, "File not found", tp); ++ ++ return; ++ } ++ ++ m->m_len = sizeof(struct tftp_t) - (TFTP_BLOCKSIZE_MAX - nobytes) - ++ sizeof(struct udphdr); ++ tftp_udp_output(spt, m, hdr); ++ ++ if (nobytes == spt->block_size) { ++ tftp_session_update(spt); ++ } else { ++ tftp_session_terminate(spt); ++ } ++ ++ spt->block_nr++; ++} ++ ++static void tftp_handle_rrq(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftp_t *tp, int pktlen) ++{ ++ struct tftp_session *spt; ++ int s, k; ++ size_t prefix_len; ++ char *req_fname; ++ const char *option_name[2]; ++ uint32_t option_value[2]; ++ int nb_options = 0; ++ ++ /* check if a session already exists and if so terminate it */ ++ s = tftp_session_find(slirp, srcsas, &tp->hdr); ++ if (s >= 0) { ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++ } ++ ++ s = tftp_session_allocate(slirp, srcsas, &tp->hdr); ++ ++ if (s < 0) { 
++ return; ++ } ++ ++ spt = &slirp->tftp_sessions[s]; ++ ++ /* unspecified prefix means service disabled */ ++ if (!slirp->tftp_prefix) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* skip header fields */ ++ k = 0; ++ pktlen -= offsetof(struct tftp_t, x.tp_buf); ++ ++ /* prepend tftp_prefix */ ++ prefix_len = strlen(slirp->tftp_prefix); ++ spt->filename = g_malloc(prefix_len + TFTP_FILENAME_MAX + 2); ++ memcpy(spt->filename, slirp->tftp_prefix, prefix_len); ++ spt->filename[prefix_len] = '/'; ++ ++ /* get name */ ++ req_fname = spt->filename + prefix_len + 1; ++ ++ while (1) { ++ if (k >= TFTP_FILENAME_MAX || k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ req_fname[k] = tp->x.tp_buf[k]; ++ if (req_fname[k++] == '\0') { ++ break; ++ } ++ } ++ ++ DEBUG_TFTP("tftp rrq file: %s", req_fname); ++ ++ /* check mode */ ++ if ((pktlen - k) < 6) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ if (strcasecmp(&tp->x.tp_buf[k], "octet") != 0) { ++ tftp_send_error(spt, 4, "Unsupported transfer mode", tp); ++ return; ++ } ++ ++ k += 6; /* skipping octet */ ++ ++ /* do sanity checks on the filename */ ++ if ( ++#ifdef G_OS_WIN32 ++ strstr(req_fname, "..\\") || ++ req_fname[strlen(req_fname) - 1] == '\\' || ++#endif ++ strstr(req_fname, "../") || ++ req_fname[strlen(req_fname) - 1] == '/') { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ /* check if the file exists */ ++ if (tftp_read_data(spt, 0, NULL, 0) < 0) { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ ++ if (tp->x.tp_buf[pktlen - 1] != 0) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ while (k < pktlen && nb_options < G_N_ELEMENTS(option_name)) { ++ const char *key, *value; ++ ++ key = &tp->x.tp_buf[k]; ++ k += strlen(key) + 1; ++ ++ if (k >= pktlen) { ++ tftp_send_error(spt, 2, "Access violation", tp); ++ return; ++ } ++ ++ value = &tp->x.tp_buf[k]; 
++ k += strlen(value) + 1; ++ ++ if (strcasecmp(key, "tsize") == 0) { ++ int tsize = atoi(value); ++ struct stat stat_p; ++ ++ if (tsize == 0) { ++ if (stat(spt->filename, &stat_p) == 0) ++ tsize = stat_p.st_size; ++ else { ++ tftp_send_error(spt, 1, "File not found", tp); ++ return; ++ } ++ } ++ ++ option_name[nb_options] = "tsize"; ++ option_value[nb_options] = tsize; ++ nb_options++; ++ } else if (strcasecmp(key, "blksize") == 0) { ++ int blksize = atoi(value); ++ ++ /* Accept blksize up to our maximum size */ ++ if (blksize > 0) { ++ spt->block_size = MIN(blksize, TFTP_BLOCKSIZE_MAX); ++ option_name[nb_options] = "blksize"; ++ option_value[nb_options] = spt->block_size; ++ nb_options++; ++ } ++ } ++ } ++ ++ if (nb_options > 0) { ++ assert(nb_options <= G_N_ELEMENTS(option_name)); ++ tftp_send_oack(spt, option_name, option_value, nb_options, tp); ++ return; ++ } ++ ++ spt->block_nr = 0; ++ tftp_send_next_block(spt, &tp->hdr); ++} ++ ++static void tftp_handle_ack(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_send_next_block(&slirp->tftp_sessions[s], hdr); ++} ++ ++static void tftp_handle_error(Slirp *slirp, struct sockaddr_storage *srcsas, ++ struct tftphdr *hdr) ++{ ++ int s; ++ ++ s = tftp_session_find(slirp, srcsas, hdr); ++ ++ if (s < 0) { ++ return; ++ } ++ ++ tftp_session_terminate(&slirp->tftp_sessions[s]); ++} ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m) ++{ ++ struct tftphdr *hdr = mtod_check(m, sizeof(struct tftphdr)); ++ ++ if (hdr == NULL) { ++ return; ++ } ++ ++ switch (ntohs(hdr->tp_op)) { ++ case TFTP_RRQ: ++ tftp_handle_rrq(m->slirp, srcsas, ++ mtod(m, struct tftp_t *), ++ m->m_len); ++ break; ++ ++ case TFTP_ACK: ++ tftp_handle_ack(m->slirp, srcsas, hdr); ++ break; ++ ++ case TFTP_ERROR: ++ tftp_handle_error(m->slirp, srcsas, hdr); ++ break; ++ } ++} +diff --git a/slirp/src/tftp.h 
b/slirp/src/tftp.h +new file mode 100644 +index 0000000000..cafab03f2f +--- /dev/null ++++ b/slirp/src/tftp.h +@@ -0,0 +1,58 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* tftp defines */ ++ ++#ifndef SLIRP_TFTP_H ++#define SLIRP_TFTP_H ++ ++#include "util.h" ++ ++#define TFTP_SESSIONS_MAX 20 /* entries in the per-Slirp tftp_sessions[] table */ ++ ++#define TFTP_SERVER 69 /* UDP destination port handed to tftp_input() */ ++/* opcodes carried (in network byte order) in tftphdr.tp_op */ ++#define TFTP_RRQ 1 ++#define TFTP_WRQ 2 ++#define TFTP_DATA 3 ++#define TFTP_ACK 4 ++#define TFTP_ERROR 5 ++#define TFTP_OACK 6 ++ ++#define TFTP_FILENAME_MAX 512 /* max requested name length, including the NUL */ ++#define TFTP_BLOCKSIZE_MAX 1428 /* cap applied to a negotiated blksize */ ++ ++struct tftphdr { ++ struct udphdr udp; ++ uint16_t tp_op; ++} SLIRP_PACKED; ++ ++struct tftp_t { ++ struct tftphdr hdr; ++ union { ++ struct { ++ uint16_t tp_block_nr; ++ uint8_t tp_buf[TFTP_BLOCKSIZE_MAX]; ++ } tp_data; ++ struct { ++ uint16_t tp_error_code; ++ uint8_t tp_msg[TFTP_BLOCKSIZE_MAX]; ++ } tp_error; ++ char tp_buf[TFTP_BLOCKSIZE_MAX + 2]; ++ } x; ++} SLIRP_PACKED; ++ ++struct tftp_session { ++ Slirp *slirp; /* owning instance; NULL marks the slot as free */ ++ char *filename; /* g_malloc'ed: tftp_prefix + '/' + requested name */ ++ int fd; /* open file descriptor, or -1 */ ++ uint16_t block_size; /* bytes per DATA block; 512 unless blksize was negotiated */ ++ ++ struct sockaddr_storage client_addr; ++ uint16_t client_port; /* client's UDP source port, as found in the packet */ ++ uint32_t block_nr; /* 0-based index of the next block to read and send */ ++ ++ int timestamp; /* curtime at last activity, used for the session timeout */ ++}; ++ ++void tftp_input(struct sockaddr_storage *srcsas, struct mbuf *m); ++ ++#endif +diff --git a/slirp/src/udp.c b/slirp/src/udp.c +new file mode 100644 +index 0000000000..06b7b7d032 +--- /dev/null ++++ b/slirp/src/udp.c +@@ -0,0 +1,425 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 1988, 1990, 1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2.
Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp_usrreq.c 8.4 (Berkeley) 1/21/94 ++ * udp_usrreq.c,v 1.4 1994/10/02 17:48:45 phk Exp ++ */ ++ ++/* ++ * Changes and additions relating to SLiRP ++ * Copyright (c) 1995 Danny Gasparovski. ++ * ++ * Please read the file COPYRIGHT for the ++ * terms and conditions of the copyright. 
++ */ ++ ++#include "slirp.h" ++#include "ip_icmp.h" ++ ++static uint8_t udp_tos(struct socket *so); ++/* Start with an empty (self-linked) UDP socket list and reset udp_last_so. */ ++void udp_init(Slirp *slirp) ++{ ++ slirp->udb.so_next = slirp->udb.so_prev = &slirp->udb; ++ slirp->udp_last_so = &slirp->udb; ++} ++/* Detach every UDP socket; each pass hands the current list head to udp_detach(). */ ++void udp_cleanup(Slirp *slirp) ++{ ++ struct socket *so, *so_next; ++ ++ for (so = slirp->udb.so_next; so != &slirp->udb; so = so_next) { ++ so_next = so->so_next; ++ udp_detach(slirp->udb.so_next); ++ } ++} ++ ++/* m->m_data points at ip packet header ++ * m->m_len length ip packet ++ * ip->ip_len length data (IPDU) ++ */ ++void udp_input(register struct mbuf *m, int iphlen) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ register struct ip *ip; ++ register struct udphdr *uh; ++ int len; ++ struct ip save_ip; ++ struct socket *so; ++ struct sockaddr_storage lhost; ++ struct sockaddr_in *lhost4; ++ int ttl; ++ ++ DEBUG_CALL("udp_input"); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("iphlen = %d", iphlen); ++ ++ /* ++ * Strip IP options, if any; should skip this, ++ * make available to user, and use on returned packets, ++ * but we don't yet have a way to check the checksum ++ * with options still present. ++ */ ++ if (iphlen > sizeof(struct ip)) { ++ ip_stripoptions(m, (struct mbuf *)0); ++ iphlen = sizeof(struct ip); ++ } ++ ++ /* ++ * Get IP and UDP header together in first mbuf. ++ */ ++ ip = mtod_check(m, iphlen + sizeof(struct udphdr)); ++ if (ip == NULL) { ++ goto bad; ++ } ++ uh = (struct udphdr *)((char *)ip + iphlen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ if (ip->ip_len != len) { ++ if (len > ip->ip_len) { ++ goto bad; ++ } ++ m_adj(m, len - ip->ip_len); ++ ip->ip_len = len; ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want to restore it ++ * for sending an ICMP error message in response.
++ */ ++ save_ip = *ip; ++ save_ip.ip_len += iphlen; /* tcp_input subtracts this */ ++ ++ /* ++ * Checksum extended UDP header and data. ++ */ ++ if (uh->uh_sum) { ++ memset(&((struct ipovly *)ip)->ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ((struct ipovly *)ip)->ih_x1 = 0; ++ ((struct ipovly *)ip)->ih_len = uh->uh_ulen; ++ if (cksum(m, len + sizeof(struct ip))) { ++ goto bad; ++ } ++ } ++ ++ lhost.ss_family = AF_INET; ++ lhost4 = (struct sockaddr_in *)&lhost; ++ lhost4->sin_addr = ip->ip_src; ++ lhost4->sin_port = uh->uh_sport; ++ ++ /* ++ * handle DHCP/BOOTP ++ */ ++ if (ntohs(uh->uh_dport) == BOOTP_SERVER && ++ (ip->ip_dst.s_addr == slirp->vhost_addr.s_addr || ++ ip->ip_dst.s_addr == 0xffffffff)) { ++ bootp_input(m); ++ goto bad; /* not an error: the label only frees m */ ++ } ++ ++ /* ++ * handle TFTP ++ */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ ip->ip_dst.s_addr == slirp->vhost_addr.s_addr) { ++ m->m_data += iphlen; /* hide the IP header from tftp_input() */ ++ m->m_len -= iphlen; ++ tftp_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; /* not an error: the label only frees m */ ++ } ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ /* ++ * Locate pcb for datagram. ++ */ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, &lhost, NULL); ++ ++ if (so == NULL) { ++ /* ++ * If there's no socket for this packet, ++ * create one ++ */ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET) == -1) { ++ DEBUG_MISC(" udp_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* ++ * Setup fields ++ */ ++ so->so_lfamily = AF_INET; ++ so->so_laddr = ip->ip_src; ++ so->so_lport = uh->uh_sport; ++ ++ if ((so->so_iptos = udp_tos(so)) == 0) ++ so->so_iptos = ip->ip_tos; ++ ++ /* ++ * XXXXX Here, check if it's in udpexec_list, ++ * and if it is, do the fork_exec() etc.
++ */ ++ } ++ ++ so->so_ffamily = AF_INET; ++ so->so_faddr = ip->ip_dst; /* XXX */ ++ so->so_fport = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); /* from here on: IP + UDP header length */ ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL: answer with ICMP time exceeded if it would drop to 0 ++ */ ++ ttl = save_ip.ip_ttl-1; ++ if (ttl <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp_send_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, NULL); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)); /* result ignored */ ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp_send_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, 0, strerror(errno)); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; /* ICMP backup */ ++ ++ return; /* m stays referenced by so->so_m, so it is not freed here */ ++bad: ++ m_free(m); ++} ++ ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct udpiphdr)); ++ ++ register struct udpiphdr *ui; ++ int error = 0; ++ ++ DEBUG_CALL("udp_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ DEBUG_ARG("saddr = %s", inet_ntoa(saddr->sin_addr)); ++ DEBUG_ARG("daddr = %s", inet_ntoa(daddr->sin_addr)); ++ ++ /* ++ * Adjust for header ++ */ ++ m->m_data -= sizeof(struct udpiphdr); ++ m->m_len += sizeof(struct udpiphdr); ++ ++ /* ++ * Fill in mbuf with extended UDP header ++ * and addresses and length put into network format.
++ */ ++ ui = mtod(m, struct udpiphdr *); ++ memset(&ui->ui_i.ih_mbuf, 0, sizeof(struct mbuf_ptr)); ++ ui->ui_x1 = 0; ++ ui->ui_pr = IPPROTO_UDP; ++ ui->ui_len = htons(m->m_len - sizeof(struct ip)); ++ /* XXXXX Check for from-one-location sockets, or from-any-location sockets ++ */ ++ ui->ui_src = saddr->sin_addr; ++ ui->ui_dst = daddr->sin_addr; ++ ui->ui_sport = saddr->sin_port; ++ ui->ui_dport = daddr->sin_port; ++ ui->ui_ulen = ui->ui_len; ++ ++ /* ++ * Stuff checksum and output datagram. ++ */ ++ ui->ui_sum = 0; ++ if ((ui->ui_sum = cksum(m, m->m_len)) == 0) ++ ui->ui_sum = 0xffff; ++ ((struct ip *)ui)->ip_len = m->m_len; ++ ++ ((struct ip *)ui)->ip_ttl = IPDEFTTL; ++ ((struct ip *)ui)->ip_tos = iptos; ++ ++ error = ip_output(so, m); ++ ++ return (error); ++} ++ ++int udp_attach(struct socket *so, unsigned short af) ++{ ++ so->s = slirp_socket(af, SOCK_DGRAM, 0); ++ if (so->s != -1) { ++ if (slirp_bind_outbound(so, af) != 0) { ++ // bind failed - close socket ++ closesocket(so->s); ++ so->s = -1; ++ return -1; ++ } ++ ++#ifdef __linux__ ++ { ++ int opt = 1; ++ switch (af) { ++ case AF_INET: ++ setsockopt(so->s, IPPROTO_IP, IP_RECVERR, &opt, sizeof(opt)); ++ break; ++ case AF_INET6: ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_RECVERR, &opt, sizeof(opt)); ++ break; ++ default: ++ g_assert_not_reached(); ++ } ++ } ++#endif ++ ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &so->slirp->udb); ++ } ++ so->slirp->cb->register_poll_fd(so->s, so->slirp->opaque); ++ return (so->s); ++} ++ ++void udp_detach(struct socket *so) ++{ ++ so->slirp->cb->unregister_poll_fd(so->s, so->slirp->opaque); ++ closesocket(so->s); ++ sofree(so); ++} ++ ++static const struct tos_t udptos[] = { { 0, 53, IPTOS_LOWDELAY, 0 }, /* DNS */ ++ { 0, 0, 0, 0 } }; ++ ++static uint8_t udp_tos(struct socket *so) ++{ ++ int i = 0; ++ ++ while (udptos[i].tos) { ++ if ((udptos[i].fport && ntohs(so->so_fport) == udptos[i].fport) || ++ (udptos[i].lport && ntohs(so->so_lport) == udptos[i].lport)) { ++ if 
(so->slirp->enable_emu) ++ so->so_emu = udptos[i].emu; ++ return udptos[i].tos; ++ } ++ i++; ++ } ++ ++ return 0; ++} ++ ++struct socket *udpx_listen(Slirp *slirp, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags) ++{ ++ struct socket *so; ++ socklen_t addrlen; ++ int save_errno; ++ ++ so = socreate(slirp); ++ so->s = slirp_socket(haddr->sa_family, SOCK_DGRAM, 0); ++ if (so->s < 0) { ++ save_errno = errno; ++ sofree(so); ++ errno = save_errno; ++ return NULL; ++ } ++ if (haddr->sa_family == AF_INET6) ++ slirp_socket_set_v6only(so->s, (flags & SS_HOSTFWD_V6ONLY) != 0); ++ so->so_expire = curtime + SO_EXPIRE; ++ insque(so, &slirp->udb); ++ ++ if (bind(so->s, haddr, haddrlen) < 0) { ++ save_errno = errno; ++ udp_detach(so); ++ errno = save_errno; ++ return NULL; ++ } ++ slirp_socket_set_fast_reuse(so->s); ++ ++ addrlen = sizeof(so->fhost); ++ getsockname(so->s, &so->fhost.sa, &addrlen); ++ sotranslate_accept(so); ++ ++ sockaddr_copy(&so->lhost.sa, sizeof(so->lhost), laddr, laddrlen); ++ ++ if (flags != SS_FACCEPTONCE) ++ so->so_expire = 0; ++ so->so_state &= SS_PERSISTENT_MASK; ++ so->so_state |= SS_ISFCONNECTED | flags; ++ ++ return so; ++} ++ ++struct socket *udp_listen(Slirp *slirp, uint32_t haddr, unsigned hport, ++ uint32_t laddr, unsigned lport, int flags) ++{ ++ struct sockaddr_in hsa, lsa; ++ ++ memset(&hsa, 0, sizeof(hsa)); ++ hsa.sin_family = AF_INET; ++ hsa.sin_addr.s_addr = haddr; ++ hsa.sin_port = hport; ++ ++ memset(&lsa, 0, sizeof(lsa)); ++ lsa.sin_family = AF_INET; ++ lsa.sin_addr.s_addr = laddr; ++ lsa.sin_port = lport; ++ ++ return udpx_listen(slirp, (const struct sockaddr *) &hsa, sizeof(hsa), (struct sockaddr *) &lsa, sizeof(lsa), flags); ++} +diff --git a/slirp/src/udp.h b/slirp/src/udp.h +new file mode 100644 +index 0000000000..47f4ed34d8 +--- /dev/null ++++ b/slirp/src/udp.h +@@ -0,0 +1,96 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 1982, 1986, 
1993 ++ * The Regents of the University of California. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its contributors ++ * may be used to endorse or promote products derived from this software ++ * without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * @(#)udp.h 8.1 (Berkeley) 6/10/93 ++ * udp.h,v 1.3 1994/08/21 05:27:41 paul Exp ++ */ ++ ++#ifndef UDP_H ++#define UDP_H ++ ++#include "socket.h" ++ ++#define UDP_TTL 0x60 ++#define UDP_UDPDATALEN 16192 ++ ++/* ++ * Udp protocol header. ++ * Per RFC 768, September, 1981. 
++ */ ++struct udphdr { ++ uint16_t uh_sport; /* source port */ ++ uint16_t uh_dport; /* destination port */ ++ int16_t uh_ulen; /* udp length */ ++ uint16_t uh_sum; /* udp checksum */ ++}; ++ ++/* ++ * UDP kernel structures and variables. ++ */ ++struct udpiphdr { ++ struct ipovly ui_i; /* overlaid ip structure */ ++ struct udphdr ui_u; /* udp header */ ++}; ++#define ui_mbuf ui_i.ih_mbuf.mptr ++#define ui_x1 ui_i.ih_x1 ++#define ui_pr ui_i.ih_pr ++#define ui_len ui_i.ih_len ++#define ui_src ui_i.ih_src ++#define ui_dst ui_i.ih_dst ++#define ui_sport ui_u.uh_sport ++#define ui_dport ui_u.uh_dport ++#define ui_ulen ui_u.uh_ulen ++#define ui_sum ui_u.uh_sum ++ ++/* ++ * Names for UDP sysctl objects ++ */ ++#define UDPCTL_CHECKSUM 1 /* checksum UDP packets */ ++#define UDPCTL_MAXID 2 ++ ++struct mbuf; ++ ++void udp_init(Slirp *); ++void udp_cleanup(Slirp *); ++void udp_input(register struct mbuf *, int); ++int udp_attach(struct socket *, unsigned short af); ++void udp_detach(struct socket *); ++struct socket *udp_listen(Slirp *, uint32_t, unsigned, uint32_t, unsigned, int); ++struct socket *udpx_listen(Slirp *, ++ const struct sockaddr *haddr, socklen_t haddrlen, ++ const struct sockaddr *laddr, socklen_t laddrlen, ++ int flags); ++int udp_output(struct socket *so, struct mbuf *m, struct sockaddr_in *saddr, ++ struct sockaddr_in *daddr, int iptos); ++ ++void udp6_input(register struct mbuf *); ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr); ++ ++#endif +diff --git a/slirp/src/udp6.c b/slirp/src/udp6.c +new file mode 100644 +index 0000000000..efeac5c19a +--- /dev/null ++++ b/slirp/src/udp6.c +@@ -0,0 +1,196 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * Copyright (c) 2013 ++ * Guillaume Subiron ++ */ ++ ++#include "slirp.h" ++#include "udp.h" ++#include "dhcpv6.h" ++ ++void udp6_input(struct mbuf *m) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, 0); ++ ++ struct ip6 *ip, 
save_ip; ++ struct udphdr *uh; ++ int iphlen = sizeof(struct ip6); ++ int len; ++ struct socket *so; ++ struct sockaddr_in6 lhost; ++ int hop_limit; ++ ++ DEBUG_CALL("udp6_input"); ++ DEBUG_ARG("m = %p", m); ++ ++ if (slirp->restricted) { ++ goto bad; ++ } ++ ++ ip = mtod(m, struct ip6 *); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ uh = mtod_check(m, sizeof(struct udphdr)); ++ if (uh == NULL) { ++ goto bad; ++ } ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ ++ if (ip6_cksum(m)) { ++ goto bad; ++ } ++ ++ len = ntohs((uint16_t)uh->uh_ulen); ++ ++ /* ++ * Make mbuf data length reflect UDP length. ++ * If not enough data to reflect UDP length, drop. ++ */ ++ if (ntohs(ip->ip_pl) != len) { ++ if (len > ntohs(ip->ip_pl)) { ++ goto bad; ++ } ++ m_adj(m, len - ntohs(ip->ip_pl)); ++ ip->ip_pl = htons(len); ++ } ++ ++ /* ++ * Save a copy of the IP header in case we want restore it ++ * for sending an ICMP error message in response. ++ */ ++ save_ip = *ip; ++ ++ /* Locate pcb for datagram. */ ++ lhost.sin6_family = AF_INET6; ++ lhost.sin6_addr = ip->ip_src; ++ lhost.sin6_port = uh->uh_sport; ++ ++ /* handle DHCPv6 */ ++ if (ntohs(uh->uh_dport) == DHCPV6_SERVER_PORT && ++ (in6_equal(&ip->ip_dst, &slirp->vhost_addr6) || ++ in6_dhcp_multicast(&ip->ip_dst))) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ dhcpv6_input(&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ /* handle TFTP */ ++ if (ntohs(uh->uh_dport) == TFTP_SERVER && ++ !memcmp(ip->ip_dst.s6_addr, slirp->vhost_addr6.s6_addr, 16)) { ++ m->m_data += iphlen; ++ m->m_len -= iphlen; ++ tftp_input((struct sockaddr_storage *)&lhost, m); ++ m->m_data -= iphlen; ++ m->m_len += iphlen; ++ goto bad; ++ } ++ ++ so = solookup(&slirp->udp_last_so, &slirp->udb, ++ (struct sockaddr_storage *)&lhost, NULL); ++ ++ if (so == NULL) { ++ /* If there's no socket for this packet, create one. 
*/ ++ so = socreate(slirp); ++ if (udp_attach(so, AF_INET6) == -1) { ++ DEBUG_MISC(" udp6_attach errno = %d-%s", errno, strerror(errno)); ++ sofree(so); ++ goto bad; ++ } ++ ++ /* Setup fields */ ++ so->so_lfamily = AF_INET6; ++ so->so_laddr6 = ip->ip_src; ++ so->so_lport6 = uh->uh_sport; ++ } ++ ++ so->so_ffamily = AF_INET6; ++ so->so_faddr6 = ip->ip_dst; /* XXX */ ++ so->so_fport6 = uh->uh_dport; /* XXX */ ++ ++ iphlen += sizeof(struct udphdr); ++ m->m_len -= iphlen; ++ m->m_data += iphlen; ++ ++ /* ++ * Check for TTL ++ */ ++ hop_limit = save_ip.ip_hl-1; ++ if (hop_limit <= 0) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp ttl exceeded"); ++ icmp6_send_error(m, ICMP6_TIMXCEED, ICMP6_TIMXCEED_INTRANS); ++ goto bad; ++ } ++ setsockopt(so->s, IPPROTO_IPV6, IPV6_UNICAST_HOPS, &hop_limit, sizeof(hop_limit)); ++ ++ /* ++ * Now we sendto() the packet. ++ */ ++ if (sosendto(so, m) == -1) { ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ DEBUG_MISC("udp tx errno = %d-%s", errno, strerror(errno)); ++ icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE); ++ goto bad; ++ } ++ ++ m_free(so->so_m); /* used for ICMP if error on sorecvfrom */ ++ ++ /* restore the orig mbuf packet */ ++ m->m_len += iphlen; ++ m->m_data -= iphlen; ++ *ip = save_ip; ++ so->so_m = m; ++ ++ return; ++bad: ++ m_free(m); ++} ++ ++int udp6_output(struct socket *so, struct mbuf *m, struct sockaddr_in6 *saddr, ++ struct sockaddr_in6 *daddr) ++{ ++ Slirp *slirp = m->slirp; ++ M_DUP_DEBUG(slirp, m, 0, sizeof(struct ip6) + sizeof(struct udphdr)); ++ ++ struct ip6 *ip; ++ struct udphdr *uh; ++ ++ DEBUG_CALL("udp6_output"); ++ DEBUG_ARG("so = %p", so); ++ DEBUG_ARG("m = %p", m); ++ ++ /* adjust for header */ ++ m->m_data -= sizeof(struct udphdr); ++ m->m_len += sizeof(struct udphdr); ++ uh = mtod(m, struct udphdr *); ++ m->m_data -= sizeof(struct ip6); ++ m->m_len += sizeof(struct ip6); ++ ip = mtod(m, struct ip6 *); ++ ++ /* Build IP header */ 
++ ip->ip_pl = htons(m->m_len - sizeof(struct ip6)); ++ ip->ip_nh = IPPROTO_UDP; ++ ip->ip_src = saddr->sin6_addr; ++ ip->ip_dst = daddr->sin6_addr; ++ ++ /* Build UDP header */ ++ uh->uh_sport = saddr->sin6_port; ++ uh->uh_dport = daddr->sin6_port; ++ uh->uh_ulen = ip->ip_pl; ++ uh->uh_sum = 0; ++ uh->uh_sum = ip6_cksum(m); ++ if (uh->uh_sum == 0) { ++ uh->uh_sum = 0xffff; ++ } ++ ++ return ip6_output(so, m, 0); ++} +diff --git a/slirp/src/util.c b/slirp/src/util.c +new file mode 100644 +index 0000000000..e6bccbe0fa +--- /dev/null ++++ b/slirp/src/util.c +@@ -0,0 +1,441 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * util.c (mostly based on QEMU os-win32.c) ++ * ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2016 Red Hat, Inc. ++ * ++ * QEMU library functions for win32 which are shared between QEMU and ++ * the QEMU tools. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "util.h" ++ ++#include ++#include ++#include ++ ++#if defined(_WIN32) ++int slirp_inet_aton(const char *cp, struct in_addr *ia) ++{ ++ uint32_t addr = inet_addr(cp); ++ if (addr == 0xffffffff) { ++ return 0; ++ } ++ ia->s_addr = addr; ++ return 1; ++} ++#endif ++ ++void slirp_set_nonblock(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFL); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFL, f | O_NONBLOCK); ++ assert(f != -1); ++#else ++ unsigned long opt = 1; ++ ioctlsocket(fd, FIONBIO, &opt); ++#endif ++} ++ ++static void slirp_set_cloexec(int fd) ++{ ++#ifndef _WIN32 ++ int f; ++ f = fcntl(fd, F_GETFD); ++ assert(f != -1); ++ f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); ++ assert(f != -1); ++#endif ++} ++ ++/* ++ * Opens a socket with FD_CLOEXEC set ++ * On failure errno contains the reason. ++ */ ++int slirp_socket(int domain, int type, int protocol) ++{ ++ int ret; ++ ++#ifdef SOCK_CLOEXEC ++ ret = socket(domain, type | SOCK_CLOEXEC, protocol); ++ if (ret != -1 || errno != EINVAL) { ++ return ret; ++ } ++#endif ++ ret = socket(domain, type, protocol); ++ if (ret >= 0) { ++ slirp_set_cloexec(ret); ++ } ++ ++ return ret; ++} ++ ++#ifdef _WIN32 ++static int socket_error(void) ++{ ++ switch (WSAGetLastError()) { ++ case 0: ++ return 0; ++ case WSAEINTR: ++ return EINTR; ++ case WSAEINVAL: ++ return EINVAL; ++ case WSA_INVALID_HANDLE: ++ return EBADF; ++ case WSA_NOT_ENOUGH_MEMORY: ++ return ENOMEM; ++ case WSA_INVALID_PARAMETER: ++ return EINVAL; ++ case WSAENAMETOOLONG: ++ return ENAMETOOLONG; ++ case WSAENOTEMPTY: ++ return ENOTEMPTY; ++ case WSAEWOULDBLOCK: ++ /* not using EWOULDBLOCK as we don't want code to have ++ * to check both EWOULDBLOCK and EAGAIN */ ++ return EAGAIN; ++ case WSAEINPROGRESS: ++ return EINPROGRESS; ++ case WSAEALREADY: ++ return EALREADY; ++ case WSAENOTSOCK: ++ return ENOTSOCK; ++ case WSAEDESTADDRREQ: ++ return EDESTADDRREQ; ++ case WSAEMSGSIZE: ++ return EMSGSIZE; ++ case WSAEPROTOTYPE: ++ return EPROTOTYPE; ++ 
case WSAENOPROTOOPT: ++ return ENOPROTOOPT; ++ case WSAEPROTONOSUPPORT: ++ return EPROTONOSUPPORT; ++ case WSAEOPNOTSUPP: ++ return EOPNOTSUPP; ++ case WSAEAFNOSUPPORT: ++ return EAFNOSUPPORT; ++ case WSAEADDRINUSE: ++ return EADDRINUSE; ++ case WSAEADDRNOTAVAIL: ++ return EADDRNOTAVAIL; ++ case WSAENETDOWN: ++ return ENETDOWN; ++ case WSAENETUNREACH: ++ return ENETUNREACH; ++ case WSAENETRESET: ++ return ENETRESET; ++ case WSAECONNABORTED: ++ return ECONNABORTED; ++ case WSAECONNRESET: ++ return ECONNRESET; ++ case WSAENOBUFS: ++ return ENOBUFS; ++ case WSAEISCONN: ++ return EISCONN; ++ case WSAENOTCONN: ++ return ENOTCONN; ++ case WSAETIMEDOUT: ++ return ETIMEDOUT; ++ case WSAECONNREFUSED: ++ return ECONNREFUSED; ++ case WSAELOOP: ++ return ELOOP; ++ case WSAEHOSTUNREACH: ++ return EHOSTUNREACH; ++ default: ++ return EIO; ++ } ++} ++ ++#undef ioctlsocket ++int slirp_ioctlsocket_wrap(int fd, int req, void *val) ++{ ++ int ret; ++ ret = ioctlsocket(fd, req, val); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef closesocket ++int slirp_closesocket_wrap(int fd) ++{ ++ int ret; ++ ret = closesocket(fd); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef connect ++int slirp_connect_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = connect(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef listen ++int slirp_listen_wrap(int sockfd, int backlog) ++{ ++ int ret; ++ ret = listen(sockfd, backlog); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef bind ++int slirp_bind_wrap(int sockfd, const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = bind(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef socket ++int slirp_socket_wrap(int domain, int type, int protocol) ++{ ++ int ret; ++ ret = socket(domain, type, protocol); ++ if (ret < 0) 
{ ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef accept ++int slirp_accept_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = accept(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef shutdown ++int slirp_shutdown_wrap(int sockfd, int how) ++{ ++ int ret; ++ ret = shutdown(sockfd, how); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockopt ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen) ++{ ++ int ret; ++ ret = getsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef setsockopt ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int optlen) ++{ ++ int ret; ++ ret = setsockopt(sockfd, level, optname, optval, optlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getpeername ++int slirp_getpeername_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getpeername(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef getsockname ++int slirp_getsockname_wrap(int sockfd, struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = getsockname(sockfd, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef send ++ssize_t slirp_send_wrap(int sockfd, const void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = send(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef sendto ++ssize_t slirp_sendto_wrap(int sockfd, const void *buf, size_t len, int flags, ++ const struct sockaddr *addr, int addrlen) ++{ ++ int ret; ++ ret = sendto(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recv ++ssize_t 
slirp_recv_wrap(int sockfd, void *buf, size_t len, int flags) ++{ ++ int ret; ++ ret = recv(sockfd, buf, len, flags); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++ ++#undef recvfrom ++ssize_t slirp_recvfrom_wrap(int sockfd, void *buf, size_t len, int flags, ++ struct sockaddr *addr, int *addrlen) ++{ ++ int ret; ++ ret = recvfrom(sockfd, buf, len, flags, addr, addrlen); ++ if (ret < 0) { ++ errno = socket_error(); ++ } ++ return ret; ++} ++#endif /* WIN32 */ ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str) ++{ ++ int c; ++ char *q = buf; ++ ++ if (buf_size <= 0) ++ return; ++ ++ for (;;) { ++ c = *str++; ++ if (c == 0 || q >= buf + buf_size - 1) ++ break; ++ *q++ = c; ++ } ++ *q = '\0'; ++} ++ ++G_GNUC_PRINTF(3, 0) ++static int slirp_vsnprintf(char *str, size_t size, ++ const char *format, va_list args) ++{ ++ int rv = g_vsnprintf(str, size, format, args); ++ ++ if (rv < 0) { ++ g_error("g_vsnprintf() failed: %s", g_strerror(errno)); ++ } ++ ++ return rv; ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - returns the number of bytes written (excluding optional \0-ending) ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt(char *str, size_t size, const char *format, ...) ++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt() truncation"); ++ } ++ ++ return MIN(rv, size); ++} ++ ++/* ++ * A snprintf()-like function that: ++ * - always \0-end (unless size == 0) ++ * - returns the number of bytes actually written, including \0 ending ++ * - dies on error ++ * - warn on truncation ++ */ ++int slirp_fmt0(char *str, size_t size, const char *format, ...) 
++{ ++ va_list args; ++ int rv; ++ ++ va_start(args, format); ++ rv = slirp_vsnprintf(str, size, format, args); ++ va_end(args); ++ ++ if (rv >= size) { ++ g_critical("slirp_fmt0() truncation"); ++ if (size > 0) ++ str[size - 1] = '\0'; ++ rv = size; ++ } else { ++ rv += 1; /* include \0 */ ++ } ++ ++ return rv; ++} ++ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_size) ++{ ++ assert(out_str_size >= ETH_ADDRSTRLEN); ++ ++ slirp_fmt0(out_str, out_str_size, "%02x:%02x:%02x:%02x:%02x:%02x", ++ addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); ++ ++ return out_str; ++} +diff --git a/slirp/src/util.h b/slirp/src/util.h +new file mode 100644 +index 0000000000..07654ecf37 +--- /dev/null ++++ b/slirp/src/util.h +@@ -0,0 +1,203 @@ ++/* SPDX-License-Identifier: MIT */ ++/* ++ * Copyright (c) 2003-2008 Fabrice Bellard ++ * Copyright (c) 2010-2019 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. ++ */ ++#ifndef UTIL_H_ ++#define UTIL_H_ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef _WIN32 ++#include ++#include ++#include ++#else ++#include ++#include ++#include ++#endif ++ ++#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) ++#define SLIRP_PACKED __attribute__((gcc_struct, packed)) ++#else ++#define SLIRP_PACKED __attribute__((packed)) ++#endif ++ ++#ifndef DIV_ROUND_UP ++#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) ++#endif ++ ++#ifndef container_of ++#define container_of(ptr, type, member) \ ++ __extension__({ \ ++ void *__mptr = (void *)(ptr); \ ++ ((type *)(__mptr - offsetof(type, member))); \ ++ }) ++#endif ++ ++#ifndef G_SIZEOF_MEMBER ++#define G_SIZEOF_MEMBER(type, member) sizeof(((type *)0)->member) ++#endif ++ ++#if defined(_WIN32) /* CONFIG_IOVEC */ ++#if !defined(IOV_MAX) /* XXX: to avoid duplicate with QEMU osdep.h */ ++struct iovec { ++ void *iov_base; ++ size_t iov_len; ++}; ++#endif ++#else ++#include ++#endif ++ ++#define stringify(s) tostring(s) ++#define tostring(s) #s ++ ++#define SCALE_MS 1000000 ++ ++#define ETH_ALEN 6 ++#define ETH_ADDRSTRLEN 18 /* "xx:xx:xx:xx:xx:xx", with trailing NUL */ ++#define ETH_HLEN 14 ++#define ETH_P_IP (0x0800) /* Internet Protocol packet */ ++#define ETH_P_ARP (0x0806) /* Address Resolution packet */ ++#define ETH_P_IPV6 (0x86dd) ++#define ETH_P_VLAN (0x8100) ++#define ETH_P_DVLAN (0x88a8) ++#define ETH_P_NCSI (0x88f8) ++#define ETH_P_UNKNOWN (0xffff) ++ ++/* FIXME: remove me when made standalone */ ++#ifdef _WIN32 ++#undef accept ++#undef bind ++#undef closesocket ++#undef connect ++#undef getpeername ++#undef getsockname ++#undef 
getsockopt ++#undef ioctlsocket ++#undef listen ++#undef recv ++#undef recvfrom ++#undef send ++#undef sendto ++#undef setsockopt ++#undef shutdown ++#undef socket ++#endif ++ ++#ifdef _WIN32 ++#define connect slirp_connect_wrap ++int slirp_connect_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define listen slirp_listen_wrap ++int slirp_listen_wrap(int fd, int backlog); ++#define bind slirp_bind_wrap ++int slirp_bind_wrap(int fd, const struct sockaddr *addr, int addrlen); ++#define socket slirp_socket_wrap ++int slirp_socket_wrap(int domain, int type, int protocol); ++#define accept slirp_accept_wrap ++int slirp_accept_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define shutdown slirp_shutdown_wrap ++int slirp_shutdown_wrap(int fd, int how); ++#define getpeername slirp_getpeername_wrap ++int slirp_getpeername_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define getsockname slirp_getsockname_wrap ++int slirp_getsockname_wrap(int fd, struct sockaddr *addr, int *addrlen); ++#define send slirp_send_wrap ++ssize_t slirp_send_wrap(int fd, const void *buf, size_t len, int flags); ++#define sendto slirp_sendto_wrap ++ssize_t slirp_sendto_wrap(int fd, const void *buf, size_t len, int flags, ++ const struct sockaddr *dest_addr, int addrlen); ++#define recv slirp_recv_wrap ++ssize_t slirp_recv_wrap(int fd, void *buf, size_t len, int flags); ++#define recvfrom slirp_recvfrom_wrap ++ssize_t slirp_recvfrom_wrap(int fd, void *buf, size_t len, int flags, ++ struct sockaddr *src_addr, int *addrlen); ++#define closesocket slirp_closesocket_wrap ++int slirp_closesocket_wrap(int fd); ++#define ioctlsocket slirp_ioctlsocket_wrap ++int slirp_ioctlsocket_wrap(int fd, int req, void *val); ++#define getsockopt slirp_getsockopt_wrap ++int slirp_getsockopt_wrap(int sockfd, int level, int optname, void *optval, ++ int *optlen); ++#define setsockopt slirp_setsockopt_wrap ++int slirp_setsockopt_wrap(int sockfd, int level, int optname, ++ const void *optval, int 
optlen); ++#define inet_aton slirp_inet_aton ++int slirp_inet_aton(const char *cp, struct in_addr *ia); ++#else ++#define closesocket(s) close(s) ++#define ioctlsocket(s, r, v) ioctl(s, r, v) ++#endif ++ ++int slirp_socket(int domain, int type, int protocol); ++void slirp_set_nonblock(int fd); ++ ++static inline int slirp_socket_set_v6only(int fd, int v) ++{ ++ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_nodelay(int fd) ++{ ++ int v = 1; ++ return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); ++} ++ ++static inline int slirp_socket_set_fast_reuse(int fd) ++{ ++#ifndef _WIN32 ++ int v = 1; ++ return setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof(v)); ++#else ++ /* Enabling the reuse of an endpoint that was used by a socket still in ++ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows ++ * fast reuse is the default and SO_REUSEADDR does strange things. So we ++ * don't have to do anything here. More info can be found at: ++ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ ++ return 0; ++#endif ++} ++ ++void slirp_pstrcpy(char *buf, int buf_size, const char *str); ++ ++int slirp_fmt(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++int slirp_fmt0(char *str, size_t size, const char *format, ...) G_GNUC_PRINTF(3, 4); ++ ++/* ++ * Pretty print a MAC address into out_str. ++ * As a convenience returns out_str. 
++ */ ++const char *slirp_ether_ntoa(const uint8_t *addr, char *out_str, ++ size_t out_str_len); ++ ++#endif +diff --git a/slirp/src/version.c b/slirp/src/version.c +new file mode 100644 +index 0000000000..93e0be9c24 +--- /dev/null ++++ b/slirp/src/version.c +@@ -0,0 +1,8 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++#include "libslirp.h" ++ ++const char * ++slirp_version_string(void) ++{ ++ return SLIRP_VERSION_STRING; /* macro is defined outside this file */ ++} +diff --git a/slirp/src/vmstate.c b/slirp/src/vmstate.c +new file mode 100644 +index 0000000000..68cc1729c5 +--- /dev/null ++++ b/slirp/src/vmstate.c +@@ -0,0 +1,444 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * VMState interpreter ++ * ++ * Copyright (c) 2009-2018 Red Hat Inc ++ * ++ * Authors: ++ * Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#include <assert.h> ++#include <errno.h> ++#include <string.h> ++#include <glib.h> ++ ++#include "stream.h" ++#include "vmstate.h" ++ ++static int get_nullptr(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ /* load side: consume one byte and require it to be VMS_NULLPTR_MARKER */ ++ if (slirp_istream_read_u8(f) == VMS_NULLPTR_MARKER) { ++ return 0; ++ } ++ g_warning("vmstate: get_nullptr expected VMS_NULLPTR_MARKER"); ++ return -EINVAL; ++} ++ ++static int put_nullptr(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++ ++{ /* save side: only valid for pv == NULL; writes the one-byte marker */ ++ if (pv == NULL) { ++ slirp_ostream_write_u8(f, VMS_NULLPTR_MARKER); ++ return 0; ++ } ++ g_warning("vmstate: put_nullptr must be called with pv == NULL"); ++ return -EINVAL; ++} ++ ++const VMStateInfo slirp_vmstate_info_nullptr = { ++ .name = "nullptr", /* label only; nothing in this file reads .name */ ++ .get = get_nullptr, ++ .put = put_nullptr, ++}; ++ ++/* 8 bit unsigned int: pv points at the uint8_t to load into / save from */ ++ ++static int get_uint8(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ *v = slirp_istream_read_u8(f); ++ return 0; ++} ++ ++static int put_uint8(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint8_t *v = pv; ++ slirp_ostream_write_u8(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint8 = { ++ .name = "uint8", ++ .get = get_uint8, ++ .put = put_uint8, ++}; ++ ++/* 16 bit unsigned int */ ++ ++static int get_uint16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ *v = 
slirp_istream_read_u16(f); ++ return 0; ++} ++ ++static int put_uint16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint16_t *v = pv; ++ slirp_ostream_write_u16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint16 = { ++ .name = "uint16", ++ .get = get_uint16, ++ .put = put_uint16, ++}; ++ ++/* 32 bit unsigned int */ ++ ++static int get_uint32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ *v = slirp_istream_read_u32(f); ++ return 0; ++} ++ ++static int put_uint32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ uint32_t *v = pv; ++ slirp_ostream_write_u32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_uint32 = { ++ .name = "uint32", ++ .get = get_uint32, ++ .put = put_uint32, ++}; ++ ++/* 16 bit int */ ++ ++static int get_int16(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ *v = slirp_istream_read_i16(f); ++ return 0; ++} ++ ++static int put_int16(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int16_t *v = pv; ++ slirp_ostream_write_i16(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int16 = { ++ .name = "int16", ++ .get = get_int16, ++ .put = put_int16, ++}; ++ ++/* 32 bit int */ ++ ++static int get_int32(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ *v = slirp_istream_read_i32(f); ++ return 0; ++} ++ ++static int put_int32(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int32_t *v = pv; ++ slirp_ostream_write_i32(f, *v); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_int32 = { ++ .name = "int32", ++ .get = get_int32, ++ .put = put_int32, ++}; ++ ++/* vmstate_info_tmp, see VMSTATE_WITH_TMP, the idea is that we allocate ++ * a temporary buffer and the pre_load/pre_save methods in the child vmsd ++ * copy stuff from the parent 
into the child and do calculations to fill ++ * in fields that don't really exist in the parent but need to be in the ++ * stream. ++ */ ++static int get_tmp(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ int ret; ++ const VMStateDescription *vmsd = field->vmsd; ++ int version_id = field->version_id; ++ void *tmp = g_malloc(size); /* scratch buffer, freed before returning */ ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_load_state(f, vmsd, tmp, version_id); ++ g_free(tmp); ++ return ret; ++} ++ ++static int put_tmp(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ const VMStateDescription *vmsd = field->vmsd; ++ void *tmp = g_malloc(size); /* scratch buffer, freed before returning */ ++ int ret; ++ ++ /* Writes the parent field which is at the start of the tmp */ ++ *(void **)tmp = pv; ++ ret = slirp_vmstate_save_state(f, vmsd, tmp); ++ g_free(tmp); ++ ++ return ret; ++} ++ ++const VMStateInfo slirp_vmstate_info_tmp = { ++ .name = "tmp", ++ .get = get_tmp, ++ .put = put_tmp, ++}; ++ ++/* uint8_t buffers: 'size' raw bytes are moved; the stream call's result is not checked */ ++ ++static int get_buffer(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_istream_read(f, pv, size); ++ return 0; ++} ++ ++static int put_buffer(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field) ++{ ++ slirp_ostream_write(f, pv, size); ++ return 0; ++} ++ ++const VMStateInfo slirp_vmstate_info_buffer = { ++ .name = "buffer", ++ .get = get_buffer, ++ .put = put_buffer, ++}; ++ ++static int vmstate_n_elems(void *opaque, const VMStateField *field) ++{ ++ int n_elems = 1; /* default when no array flag is set */ ++ ++ if (field->flags & VMS_ARRAY) { ++ n_elems = field->num; ++ } else if (field->flags & VMS_VARRAY_INT32) { ++ n_elems = *(int32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT32) { ++ n_elems = *(uint32_t *)(opaque + field->num_offset); ++ } else if (field->flags & VMS_VARRAY_UINT16) { ++ n_elems = *(uint16_t *)(opaque + field->num_offset); ++ } else if (field->flags & 
VMS_VARRAY_UINT8) { ++ n_elems = *(uint8_t *)(opaque + field->num_offset); ++ } ++ ++ if (field->flags & VMS_MULTIPLY_ELEMENTS) { ++ n_elems *= field->num; ++ } ++ ++ return n_elems; ++} ++ ++static int vmstate_size(void *opaque, const VMStateField *field) ++{ ++ int size = field->size; ++ ++ if (field->flags & VMS_VBUFFER) { ++ size = *(int32_t *)(opaque + field->size_offset); /* always read as int32_t */ ++ if (field->flags & VMS_MULTIPLY) { ++ size *= field->size; ++ } ++ } ++ ++ return size; ++} ++ ++static int vmstate_save_state_v(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ int ret = 0; ++ const VMStateField *field = vmsd->fields; ++ ++ if (vmsd->pre_save) { ++ ret = vmsd->pre_save(opaque); ++ if (ret) { ++ g_warning("pre-save failed: %s", vmsd->name); ++ return ret; ++ } ++ } ++ ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ assert(curr_elem); ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer write placeholder and do not follow */ ++ assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.put(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_save_state(f, field->vmsd, curr_elem); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = vmstate_save_state_v(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->put(f, curr_elem, size, field); ++ } ++ if (ret) { ++ g_warning("Save of field %s/%s 
failed", vmsd->name, ++ field->name); ++ return ret; ++ } ++ } ++ } else { ++ if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Output state validation failed: %s/%s", vmsd->name, ++ field->name); ++ assert(!(field->flags & VMS_MUST_EXIST)); ++ } ++ } ++ field++; ++ } ++ ++ return 0; ++} ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque) ++{ ++ return vmstate_save_state_v(f, vmsd, opaque, vmsd->version_id); ++} ++ ++static void vmstate_handle_alloc(void *ptr, VMStateField *field, void *opaque) ++{ ++ if (field->flags & VMS_POINTER && field->flags & VMS_ALLOC) { ++ size_t size = vmstate_size(opaque, field); ++ size *= vmstate_n_elems(opaque, field); ++ if (size) { /* nothing is allocated for a zero total size */ ++ *(void **)ptr = g_malloc(size); ++ } ++ } ++} ++ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id) ++{ ++ VMStateField *field = vmsd->fields; ++ int ret = 0; ++ ++ if (version_id > vmsd->version_id) { ++ g_warning("%s: incoming version_id %d is too new " ++ "for local version_id %d", ++ vmsd->name, version_id, vmsd->version_id); ++ return -EINVAL; ++ } ++ if (vmsd->pre_load) { ++ int ret = vmsd->pre_load(opaque); /* NOTE: shadows the function-level 'ret' */ ++ if (ret) { ++ return ret; ++ } ++ } ++ while (field->name) { ++ if ((field->field_exists && field->field_exists(opaque, version_id)) || ++ (!field->field_exists && field->version_id <= version_id)) { ++ void *first_elem = opaque + field->offset; ++ int i, n_elems = vmstate_n_elems(opaque, field); ++ int size = vmstate_size(opaque, field); ++ ++ vmstate_handle_alloc(first_elem, field, opaque); ++ if (field->flags & VMS_POINTER) { ++ first_elem = *(void **)first_elem; ++ assert(first_elem || !n_elems || !size); ++ } ++ for (i = 0; i < n_elems; i++) { ++ void *curr_elem = first_elem + size * i; ++ ++ if (field->flags & VMS_ARRAY_OF_POINTER) { ++ curr_elem = *(void **)curr_elem; ++ } ++ if (!curr_elem && size) { ++ /* if null pointer check placeholder and do not follow */ ++ 
assert(field->flags & VMS_ARRAY_OF_POINTER); ++ ret = slirp_vmstate_info_nullptr.get(f, curr_elem, size, ++ NULL); ++ } else if (field->flags & VMS_STRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->vmsd->version_id); ++ } else if (field->flags & VMS_VSTRUCT) { ++ ret = slirp_vmstate_load_state(f, field->vmsd, curr_elem, ++ field->struct_version_id); ++ } else { ++ ret = field->info->get(f, curr_elem, size, field); ++ } ++ if (ret < 0) { ++ g_warning("Failed to load %s:%s", vmsd->name, field->name); ++ return ret; ++ } ++ } ++ } else if (field->flags & VMS_MUST_EXIST) { ++ g_warning("Input validation failed: %s/%s", vmsd->name, ++ field->name); ++ return -1; ++ } ++ field++; ++ } ++ if (vmsd->post_load) { ++ ret = vmsd->post_load(opaque, version_id); ++ } ++ return ret; ++} +diff --git a/slirp/src/vmstate.h b/slirp/src/vmstate.h +new file mode 100644 +index 0000000000..94c6a4bc7b +--- /dev/null ++++ b/slirp/src/vmstate.h +@@ -0,0 +1,391 @@ ++/* SPDX-License-Identifier: BSD-3-Clause */ ++/* ++ * QEMU migration/snapshot declarations ++ * ++ * Copyright (c) 2009-2011 Red Hat, Inc. ++ * ++ * Original author: Juan Quintela ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above ++ * copyright notice, this list of conditions and the following ++ * disclaimer in the documentation and/or other materials provided ++ * with the distribution. ++ * ++ * 3. Neither the name of the copyright holder nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission.
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS ++ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ++ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, ++ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ++ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++#ifndef VMSTATE_H_ ++#define VMSTATE_H_ ++ ++#include <unistd.h> ++#include <stdint.h> ++#include <stdbool.h> ++#include "slirp.h" ++#include "stream.h" ++ ++#define stringify(s) tostring(s) /* expands s first, then stringizes it */ ++#define tostring(s) #s ++ ++typedef struct VMStateInfo VMStateInfo; ++typedef struct VMStateDescription VMStateDescription; ++typedef struct VMStateField VMStateField; ++ ++int slirp_vmstate_save_state(SlirpOStream *f, const VMStateDescription *vmsd, ++ void *opaque); /* saves using vmsd->version_id */ ++int slirp_vmstate_load_state(SlirpIStream *f, const VMStateDescription *vmsd, ++ void *opaque, int version_id); /* -EINVAL if version_id > vmsd->version_id */ ++ ++/* VMStateInfo allows customized migration of objects that don't fit in ++ * any category in VMStateFlags. Additional information is always passed ++ * into get and put in terms of field and vmdesc parameters. However ++ * these two parameters should only be used in cases when customized ++ * handling is needed, such as QTAILQ. For primitive data types such as ++ * integer, field and vmdesc parameters should be ignored inside get/put.
++ */ ++struct VMStateInfo { ++ const char *name; ++ int (*get)(SlirpIStream *f, void *pv, size_t size, ++ const VMStateField *field); ++ int (*put)(SlirpOStream *f, void *pv, size_t size, ++ const VMStateField *field); ++}; ++ ++enum VMStateFlags { ++ /* Ignored: never tested by the interpreter in vmstate.c */ ++ VMS_SINGLE = 0x001, ++ ++ /* The struct member at opaque + VMStateField.offset is a pointer ++ * to the actual field (e.g. struct a { uint8_t *b; ++ * }). Dereference the pointer before using it as basis for ++ * further pointer arithmetic (see e.g. VMS_ARRAY). Does not ++ * affect the meaning of VMStateField.num_offset or ++ * VMStateField.size_offset; see VMS_VARRAY* and VMS_VBUFFER for ++ * those. */ ++ VMS_POINTER = 0x002, ++ ++ /* The field is an array of fixed size. VMStateField.num contains ++ * the number of entries in the array. The size of each entry is ++ * given by VMStateField.size and / or opaque + ++ * VMStateField.size_offset; see VMS_VBUFFER and ++ * VMS_MULTIPLY. Each array entry will be processed individually ++ * (VMStateField.info.get()/put() if VMS_STRUCT is not set, ++ * recursion into VMStateField.vmsd if VMS_STRUCT is set). May not ++ * be combined with VMS_VARRAY*. */ ++ VMS_ARRAY = 0x004, ++ ++ /* The field is itself a struct, containing one or more ++ * fields. Recurse into VMStateField.vmsd. Most useful in ++ * combination with VMS_ARRAY / VMS_VARRAY*, recursing into each ++ * array entry. */ ++ VMS_STRUCT = 0x008, ++ ++ /* The field is an array of variable size. The int32_t at opaque + ++ * VMStateField.num_offset contains the number of entries in the ++ * array. See the VMS_ARRAY description regarding array handling ++ * in general. May not be combined with VMS_ARRAY or any other ++ * VMS_VARRAY*. */ ++ VMS_VARRAY_INT32 = 0x010, ++ ++ /* Ignored: never tested by the interpreter in vmstate.c */ ++ VMS_BUFFER = 0x020, ++ ++ /* The field is a (fixed-size or variable-size) array of pointers ++ * (e.g. struct a { uint8_t *b[]; }). Dereference each array entry ++ * before using it. 
Note: Does not imply any one of VMS_ARRAY / ++ * VMS_VARRAY*; these need to be set explicitly. */ ++ VMS_ARRAY_OF_POINTER = 0x040, ++ ++ /* The field is an array of variable size. The uint16_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT16 = 0x080, ++ ++ /* The size of the individual entries (a single array entry if ++ * VMS_ARRAY or any of VMS_VARRAY* are set, or the field itself if ++ * neither is set) is variable (i.e. not known at compile-time), ++ * but the same for all entries. Use the int32_t at opaque + ++ * VMStateField.size_offset (subject to VMS_MULTIPLY) to determine ++ * the size of each (and every) entry. */ ++ VMS_VBUFFER = 0x100, ++ ++ /* Multiply the entry size given by the int32_t at opaque + ++ * VMStateField.size_offset (see VMS_VBUFFER description) with ++ * VMStateField.size to determine the number of bytes to be ++ * allocated. Only valid in combination with VMS_VBUFFER. */ ++ VMS_MULTIPLY = 0x200, ++ ++ /* The field is an array of variable size. The uint8_t at opaque + ++ * VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT8 = 0x400, ++ ++ /* The field is an array of variable size. The uint32_t at opaque ++ * + VMStateField.num_offset (subject to VMS_MULTIPLY_ELEMENTS) ++ * contains the number of entries in the array. See the VMS_ARRAY ++ * description regarding array handling in general. May not be ++ * combined with VMS_ARRAY or any other VMS_VARRAY*. */ ++ VMS_VARRAY_UINT32 = 0x800, ++ ++ /* Fail loading the serialised VM state if this field is missing ++ * from the input. 
On save, a skipped field with this flag trips an assert. */ ++ VMS_MUST_EXIST = 0x1000, ++ ++ /* When loading serialised VM state, allocate memory for the ++ * (entire) field. Only valid in combination with ++ * VMS_POINTER. Note: Not all combinations with other flags are ++ * currently supported, e.g. VMS_ALLOC|VMS_ARRAY_OF_POINTER won't ++ * cause the individual entries to be allocated. */ ++ VMS_ALLOC = 0x2000, ++ ++ /* Multiply the number of entries given by the integer at opaque + ++ * VMStateField.num_offset (see VMS_VARRAY*) with VMStateField.num ++ * to determine the number of entries in the array. Only valid in ++ * combination with one of VMS_VARRAY*. */ ++ VMS_MULTIPLY_ELEMENTS = 0x4000, ++ ++ /* A structure field that is like VMS_STRUCT, but uses ++ * VMStateField.struct_version_id to tell which version of the ++ * structure we are referencing to use. */ ++ VMS_VSTRUCT = 0x8000, ++}; ++ ++struct VMStateField { ++ const char *name; ++ size_t offset; ++ size_t size; ++ size_t start; ++ int num; ++ size_t num_offset; ++ size_t size_offset; ++ const VMStateInfo *info; ++ enum VMStateFlags flags; ++ const VMStateDescription *vmsd; ++ int version_id; ++ int struct_version_id; ++ bool (*field_exists)(void *opaque, int version_id); ++}; ++ ++struct VMStateDescription { ++ const char *name; ++ int version_id; ++ int (*pre_load)(void *opaque); ++ int (*post_load)(void *opaque, int version_id); ++ int (*pre_save)(void *opaque); ++ VMStateField *fields; ++}; ++ ++ ++extern const VMStateInfo slirp_vmstate_info_int16; ++extern const VMStateInfo slirp_vmstate_info_int32; ++extern const VMStateInfo slirp_vmstate_info_uint8; ++extern const VMStateInfo slirp_vmstate_info_uint16; ++extern const VMStateInfo slirp_vmstate_info_uint32; ++ ++/** Put this in the stream when migrating a null pointer.*/ ++#define VMS_NULLPTR_MARKER (0x30U) /* '0' */ ++extern const VMStateInfo slirp_vmstate_info_nullptr; ++ ++extern const VMStateInfo slirp_vmstate_info_buffer; ++extern const VMStateInfo slirp_vmstate_info_tmp; ++ ++#define 
type_check_array(t1, t2, n) ((t1(*)[n])0 - (t2 *)0) ++#define type_check_pointer(t1, t2) ((t1 **)0 - (t2 *)0) ++#define typeof_field(type, field) typeof(((type *)0)->field) ++#define type_check(t1, t2) ((t1 *)0 - (t2 *)0) ++ ++#define vmstate_offset_value(_state, _field, _type) \ ++ (offsetof(_state, _field) + type_check(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_pointer(_state, _field, _type) \ ++ (offsetof(_state, _field) + \ ++ type_check_pointer(_type, typeof_field(_state, _field))) ++ ++#define vmstate_offset_array(_state, _field, _type, _num) \ ++ (offsetof(_state, _field) + \ ++ type_check_array(_type, typeof_field(_state, _field), _num)) ++ ++#define vmstate_offset_buffer(_state, _field) \ ++ vmstate_offset_array(_state, _field, uint8_t, \ ++ sizeof(typeof_field(_state, _field))) ++ ++/* In the macros below, if there is a _version, that means the macro's ++ * field will be processed only if the version being received is >= ++ * the _version specified. In general, if you add a new field, you ++ * would increment the structure's version and put that version ++ * number into the new field so it would only be processed with the ++ * new version. ++ * ++ * In particular, for VMSTATE_STRUCT() and friends the _version does ++ * *NOT* pick the version of the sub-structure. It works just as ++ * specified above. The version of the top-level structure received ++ * is passed down to all sub-structures. This means that the ++ * sub-structures must have version that are compatible with all the ++ * structures that use them. ++ * ++ * If you want to specify the version of the sub-structure, use ++ * VMSTATE_VSTRUCT(), which allows the specific sub-structure version ++ * to be directly specified. NOTE(review): for VMS_STRUCT fields slirp_vmstate_load_state() passes field->vmsd->version_id, not the received top-level version - confirm which is intended.
++ */ ++ ++#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = sizeof(_type), .info = &(_info), \ ++ .flags = VMS_SINGLE, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), .num = (_num), \ ++ .info = &(_info), .size = sizeof(_type), .flags = VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT, \ ++ .offset = vmstate_offset_value(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_POINTER_V(_field, _state, _version, _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .vmsd = &(_vmsd), .size = sizeof(_type *), \ ++ .flags = VMS_STRUCT | VMS_POINTER, \ ++ .offset = vmstate_offset_pointer(_state, _field, _type), \ ++ } ++ ++#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, \ ++ _vmsd, _type) \ ++ { \ ++ .name = (stringify(_field)), .num = (_num), .field_exists = (_test), \ ++ .version_id = (_version), .vmsd = &(_vmsd), .size = sizeof(_type), \ ++ .flags = VMS_STRUCT | VMS_ARRAY, \ ++ .offset = vmstate_offset_array(_state, _field, _type, _num), \ ++ } ++ ++#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), .size = (_size - _start), \ ++ .info = &slirp_vmstate_info_buffer, .flags = VMS_BUFFER, \ ++ .offset = vmstate_offset_buffer(_state, _field) + _start, \ ++ } ++ ++#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, 
_field_size) \ ++ { \ ++ .name = (stringify(_field)), .version_id = (_version), \ ++ .field_exists = (_test), \ ++ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t), \ ++ .info = &slirp_vmstate_info_buffer, \ ++ .flags = VMS_VBUFFER | VMS_POINTER, \ ++ .offset = offsetof(_state, _field), \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_STRUCT(x) \ ++ struct { \ ++ int : (x) ? -1 : 1; /* negative bit-field width: compile error when x is non-zero */ \ ++ } ++ ++#define QEMU_BUILD_BUG_ON_ZERO(x) \ ++ (sizeof(QEMU_BUILD_BUG_ON_STRUCT(x)) - sizeof(QEMU_BUILD_BUG_ON_STRUCT(x))) ++ ++/* Allocate a temporary of type 'tmp_type', set tmp->parent to _state ++ * and execute the vmsd on the temporary. Note that we're working with ++ * the whole of _state here, not a field within it. ++ * We compile time check that: ++ * That _tmp_type contains a 'parent' member that's a pointer to the ++ * '_state' type ++ * That the pointer is right at the start of _tmp_type. ++ */ ++#define VMSTATE_WITH_TMP(_state, _tmp_type, _vmsd) \ ++ { \ ++ .name = "tmp", \ ++ .size = sizeof(_tmp_type) + \ ++ QEMU_BUILD_BUG_ON_ZERO(offsetof(_tmp_type, parent) != 0) + \ ++ type_check_pointer(_state, typeof_field(_tmp_type, parent)), \ ++ .vmsd = &(_vmsd), .info = &slirp_vmstate_info_tmp, \ ++ } ++ ++#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \ ++ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type) ++ ++#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \ ++ VMSTATE_STRUCT_POINTER_V(_field, _state, 0, _vmsd, _type) ++ ++#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \ ++ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, _vmsd, \ ++ _type) ++ ++#define VMSTATE_INT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int16, int16_t) ++#define VMSTATE_INT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_int32, 
int32_t) ++ ++#define VMSTATE_UINT8_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint8, uint8_t) ++#define VMSTATE_UINT16_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint16, uint16_t) ++#define VMSTATE_UINT32_V(_f, _s, _v) \ ++ VMSTATE_SINGLE(_f, _s, _v, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16(_f, _s) VMSTATE_INT16_V(_f, _s, 0) ++#define VMSTATE_INT32(_f, _s) VMSTATE_INT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT8(_f, _s) VMSTATE_UINT8_V(_f, _s, 0) ++#define VMSTATE_UINT16(_f, _s) VMSTATE_UINT16_V(_f, _s, 0) ++#define VMSTATE_UINT32(_f, _s) VMSTATE_UINT32_V(_f, _s, 0) ++ ++#define VMSTATE_UINT16_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint16, uint16_t) ++ ++#define VMSTATE_UINT32_TEST(_f, _s, _t) \ ++ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, slirp_vmstate_info_uint32, uint32_t) ++ ++#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \ ++ VMSTATE_ARRAY(_f, _s, _n, _v, slirp_vmstate_info_int16, int16_t) ++ ++#define VMSTATE_INT16_ARRAY(_f, _s, _n) VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0) ++ ++#define VMSTATE_BUFFER_V(_f, _s, _v) \ ++ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f))) ++ ++#define VMSTATE_BUFFER(_f, _s) VMSTATE_BUFFER_V(_f, _s, 0) ++ ++#define VMSTATE_END_OF_LIST() \ ++ { \ ++ } ++ ++#endif +-- +2.27.0 + diff --git a/0004-Initial-redhat-build.patch b/0004-Initial-redhat-build.patch new file mode 100644 index 0000000..94cf91c --- /dev/null +++ b/0004-Initial-redhat-build.patch @@ -0,0 +1,313 @@ +From fc113ecd7c99646a7ced0b99570b5927ae6d595f Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 26 May 2021 10:56:02 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. 
In addition, +several issues are fixed in QEMU tree: + +- Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent +- Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +We disable make check due to issues with some of the tests. + +This rebase is based on qemu-kvm-6.2.0-13.el9 + +Signed-off-by: Miroslav Rezanina +-- +Rebase changes (6.1.0): +- Move build to .distro +- Move changes for support file to related commit +- Added dependency for python3-sphinx-rtd_theme +- Removed --disable-sheepdog configure option +- Added new hw-display modules +- SASL initialization moved to ui/vnc-auth-sasl.c +- Add accel-qtest- and accel-tcg-x86_64 libraries +- Added hw-usb-host module +- Disable new configure options (bpf, nvmm, slirp-smbd) +- Use -pie for ksmctl build (annocheck complaint fix) + +Rebase changes (6.2.0): +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Add -Wno-string-plus-int to extra flags +- Updated configure options + +Rebase changes (7.0.0): +- Do not use -mlittle CFLAG on ppc64le +- Used upstream handling issue with ui/clipboard.c +- Use -mlittle-endian on ppc64le instead of deleting it in configure +- Drop --disable-libxml2 option for configure (upstream) +- Remove vof roms +- Disable AVX2 support +- Use internal meson +- Disable new configure options (dbus-display and qga-vss) +- Change permissions on installing tests/Makefile.include +- Remove ssh block driver + +Merged patches (6.0.0): + - 605758c902 Limit build on Power to qemu-img and qemu-ga only + +Merged patches (6.1.0): +- f04f91751f Use cached tarballs +- 6581165c65 Remove message with running VM count +- 03c3cac9fc spec-file: build qemu-kvm without SPICE and QXL +- e0ae6c1f6c
spec-file: Obsolete qemu-kvm-ui-spice +- 9d2e9f9ecf spec: Do not build qemu-kvm-block-gluster +- cf470b4234 spec: Do not link pcnet and ne2k_pci roms +- e981284a6b redhat: Install the s390-netboot.img that we've built +- 24ef557f33 spec: Remove usage of Group: tag +- c40d69b4f4 spec: Drop %defattr usage +- f8e98798ce spec: Clean up BuildRequires +- 47246b43ee spec: Remove iasl BuildRequires +- 170dc1cbe0 spec: Remove redundant 0 in conditionals +- 8718f6fa11 spec: Add more have_XXX conditionals +- a001269ce9 spec: Remove binutils versioned Requires +- 34545ee641 spec: Remove diffutils BuildRequires +- c2c82beac9 spec: Remove redundant Requires: +- 9314c231f4 spec: Add XXX_version macros +- c43db0bf0f spec: Add have_block_rbd +- 3ecb0c0319 qga: drop StandardError=syslog +- 018049dc80 Remove iscsi support +- a2edf18777 redhat: Replace the kvm-setup.service with a /etc/modules-load.d config file +- 387b5fbcfe redhat: Move qemu-kvm-docs dependency to qemu-kvm +- 4ead693178 redhat: introducting qemu-kvm-hw-usbredir +- 4dc6fc3035 redhat: use the standard vhost-user JSON path +- 84757178b4 Fix local build +- 8c394227dd spec: Restrict block drivers in tools +- b6aa7c1fae Move tools to separate package +- eafd82e509 Split qemu-pr-helper to separate package +- 2c0182e2aa spec: RPM_BUILD_ROOT -> %{buildroot} +- 91bd55ca13 spec: More use of %{name} instead of 'qemu-kvm' +- 50ba299c61 spec: Use qemu-pr-helper.service from qemu.git (partial) +- ee08d4e0a3 spec: Use %{_sourcedir} for referencing sources +- 039e7f7d02 spec: Add tools_only +- 884ba71617 spec: %build: Add run_configure helper +- 8ebd864d65 spec: %build: Disable more bits with %{disable_everything} (partial) +- f23fdb53f5 spec: %build: Add macros for some 'configure' parameters +- fe951a8bd8 spec: %files: Move qemu-guest-agent and qemu-img earlier +- 353b632e37 spec: %install: Remove redundant bits +- 9d2015b752 spec: %install: Add %{modprobe_kvm_conf} macro +- 6d05134e8c spec: %install: Remove qemu-guest-agent 
/etc/qemu-kvm usage +- 985b226467 spec: %install: clean up qemu-ga section +- dfaf9c600d spec: %install: Use a single %{tools_only} section +- f6978ddb46 spec: Make tools_only not cross spec sections +- 071c211098 spec: %install: Limit time spent in %{qemu_kvm_build} +- 1b65c674be spec: misc syntactic merges with Fedora +- 4da16294cf spec: Use Fedora's pattern for specifying rc version +- d7ee259a79 spec: %files: don't use fine grained -docs file list +- 64cad0c60f spec: %files: Add licenses to qemu-common too +- c3de4f080a spec: %install: Drop python3 shebang fixup +- 46fc216115 Update local build to work with spec file improvements +- bab9531548 spec: Remove buildldflags +- c8360ab6a9 spec: Use %make_build macro +- f6966c66e9 spec: Drop make install sharedir and datadir usage +- 86982421bc spec: use %make_install macro +- 191c405d22 spec: parallelize `make check` +- 251a1fb958 spec: Drop explicit --build-id +- 44c7dda6c3 spec: use %{build_ldflags} +- 0009a34354 Move virtiofsd to separate package +- 34d1b200b3 Utilize --firmware configure option +- 2800e1dd03 spec: Switch toolchain to Clang/LLVM (except process-patches.sh) +- e8a70f500f spec: Use safe-stack for x86_64 +- e29445d50d spec: Reenable write support for VMDK etc. 
in tools +- a4fe2a3e16 redhat: Disable LTO on non-x86 architectures + +Merged patches (6.2.0): +- 333452440b remove sgabios dependency +- 7d3633f184 enable pulseaudio +- bd898709b0 spec: disable use of gcrypt for crypto backends in favour of gnutls +- e4f0c6dee6 spec: Remove block-curl and block-ssh dependency +- 4dc13bfe63 spec: Build the VDI block driver +- d2f2ff3c74 spec: Explicitly include compress filter +- a7d047f9c2 Move ksmtuned files to separate package + +Merged patches (7.0.0): +- 098d4d08d0 spec: Rename qemu-kvm-hw-usbredir to qemu-kvm-device-usb-redirect +- c2bd0d6834 spec: Split qemu-kvm-ui-opengl +- 2c9cda805d spec: Introduce packages for virtio-gpu-* modules (changed as rhel device tree not set) +- d0414a3e0b spec: Introduce device-display-virtio-vga* packages +- 3534ec46d4 spec: Move usb-host module to separate package +- ddc14d4737 spec: Move qtest accel module to tests package +- 6f2c4befa6 spec: Extend qemu-kvm-core description +- 6f11866e4e (rhel/rhel-9.0.0) Update to qemu-kvm-6.2.0-6.el9 +- da0a28758f ui/clipboard: fix use-after-free regression +- 895d4d52eb spec: Remove qemu-virtiofsd +- c8c8c8bd84 spec: Fix obsolete for spice subpackages +- d46d2710b2 spec: Obsolete old usb redir subpackage +- 6f52a50b68 spec: Obsolete ssh driver + +Signed-off-by: Miroslav Rezanina +--- + .distro/85-kvm.preset | 5 - + .distro/Makefile | 100 + + .distro/Makefile.common | 40 + + .distro/README.tests | 39 + + .distro/ksm.service | 13 - + .distro/ksm.sysconfig | 4 - + .distro/ksmctl.c | 77 - + .distro/ksmtuned | 139 - + .distro/ksmtuned.conf | 21 - + .distro/ksmtuned.service | 12 - + .distro/kvm-setup | 49 - + .distro/kvm-setup.service | 14 - + .distro/modules-load.conf | 4 + + .distro/qemu-guest-agent.service | 1 - + .distro/qemu-kvm.spec.template | 4034 +++++++++++++++++++++++ + .distro/rpminspect.yaml | 6 +- + .distro/scripts/extract_build_cmd.py | 12 + + .gitignore | 1 + + README.systemtap | 43 + + meson.build | 4 +- + scripts/qemu-guest-agent/fsfreeze-hook 
| 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 25 files changed, 4290 insertions(+), 339 deletions(-) + delete mode 100644 .distro/85-kvm.preset + create mode 100644 .distro/Makefile + create mode 100644 .distro/Makefile.common + create mode 100644 .distro/README.tests + delete mode 100644 .distro/ksm.service + delete mode 100644 .distro/ksm.sysconfig + delete mode 100644 .distro/ksmctl.c + delete mode 100644 .distro/ksmtuned + delete mode 100644 .distro/ksmtuned.conf + delete mode 100644 .distro/ksmtuned.service + delete mode 100644 .distro/kvm-setup + delete mode 100644 .distro/kvm-setup.service + create mode 100644 .distro/modules-load.conf + create mode 100644 .distro/qemu-kvm.spec.template + create mode 100644 README.systemtap + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. 
This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 861de93c4f..6f7e430f0f 100644 +--- a/meson.build ++++ b/meson.build +@@ -2394,7 +2394,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. +- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. 
+ + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f59496396c..d900d8b35e 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -48,6 +48,8 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then + skip "bash version too old ==> Not running the qemu-iotests." 
+ fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ + + bool vnc_sasl_server_init(Error **errp) + { +- int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.31.1 + diff --git a/0005-Enable-disable-devices-for-RHEL.patch b/0005-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..1ffbe97 --- /dev/null +++ b/0005-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,642 @@ +From 51ec7495d69fe4b4d0b61642ca6c0e7fd7a1032d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 15 Jul 2021 03:22:36 -0400 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. 
+ +Signed-off-by: Miroslav Rezanina +-- +Rebase notes (6.1.0): +- Added CONFIG_TPM (except s390x) +- default-configs moved to configs +- Use --with-device- configure option to use rhel configs + +Rebase notes (6.2.0): +- Add CONFIG_ISA_FDC +- Do not remove -no-hpet documentation + +Rebase notes (7.0.0): +- Added CONFIG_ARM_GIC_TCG option for aarch64 +- Fixes necessary for layout change fixes +- Renamed CONFIG_ARM_GIC_TCG to CONFIG_ARM_GICV3_TCG +- Removed upstream devices + +Merged patches (6.1.0): +- c51bf45304 Remove SPICE and QXL from x86_64-rh-devices.mak +- 02fc745601 aarch64-rh-devices: add CONFIG_PVPANIC_PCI +- f2fe835153 aarch64-rh-devices: add CONFIG_PXB +- b5431733ad disable CONFIG_USB_STORAGE_BOT +- 478ba0cdf6 Disable TPM passthrough +- 2504d68a7c aarch64: Add USB storage devices +- 51c2a3253c disable ac97 audio + +Merged patches (6.2.0): +- 9f2f9fa2ba disable sga device + +Merged patches (7.0.0): +- fd7c45a5a8 redhat: Enable virtio-mem as tech-preview on x86-64 +- c9e68ea451 Enable SGX -- RH Only +--- + .distro/qemu-kvm.spec.template | 18 +-- + .../aarch64-softmmu/aarch64-rh-devices.mak | 34 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 35 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 15 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 103 ++++++++++++++++++ + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 5 +- + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/usb/meson.build | 2 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 9 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 20 files changed, 269 insertions(+), 15 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak 
+ create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..5f6ee1de5b +--- /dev/null ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,34 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GICV3_TCG=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y ++CONFIG_PVPANIC_PCI=y ++CONFIG_PXB=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..6a3e3f0227 +--- /dev/null ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,35 @@ ++include ../rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y 
++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XIVE=y ++CONFIG_TPM=y ++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/configs/devices/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..d3b38312e1 +--- /dev/null ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,15 @@ ++include ../rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..d0c9e66641 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,103 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y ++CONFIG_FDC_ISA=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y 
++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_OPENGL=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC_ISA=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS_ISA=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_SGX=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index bd9bbade70..de1e401cdf 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = 
true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 347875a0cd..ca1776121f 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -49,6 +49,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2338,6 +2340,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: 
files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 3bb6a58698..6447fdb02e 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2945,7 +2945,10 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + +- /* ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card instead"); ++ ++ /* + * Follow real hardware, cirrus card emulated has 4 MB video memory. + * Also accept 8 MB/16 MB for backward compatibility. + */ +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index ce89fd0aa3..fbcf802b13 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index 4efdf75620..5143ebaa27 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -814,6 +814,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_kbd_isa; + 
isa->build_aml = i8042_build_aml; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..282d01e374 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8a4861f45a..fcb5dfe792 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -379,10 +379,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += 
{'smartcard': usbsmartcard_ss} + endif + +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { 
.name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 976be5e0d1..dd78883410 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 405 family */ + /* PowerPC 405 cores */ +@@ -698,8 +699,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -718,6 +721,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + 
POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", CPU_POWERPC_POWER7P_v21, POWER7, +@@ -897,12 +901,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -912,6 +919,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -919,5 +927,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 6acf14d5ec..74f089d87f 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2512,6 +2512,14 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + 
return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +-- +2.31.1 + diff --git a/0005-Initial-redhat-build.patch b/0005-Initial-redhat-build.patch new file mode 100644 index 0000000..ddae98d --- /dev/null +++ b/0005-Initial-redhat-build.patch @@ -0,0 +1,351 @@ +From 19ce5ff93ddd6b8a998348f2a5f59f603c5e11b7 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 12 Oct 2018 07:31:11 +0200 +Subject: Initial redhat build + +This patch introduces redhat build structure in redhat subdirectory. In addition, +several issues are fixed in QEMU tree: + + - Change of app name for sasl_server_init in VNC code from qemu to qemu-kvm + - As we use qemu-kvm as name in all places, this is updated to be consistent + - Man page renamed from qemu to qemu-kvm + - man page is installed using make install so we have to fix it in qemu tree + +This rebase includes changes up to qemu-kvm-6.1.0-5.el9 + +Rebase notes (3.1.0): +- added new configure options + +Rebase notes (4.0.0): +- Added dependency to perl-Test-Harness (upstream) +- Added dependency to python3-sphinx (upstream) +- Change location of icons (upstream) +- Remove .desktop file (added upstream) +- Added qemu-trace-stap (added upstream) +- Removed elf2dmp (added upstream) +- Remove .buildinfo +- Added pvh.bin rom (added upstream) +- Added interop documentation files +- Use python module instead of qemu.py (upstream) + +Rebase notes (4.1.0): +- Remove edk2 files generated by build +- Switch to rhel-8.1-candidate build target +- Remove specs documentation +- Switched from libssh2 to libssh +- Add rc0 tarball usage hacks +- Added BuildRequires for wget, rpm-build and python3-sphinx +- Removed new 
unpacked files +- Update configure line to use new options + +Rebase notes (4.2.0): +- Disable iotest run during make check +- README renamed to README.rst (upstream) +- Removed ui-spice-app.so +- Added relevant changes from "505f7f4 redhat: Adding slirp to the exploded tree" +- Removed qemu-ga.8 install from spec file - installed by make +- Removed spapr-rtas.bin (upstream) +- Require newer SLOF (20191022) + +Rebase notes (5.1.0): +- Use python3 for virtio_seg_max_adjust.py test +- Removed qemu-trace-stap shebang from spec file +- Added virtiofsd.1 (upstream) +- Use out-of-tree build +- New documentation structure (upstream) +- Update local build +- Removing installed qemu-storage-daemon (added upstream) +- Removing opensbi-riscv32-sifive_u-fw_jump.bin (added upstream) +- Disable iotests (moved from Enable make check commit) +- Added missing configure options +- Reorder configure options +- qemu-pr-helper moved to /usr/libexec/ (upstream) +- Added submodules for usb-redir, smartcard-reader and qxl display (upstream) +- Added setting rc version in Makefile for build +- removed --disable-vxhs configure option (removed upstream) +- bumped required libusbx-devel version to 1.0.23 +- bumped libfdt version to 1.6.0 + +Rebase notes (5.2.0 rc0): +- Move libfdt dependency to qemu-kvm-core +- Move manpage rename from Makefile to spec file +- rename with-confsuffix configure option to with-suffix (upstream) +- Bump libusbx Requires version to 1.0.234 +- Manual copy of keymaps in spec file (BZ 1875217) +- Removed /usr/share/qemu-kvm/npcm7xx_bootrom.bin, considering it + unpackaged for now. +- Removed /usr/share/qemu-kvm/qboot.rom, considering unpackaged. +- Added build dependency for meson and ninja-build +- hw/s390/s390-pci-vfio.c hack - set NULL for g_autofree variables +- Removed Changelog (upstream) +- Fix in directory used for docs (upstream add %name so we do not pass it in configure) +- Package various .so as part of qemu-kvm-core package. 
+ +Rebase notes (5.2.0 rc2): +- Added fix for dtrace build on RHEL 8.4.0 + +Rebase notes (5.2.0 rc3): +- Added man page for qemu-pr-helper +- Added new configure options +- Update qemu-kiwi patches to v4 + +Rebase notes (6.0.0): +- update tracetool usage in spec file +- remove qemu-storage-daemon-qmp-ref man page +- remove qemu-storage-daemon man page +- Added devel documentation +- do not package virtfs-proxy-helper files +- Use --with-git-submodules instead of --(enable|disable)-git-update +- Minor build fixes for sending upstream +- g_autofree initialization fixed upstream +- Updated rc information usage +- do not package package hw-s390x-virtio-gpu-ccw.so +- Disable new switch options + +Rebase notes (6.1.0): +- Fix warning issue in block.c +- Download tarball from dist-git cache +- Removed sheepdog driver +- Added new display modules: + - hw-display-virtio-gpu-gl.so + - hw-display-virtio-gpu-pci-gl.so + - hw-display-virtio-vga-gl.so +- sasl fix moved from ui/vnc.c to ui/vnc-auth-sasl.c +- Added accel-qtest-%{kvm_target} and accel-tcg-%{kvm_target} +- Added about docs +- Use -q option for setup +- Added hw-usb-host.so +- Disable new options (bpf, nvmm, slirp-smbd) + +Rebase notes (6.2.0): +- Using internal meson +- removed --disable-jemalloc and --disable-tcmalloc configure options +- added audio-oss.so +- added fdt requirement for x86_64 +- tests/acceptance renamed to tests/avocado +- added multiboot_dma.bin +- Removed conflict relics +- Updated configure options + +Merged patches (3.1.0): +- 01f0c9f RHEL8: Add disable configure options to qemu spec file +- Spec file cleanups + +Merged patches (4.0.0): +- aa4297c Add edk2 Requires to qemu-kvm +- d124ff5779 Fixing brew build target +- eb204b5 Introduce the qemu-kvm-tests rpm +- 223cf0c Load kvm module during boot (partial) + +Merged patches (4.1.0): +- ebb6e97 redhat: Fix LOCALVERSION creation +- b0ab0cc redhat: enable tpmdev passthrough (not disabling tests) +- 7cb3c4a Enable libpmem to support nvdimm +- 
8943607 qemu-kvm.spec: bump libseccomp >= 2.4.0 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- 69e1fb2 enable virgla +- d4f6115 enable virgl, for real this time ... + +Merged patches (5.1.0): +- 5edf6bd Add support for rh-brew-module +- f77d52d redhat: ship virtiofsd vhost-user device backend +- 63f12d4 redhat: Always use module build target for rh-brew (modified) +- 9b1e140 redhat: updating the modular target +- 44b8bd0 spec: Fix python shenigans for tests + +Merged patches (5.2.0 rc0): +- 9238ce7 Add support for simpletrace +- 5797cff Remove explicit glusterfs-api dependency +- fd62478 disable virgl +- 0205018 redhat: link /etc/qemu-ga/fsfreeze-hook to /etc/qemu-kvm/ +- 3645097 redhat: Make all generated so files executable (not only block-*) + +Merged patches (5.2.0 rc2): +- pjw 99657 redhat: introduces disable_everything macro into the configure call +- pjw 99659 redhat: scripts/extract_build_cmd.py - Avoid listing empty lines +- pjw 99658 redhat: Fixing rh-local build +- pjw 99660 redhat: Add qemu-kiwi subpackage +- d2e59ce redhat: add (un/pre)install systemd hooks for qemu-ga + +Merged patches (5.2.0 rc3): +- pjw 99887 - redhat: allow Makefile rh-prep builddep to fail +- pjw 99885 - redhat: adding rh-rpm target + +Merged patches (6.0.0): +- 5ab9954a3b spec: find system python via meson +- cd0f7db11f build-system: use b_staticpic=false +- 80d2dec42c udev-kvm-check: remove the "exceeded subscription limit" message +- 38959d51c0 redhat: Allow make to inherit params from parent make for rh-local +- 1e0cfe458f redhat: moving all documentation files to qemu-kvm-docs +- d7a594d02b redhat: makes qemu respect system's crypto profile +- e2bbf1572b spec: Package qemu-storage-daemon +- 92f10993ba spec: ui-spice sub-package +- 8931e46069 spec: ui-opengl sub-package + +Merged patches (6.1.0): +- 7bb57541b3 redhat: Install the s390-netboot.img that we've built +- 
b4a8531f41 redhat: Fix "unversioned Obsoletes" warning +- 141a1693c7 redhat: Move qemu-kvm-docs dependency to qemu-kvm +- d75f59c6f9 redhat: introducting qemu-kvm-hw-usbredir +- a934d8bf44 redhat: use the standard vhost-user JSON path + +Merged patches (6.2.0): +- 4f3f04bbb6 spec: Remove qemu-kiwi build +--- + .gitignore | 1 + + .gitlab-ci.yml | 24 - + .gitlab/issue_templates/bug.md | 64 - + .gitlab/issue_templates/feature_request.md | 32 - + README.systemtap | 43 + + meson.build | 4 +- + redhat/Makefile | 90 + + redhat/Makefile.common | 48 + + redhat/README.tests | 39 + + redhat/qemu-kvm.spec.template | 3896 ++++++++++++++++++++ + redhat/scripts/extract_build_cmd.py | 5 +- + redhat/scripts/process-patches.sh | 20 +- + redhat/scripts/tarball_checksum.sh | 2 +- + redhat/udev-kvm-check.c | 19 +- + scripts/qemu-guest-agent/fsfreeze-hook | 2 +- + scripts/systemtap/conf.d/qemu_kvm.conf | 4 + + scripts/systemtap/script.d/qemu_kvm.stp | 1 + + tests/check-block.sh | 2 + + ui/vnc-auth-sasl.c | 2 +- + 19 files changed, 4142 insertions(+), 156 deletions(-) + delete mode 100644 .gitlab-ci.yml + delete mode 100644 .gitlab/issue_templates/bug.md + delete mode 100644 .gitlab/issue_templates/feature_request.md + create mode 100644 README.systemtap + create mode 100644 redhat/Makefile + create mode 100644 redhat/Makefile.common + create mode 100644 redhat/README.tests + create mode 100644 redhat/qemu-kvm.spec.template + create mode 100644 scripts/systemtap/conf.d/qemu_kvm.conf + create mode 100644 scripts/systemtap/script.d/qemu_kvm.stp + +diff --git a/README.systemtap b/README.systemtap +new file mode 100644 +index 0000000000..ad913fc990 +--- /dev/null ++++ b/README.systemtap +@@ -0,0 +1,43 @@ ++QEMU tracing using systemtap-initscript ++--------------------------------------- ++ ++You can capture QEMU trace data all the time using systemtap-initscript. This ++uses SystemTap's flight recorder mode to trace all running guests to a ++fixed-size buffer on the host. 
Old trace entries are overwritten by new ++entries when the buffer size wraps. ++ ++1. Install the systemtap-initscript package: ++ # yum install systemtap-initscript ++ ++2. Install the systemtap scripts and the conf file: ++ # cp /usr/share/qemu-kvm/systemtap/script.d/qemu_kvm.stp /etc/systemtap/script.d/ ++ # cp /usr/share/qemu-kvm/systemtap/conf.d/qemu_kvm.conf /etc/systemtap/conf.d/ ++ ++The set of trace events to enable is given in qemu_kvm.stp. This SystemTap ++script can be customized to add or remove trace events provided in ++/usr/share/systemtap/tapset/qemu-kvm-simpletrace.stp. ++ ++SystemTap customizations can be made to qemu_kvm.conf to control the flight ++recorder buffer size and whether to store traces in memory only or disk too. ++See stap(1) for option documentation. ++ ++3. Start the systemtap service. ++ # service systemtap start qemu_kvm ++ ++4. Make the service start at boot time. ++ # chkconfig systemtap on ++ ++5. Confirm that the service works. ++ # service systemtap status qemu_kvm ++ qemu_kvm is running... ++ ++When you want to inspect the trace buffer, perform the following steps: ++ ++1. Dump the trace buffer. ++ # staprun -A qemu_kvm >/tmp/trace.log ++ ++2. Start the systemtap service because the preceding step stops the service. ++ # service systemtap start qemu_kvm ++ ++3. Translate the trace record to readable format. ++ # /usr/share/qemu-kvm/simpletrace.py --no-header /usr/share/qemu-kvm/trace-events /tmp/trace.log +diff --git a/meson.build b/meson.build +index 96de1a6ef9..5f6ba86dbb 100644 +--- a/meson.build ++++ b/meson.build +@@ -2108,7 +2108,9 @@ if capstone_opt == 'internal' + # Include all configuration defines via a header file, which will wind up + # as a dependency on the object file, and thus changes here will result + # in a rebuild. 
+- '-include', 'capstone-defs.h' ++ '-include', 'capstone-defs.h', ++ ++ '-Wp,-D_GLIBCXX_ASSERTIONS', + ] + + libcapstone = static_library('capstone', +diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook +index 13aafd4845..e9b84ec028 100755 +--- a/scripts/qemu-guest-agent/fsfreeze-hook ++++ b/scripts/qemu-guest-agent/fsfreeze-hook +@@ -8,7 +8,7 @@ + # request, it is issued with "thaw" argument after filesystem is thawed. + + LOGFILE=/var/log/qga-fsfreeze-hook.log +-FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d ++FSFREEZE_D=$(dirname -- "$(realpath $0)")/fsfreeze-hook.d + + # Check whether file $1 is a backup or rpm-generated file and should be ignored + is_ignored_file() { +diff --git a/scripts/systemtap/conf.d/qemu_kvm.conf b/scripts/systemtap/conf.d/qemu_kvm.conf +new file mode 100644 +index 0000000000..372d8160a4 +--- /dev/null ++++ b/scripts/systemtap/conf.d/qemu_kvm.conf +@@ -0,0 +1,4 @@ ++# Force load uprobes (see BZ#1118352) ++stap -e 'probe process("/usr/libexec/qemu-kvm").function("main") { printf("") }' -c true ++ ++qemu_kvm_OPT="-s4" # per-CPU buffer size, in megabytes +diff --git a/scripts/systemtap/script.d/qemu_kvm.stp b/scripts/systemtap/script.d/qemu_kvm.stp +new file mode 100644 +index 0000000000..c04abf9449 +--- /dev/null ++++ b/scripts/systemtap/script.d/qemu_kvm.stp +@@ -0,0 +1 @@ ++probe qemu.kvm.simpletrace.handle_qmp_command,qemu.kvm.simpletrace.monitor_protocol_*,qemu.kvm.simpletrace.migrate_set_state {} +diff --git a/tests/check-block.sh b/tests/check-block.sh +index f86cb863de..6d38340d49 100755 +--- a/tests/check-block.sh ++++ b/tests/check-block.sh +@@ -69,6 +69,8 @@ else + fi + fi + ++exit 0 ++ + cd tests/qemu-iotests + + # QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests +diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c +index 47fdae5b21..2a950caa2a 100644 +--- a/ui/vnc-auth-sasl.c ++++ b/ui/vnc-auth-sasl.c +@@ -42,7 +42,7 @@ + + bool vnc_sasl_server_init(Error **errp) + { +- 
int saslErr = sasl_server_init(NULL, "qemu"); ++ int saslErr = sasl_server_init(NULL, "qemu-kvm"); + + if (saslErr != SASL_OK) { + error_setg(errp, "Failed to initialize SASL auth: %s", +-- +2.27.0 + diff --git a/0006-Enable-disable-devices-for-RHEL.patch b/0006-Enable-disable-devices-for-RHEL.patch new file mode 100644 index 0000000..a3fa5d1 --- /dev/null +++ b/0006-Enable-disable-devices-for-RHEL.patch @@ -0,0 +1,795 @@ +From 3d5a82d172345d17e300672909835262ff9dc917 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:11:07 +0200 +Subject: Enable/disable devices for RHEL + +This commit adds all changes related to changes in supported devices. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (qemu 3.1.0) +- spapr_rng disabled in default_config +- new hyperv.mak in default configs +- Move changes from x86_64-softmmu.mak to i386-softmmu.mak +- Added CONFIG_VIRTIO_MMIO to aarch64-softmmu.mak +- Removed config_vga_isa.c changes as no longer needed +- Removed new devices + +Rebase notes (4.0.0): +- Added CONFIG_PCI_EXPRESS_GENERIC_BRIDGE for aarch64-softmmu.mak +- Added CONFIG_ARM_VIRT for aarch64-softmmu.mak +- Switch to KConfig (upstream) + - Using device whitelist + without-defualt-devices option + +Rebase notes (4.1.0): +- Added CONFIG_USB_OHCI_PCI for ppc64 +- Added CONFIG_XIVE_KVM for ppc64 +- Added CONFIG_ACPI_PCI for x86_64 +- Added CONFIG_SEMIHOSTING for aarch64 +- Cleanup aarch64 devices +- Do not build a15mpcore.c +- Removed ide-isa.c stub file +- Use CONFIG_USB_EHCI_PCI on x86_64 (new upstream) + +Rebase notes (4.2.0-rc0): +- Use conditional build for isa-superio.c (upstream change) +- Rename PCI_PIIX to PCI_I440FX (upstream change) + +Rebase notes (4.2.0-rc3): +- Disabled ccid-card-emulated (patch 92566) +- Disabled vfio-pci-igd-lpc-bridge (patch 92565) + +Rebase notes (5.1.0): +- added CONFIG_PCI_EXPRESS on ppc64 (due to upstream dependency) +- Added CONFIG_NVDIMM +- updated cortex-15 disabling to upstream code +- Add 
CONFIG_ACPI_APEI for aarch64 +- removed obsolete hw/bt/Makefile.objs chunk +- removed unnecessary changes in target/i386/cpu.c + +Rebase notes (5.2.0 rc0): +- Added CONFIG_USB_XHCI_PCI on aarch64 ppc64 and x86_64 +- remove vl.c hack for no hpet +- Enable CONFIG_PTIMER for aarch64 +- Do not package hw-display-virtio-gpu.so on s390x + +Rebase notes (5.2.0 rc1): +- Added CONFIG_ARM_GIC for aarch64 (required for build) + +Rebase notes (weekly-210113): +- Removed XICS_KVM, XICS_SPAPR, XIVE_KVM and XIVE_SPAPR config (removed upstream) + +Rebase notes (weekly-210120): +- Add CONFIG_ARM_COMPATIBLE_SEMIHOSTING option + +Rebase notes (weekly-210203): +- Rename CONFIG_PVPANIC to CONFIG_PVPANIC_ISA + +Rebase notes (weekly-210317): +- Add new USB_STORAGE_CORE and USB_STORAGE_CLASSIC config for ppc64 and x86_64 +- Update disabling TCG cpus for AArch64 + +Rebase notes (weekly-210519): +- Do not use CONFIG_SPICE and CONFIG_OPENGL in default configs + +Rebase notes (weekly-210623): +- Add CONFIG_TPM for archs with used TPM functionality + +Rebase notes (weekly-210714): +- default_configs moved to configs + +Rebase notes (6.1.0 rc2): +- Use --with-device-ARCH configure option to use redhat config files + +Rebase notes (6.2.0 rc3): +- Do not remove -no-hpet documentation +Merged patches (qemu 3.1.0): +- d51e082 Re-enable CONFIG_HYPERV_TESTDEV +- 4b889f3 Declare cirrus-vga as deprecated +- b579d32 Do not build bluetooth support +- 3eef52a Disable CONFIG_IPMI and CONFIG_I2C for ppc64 +- 9caf292 Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 + +Merged patches (4.1.0): +- 20a51f6 fdc: Revert downstream disablement of device "floppy" +- f869cc0 fdc: Restrict floppy controllers to RHEL-7 machine types +- 5909721 aarch64: Compile out IOH3420 +- 27b7c44 rh: set CONFIG_BOCHS_DISPLAY=y for x86 (partial) +- 495a27d x86_64-rh-devices: add missing TPM passthrough +- e1fe9fe x86_64-rh-devices: enable TPM emulation (partial) + +Merged patches (4.2.0): +- f7587dd RHEL: disable hostmem-memfd + +Merged 
patches (5.1.0): +- 4543a3c i386: Remove cpu64-rhel6 CPU model +- 96533 aarch64: Remove tcg cpu types (pjw commit) +- 559d589 Revert "RHEL: disable hostmem-memfd" +- 441128e enable ramfb + +Merged patches (5.2.0 rc0): +- f70eb50 RHEL-only: Enable vTPM for POWER in downstream configs +- 69d8ae7 redhat: fix 5.0 rebase missing ISA TPM TIS +- 8310f89 RHEL-only: Enable vTPM for ARM in downstream configs +- 4a8ccfd Disable TPM passthrough backend on ARM + +Merged patches (6.0.0): +- ff817df9e3 config: enable VFIO_CCW +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - without machine type chunks +- efac91b2b4 default-configs: Enable vhost-user-blk + +Merged patches (weekly-210630): +- 59a178acff disable CONFIG_USB_STORAGE_BOT + +Merged patches (6.1.0 rc2): +- 86f0025f16 aarch64: Add USB storage devices +--- + .../aarch64-softmmu/aarch64-rh-devices.mak | 31 ++++++ + .../ppc64-softmmu/ppc64-rh-devices.mak | 36 ++++++ + configs/devices/rh-virtio.mak | 10 ++ + .../s390x-softmmu/s390x-rh-devices.mak | 16 +++ + .../x86_64-softmmu/x86_64-rh-devices.mak | 104 ++++++++++++++++++ + .../x86_64-upstream-devices.mak | 4 + + hw/acpi/ich9.c | 4 +- + hw/arm/meson.build | 2 +- + hw/block/fdc.c | 10 ++ + hw/char/parallel.c | 9 ++ + hw/cpu/meson.build | 5 +- + hw/display/cirrus_vga.c | 3 + + hw/ide/piix.c | 5 +- + hw/input/pckbd.c | 2 + + hw/net/e1000.c | 2 + + hw/ppc/spapr_cpu_core.c | 2 + + hw/timer/hpet.c | 8 ++ + hw/usb/meson.build | 2 +- + redhat/qemu-kvm.spec.template | 9 +- + target/arm/cpu_tcg.c | 10 ++ + target/ppc/cpu-models.c | 10 ++ + target/s390x/cpu_models_sysemu.c | 3 + + target/s390x/kvm/kvm.c | 8 ++ + 23 files changed, 286 insertions(+), 9 deletions(-) + create mode 100644 configs/devices/aarch64-softmmu/aarch64-rh-devices.mak + create mode 100644 configs/devices/ppc64-softmmu/ppc64-rh-devices.mak + create mode 100644 configs/devices/rh-virtio.mak + create mode 100644 configs/devices/s390x-softmmu/s390x-rh-devices.mak + create mode 100644 
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak + create mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +new file mode 100644 +index 0000000000..0d4f9e6e4b +--- /dev/null ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -0,0 +1,31 @@ ++include ../rh-virtio.mak ++ ++CONFIG_ARM_GIC_KVM=y ++CONFIG_ARM_GIC=y ++CONFIG_ARM_SMMUV3=y ++CONFIG_ARM_V7M=y ++CONFIG_ARM_VIRT=y ++CONFIG_EDID=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_SCSI=y ++CONFIG_SEMIHOSTING=y ++CONFIG_USB=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VIRTIO_MMIO=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_XIO3130=y ++CONFIG_NVDIMM=y ++CONFIG_ACPI_APEI=y ++CONFIG_TPM=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_TIS_SYSBUS=y ++CONFIG_PTIMER=y ++CONFIG_ARM_COMPATIBLE_SEMIHOSTING=y +diff --git a/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +new file mode 100644 +index 0000000000..73e3ee0293 +--- /dev/null ++++ b/configs/devices/ppc64-softmmu/ppc64-rh-devices.mak +@@ -0,0 +1,36 @@ ++include ../rh-virtio.mak ++ ++CONFIG_DIMM=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PCI=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PSERIES=y ++CONFIG_SCSI=y ++CONFIG_SPAPR_VSCSI=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_OHCI=y ++CONFIG_USB_OHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_XICS=y ++CONFIG_XIVE=y ++CONFIG_TPM=y 
++CONFIG_TPM_SPAPR=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/rh-virtio.mak b/configs/devices/rh-virtio.mak +new file mode 100644 +index 0000000000..94ede1b5f6 +--- /dev/null ++++ b/configs/devices/rh-virtio.mak +@@ -0,0 +1,10 @@ ++CONFIG_VIRTIO=y ++CONFIG_VIRTIO_BALLOON=y ++CONFIG_VIRTIO_BLK=y ++CONFIG_VIRTIO_GPU=y ++CONFIG_VIRTIO_INPUT=y ++CONFIG_VIRTIO_INPUT_HOST=y ++CONFIG_VIRTIO_NET=y ++CONFIG_VIRTIO_RNG=y ++CONFIG_VIRTIO_SCSI=y ++CONFIG_VIRTIO_SERIAL=y +diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +new file mode 100644 +index 0000000000..165c082e87 +--- /dev/null ++++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak +@@ -0,0 +1,16 @@ ++include ../rh-virtio.mak ++ ++CONFIG_PCI=y ++CONFIG_S390_CCW_VIRTIO=y ++CONFIG_S390_FLIC=y ++CONFIG_S390_FLIC_KVM=y ++CONFIG_SCLPCONSOLE=y ++CONFIG_SCSI=y ++CONFIG_TERMINAL3270=y ++CONFIG_VFIO=y ++CONFIG_VFIO_AP=y ++CONFIG_VFIO_CCW=y ++CONFIG_VFIO_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VIRTIO_CCW=y ++CONFIG_WDT_DIAG288=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +new file mode 100644 +index 0000000000..ddf036f042 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -0,0 +1,104 @@ ++include ../rh-virtio.mak ++include x86_64-upstream-devices.mak ++ ++CONFIG_AC97=y ++CONFIG_ACPI=y ++CONFIG_ACPI_PCI=y ++CONFIG_ACPI_CPU_HOTPLUG=y ++CONFIG_ACPI_MEMORY_HOTPLUG=y ++CONFIG_ACPI_NVDIMM=y ++CONFIG_ACPI_SMBUS=y ++CONFIG_ACPI_VMGENID=y ++CONFIG_ACPI_X86=y ++CONFIG_ACPI_X86_ICH=y ++CONFIG_AHCI=y ++CONFIG_APIC=y ++CONFIG_APM=y ++CONFIG_BOCHS_DISPLAY=y ++CONFIG_DIMM=y ++CONFIG_E1000E_PCI_EXPRESS=y ++CONFIG_E1000_PCI=y ++CONFIG_EDU=y ++CONFIG_FDC=y ++CONFIG_FDC_SYSBUS=y ++CONFIG_FW_CFG_DMA=y ++CONFIG_HDA=y ++CONFIG_HYPERV=y ++CONFIG_HYPERV_TESTDEV=y ++CONFIG_I2C=y ++CONFIG_I440FX=y ++CONFIG_I8254=y ++CONFIG_I8257=y 
++CONFIG_I8259=y ++CONFIG_I82801B11=y ++CONFIG_IDE_CORE=y ++CONFIG_IDE_PCI=y ++CONFIG_IDE_PIIX=y ++CONFIG_IDE_QDEV=y ++CONFIG_IOAPIC=y ++CONFIG_IOH3420=y ++CONFIG_ISA_BUS=y ++CONFIG_ISA_DEBUG=y ++CONFIG_ISA_TESTDEV=y ++CONFIG_LPC_ICH9=y ++CONFIG_MC146818RTC=y ++CONFIG_MEM_DEVICE=y ++CONFIG_NVDIMM=y ++CONFIG_PAM=y ++CONFIG_PC=y ++CONFIG_PCI=y ++CONFIG_PCIE_PORT=y ++CONFIG_PCI_DEVICES=y ++CONFIG_PCI_EXPRESS=y ++CONFIG_PCI_EXPRESS_Q35=y ++CONFIG_PCI_I440FX=y ++CONFIG_PCI_TESTDEV=y ++CONFIG_PCKBD=y ++CONFIG_PCSPK=y ++CONFIG_PC_ACPI=y ++CONFIG_PC_PCI=y ++CONFIG_PFLASH_CFI01=y ++CONFIG_PVPANIC_ISA=y ++CONFIG_PXB=y ++CONFIG_Q35=y ++CONFIG_QXL=y ++CONFIG_RTL8139_PCI=y ++CONFIG_SCSI=y ++CONFIG_SERIAL=y ++CONFIG_SERIAL_ISA=y ++CONFIG_SERIAL_PCI=y ++CONFIG_SEV=y ++CONFIG_SGA=y ++CONFIG_SMBIOS=y ++CONFIG_SMBUS_EEPROM=y ++CONFIG_TEST_DEVICES=y ++CONFIG_USB=y ++CONFIG_USB_EHCI=y ++CONFIG_USB_EHCI_PCI=y ++CONFIG_USB_SMARTCARD=y ++CONFIG_USB_STORAGE_CORE=y ++CONFIG_USB_STORAGE_CLASSIC=y ++CONFIG_USB_UHCI=y ++CONFIG_USB_XHCI=y ++CONFIG_USB_XHCI_NEC=y ++CONFIG_USB_XHCI_PCI=y ++CONFIG_VFIO=y ++CONFIG_VFIO_PCI=y ++CONFIG_VGA=y ++CONFIG_VGA_CIRRUS=y ++CONFIG_VGA_PCI=y ++CONFIG_VHOST_USER=y ++CONFIG_VHOST_USER_BLK=y ++CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_VGA=y ++CONFIG_VMMOUSE=y ++CONFIG_VMPORT=y ++CONFIG_VTD=y ++CONFIG_WDT_IB6300ESB=y ++CONFIG_WDT_IB700=y ++CONFIG_XIO3130=y ++CONFIG_TPM=y ++CONFIG_TPM_CRB=y ++CONFIG_TPM_TIS_ISA=y ++CONFIG_TPM_EMULATOR=y ++CONFIG_TPM_PASSTHROUGH=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +new file mode 100644 +index 0000000000..2cd20f54d2 +--- /dev/null ++++ b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +@@ -0,0 +1,4 @@ ++# We need "isa-parallel" ++CONFIG_PARALLEL=y ++# We need "hpet" ++CONFIG_HPET=y +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index ebe08ed831..381ef2ddcf 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -438,8 +438,8 @@ 
void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 0; +- pm->disable_s4 = 0; ++ pm->disable_s3 = 1; ++ pm->disable_s4 = 1; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/arm/meson.build b/hw/arm/meson.build +index 721a8eb8be..87ed4dd914 100644 +--- a/hw/arm/meson.build ++++ b/hw/arm/meson.build +@@ -31,7 +31,7 @@ arm_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) + arm_ss.add(when: 'CONFIG_ZYNQ', if_true: files('xilinx_zynq.c')) + arm_ss.add(when: 'CONFIG_SABRELITE', if_true: files('sabrelite.c')) + +-arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) ++#arm_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m.c')) + arm_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210.c')) + arm_ss.add(when: 'CONFIG_PXA2XX', if_true: files('pxa2xx.c', 'pxa2xx_gpio.c', 'pxa2xx_pic.c')) + arm_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic.c')) +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 21d18ac2e3..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -48,6 +48,8 @@ + #include "qom/object.h" + #include "fdc-internal.h" + ++#include "hw/boards.h" ++ + /********************************************************/ + /* debug Floppy devices */ + +@@ -2337,6 +2339,14 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + FDrive *drive; + static int command_tables_inited = 0; + ++ /* Restricted for Red Hat Enterprise Linux: */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (!strstr(mc->name, "-rhel7.")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (fdctrl->fallback == FLOPPY_DRIVE_TYPE_AUTO) { + error_setg(errp, "Cannot choose a fallback FDrive type of 'auto'"); + return; 
+diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index b45e67bfbb..e5f108211b 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,6 +29,7 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" ++#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -534,6 +535,14 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/cpu/meson.build b/hw/cpu/meson.build +index 9e52fee9e7..bb71c9f3e7 100644 +--- a/hw/cpu/meson.build ++++ b/hw/cpu/meson.build +@@ -1,6 +1,7 @@ +-softmmu_ss.add(files('core.c', 'cluster.c')) ++#softmmu_ss.add(files('core.c', 'cluster.c')) ++softmmu_ss.add(files('core.c')) + + specific_ss.add(when: 'CONFIG_ARM11MPCORE', if_true: files('arm11mpcore.c')) + specific_ss.add(when: 'CONFIG_REALVIEW', if_true: files('realview_mpcore.c')) + specific_ss.add(when: 'CONFIG_A9MPCORE', if_true: files('a9mpcore.c')) +-specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) ++#specific_ss.add(when: 'CONFIG_A15MPCORE', if_true: files('a15mpcore.c')) +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index fdca6ca659..fa1a7eee51 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -2945,6 +2945,9 @@ static void pci_cirrus_vga_realize(PCIDevice *dev, Error **errp) + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + int16_t device_id = pc->device_id; + ++ warn_report("'cirrus-vga' is deprecated, " ++ "please use a different VGA card 
instead"); ++ + /* follow real hardware, cirrus card emulated has 4 MB video memory. + Also accept 8 MB/16 MB for backward compatibility. */ + if (s->vga.vram_size_mb != 4 && s->vga.vram_size_mb != 8 && +diff --git a/hw/ide/piix.c b/hw/ide/piix.c +index ce89fd0aa3..fbcf802b13 100644 +--- a/hw/ide/piix.c ++++ b/hw/ide/piix.c +@@ -232,7 +232,8 @@ static void piix3_ide_class_init(ObjectClass *klass, void *data) + k->device_id = PCI_DEVICE_ID_INTEL_82371SB_1; + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +- dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix3_ide_info = { +@@ -261,6 +262,8 @@ static void piix4_ide_class_init(ObjectClass *klass, void *data) + k->class_id = PCI_CLASS_STORAGE_IDE; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->hotpluggable = false; ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo piix4_ide_info = { +diff --git a/hw/input/pckbd.c b/hw/input/pckbd.c +index baba62f357..bc360347ea 100644 +--- a/hw/input/pckbd.c ++++ b/hw/input/pckbd.c +@@ -796,6 +796,8 @@ static void i8042_class_initfn(ObjectClass *klass, void *data) + dc->vmsd = &vmstate_kbd_isa; + isa->build_aml = i8042_build_aml; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); ++ /* Disabled for Red Hat Enterprise Linux: */ ++ dc->user_creatable = false; + } + + static const TypeInfo i8042_info = { +diff --git a/hw/net/e1000.c b/hw/net/e1000.c +index f5bc81296d..282d01e374 100644 +--- a/hw/net/e1000.c ++++ b/hw/net/e1000.c +@@ -1821,6 +1821,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux 7 */ + { + .name = "e1000-82544gc", + .device_id = E1000_DEV_ID_82544GC_COPPER, +@@ -1833,6 +1834,7 @@ static const E1000Info e1000_devices[] = { + .revision = 0x03, + .phy_id2 = 
E1000_PHY_ID2_8254xx_DEFAULT, + }, ++#endif + }; + + static void e1000_register_types(void) +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 58e7341cb7..8ba34f6a1d 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -370,10 +370,12 @@ static const TypeInfo spapr_cpu_core_type_infos[] = { + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), ++#endif + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 9520471be2..202e032524 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,6 +733,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + ++ /* Restricted for Red Hat Enterprise Linux */ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (strstr(mc->name, "rhel")) { ++ error_setg(errp, "Device %s is not supported with machine type %s", ++ object_get_typename(OBJECT(dev)), mc->name); ++ return; ++ } ++ + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +diff --git a/hw/usb/meson.build b/hw/usb/meson.build +index de853d780d..0776ae6a20 100644 +--- a/hw/usb/meson.build ++++ b/hw/usb/meson.build +@@ -52,7 +52,7 @@ softmmu_ss.add(when: 'CONFIG_USB_SMARTCARD', if_true: files('dev-smartcard-reade + if cacard.found() + usbsmartcard_ss = ss.source_set() + usbsmartcard_ss.add(when: 'CONFIG_USB_SMARTCARD', +- if_true: [cacard, files('ccid-card-emulated.c', 'ccid-card-passthru.c')]) ++ if_true: [cacard, files('ccid-card-passthru.c')]) + hw_usb_modules += {'smartcard': usbsmartcard_ss} + endif + +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 
13d0e9b195..3826fa5122 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -22,6 +22,7 @@ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -375,6 +376,7 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. */ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -400,6 +402,7 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -445,6 +448,7 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } ++#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -488,6 +492,7 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -928,6 +933,7 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } ++#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1007,6 +1013,7 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = 
"arm1026", .initfn = arm1026_initfn }, +@@ -1022,7 +1029,9 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, ++#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = cortex_m3_initfn, +@@ -1053,6 +1062,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, ++#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index 4baa111713..d779c4d1d5 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -66,6 +66,7 @@ + #define POWERPC_DEF(_name, _pvr, _type, _desc) \ + POWERPC_DEF_SVR(_name, _desc, _pvr, POWERPC_SVR_NONE, _type) + ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + /* Embedded PowerPC */ + /* PowerPC 401 family */ + POWERPC_DEF("401", CPU_POWERPC_401, 401, +@@ -740,8 +741,10 @@ + "PowerPC 7447A v1.2 (G4)") + POWERPC_DEF("7457a_v1.2", CPU_POWERPC_74x7A_v12, 7455, + "PowerPC 7457A v1.2 (G4)") ++#endif + /* 64 bits PowerPC */ + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + POWERPC_DEF("970_v2.2", CPU_POWERPC_970_v22, 970, + "PowerPC 970 v2.2") + POWERPC_DEF("970fx_v1.0", CPU_POWERPC_970FX_v10, 970, +@@ -760,6 +763,7 @@ + "PowerPC 970MP v1.1") + POWERPC_DEF("power5+_v2.1", CPU_POWERPC_POWER5P_v21, POWER5P, + "POWER5+ v2.1") ++#endif + POWERPC_DEF("power7_v2.3", CPU_POWERPC_POWER7_v23, POWER7, + "POWER7 v2.3") + POWERPC_DEF("power7+_v2.1", 
CPU_POWERPC_POWER7P_v21, POWER7, +@@ -784,6 +788,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Embedded and 32-bit CPUs disabled for Red Hat Enterprise Linux */ + { "403", "403gc" }, + { "405", "405d4" }, + { "405cr", "405crc" }, +@@ -942,12 +947,15 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "7447a", "7447a_v1.2" }, + { "7457a", "7457a_v1.2" }, + { "apollo7pm", "7457a_v1.0" }, ++#endif + #if defined(TARGET_PPC64) ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "970", "970_v2.2" }, + { "970fx", "970fx_v3.1" }, + { "970mp", "970mp_v1.1" }, + { "power5+", "power5+_v2.1" }, + { "power5gs", "power5+_v2.1" }, ++#endif + { "power7", "power7_v2.3" }, + { "power7+", "power7+_v2.1" }, + { "power8e", "power8e_v2.1" }, +@@ -957,6 +965,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "power10", "power10_v2.0" }, + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* Generic PowerPCs */ + #if defined(TARGET_PPC64) + { "ppc64", "970fx_v3.1" }, +@@ -964,5 +973,6 @@ PowerPCCPUAlias ppc_cpu_aliases[] = { + { "ppc32", "604" }, + { "ppc", "604" }, + { "default", "604" }, ++#endif + { NULL, NULL } + }; +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 05c3ccaaff..6a04ccab1b 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -36,6 +36,9 @@ static void check_unavailable_features(const S390CPUModel *max_model, + (max_model->def->gen == model->def->gen && + max_model->def->ec_ga < model->def->ec_ga)) { + list_add_feat("type", unavailable); ++ } else if (model->def->gen < 11 && kvm_enabled()) { ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ list_add_feat("type", unavailable); + } + + /* detect missing features if any to properly report them */ +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 5b1fdb55c4..c52434985b 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2508,6 +2508,14 @@ 
void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) + error_setg(errp, "KVM doesn't support CPU models"); + return; + } ++ ++ /* Older CPU models are not supported on Red Hat Enterprise Linux */ ++ if (model->def->gen < 11) { ++ error_setg(errp, "KVM: Unsupported CPU type specified: %s", ++ MACHINE(qdev_get_machine())->cpu_type); ++ return; ++ } ++ + prop.cpuid = s390_cpuid_from_cpu_model(model); + prop.ibc = s390_ibc_from_cpu_model(model); + /* configure cpu features indicated via STFL(e) */ +-- +2.27.0 + diff --git a/0006-Machine-type-related-general-changes.patch b/0006-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..c3b08a4 --- /dev/null +++ b/0006-Machine-type-related-general-changes.patch @@ -0,0 +1,619 @@ +From a525db3951dc68c469d1f51bdc69ab6e75e72c37 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina +-- +Rebase notes (6.2.0): +- Do not duplicate minimal_version_id for piix4_pm +- Remove empty line chunks in serial.c +- Remove migration.h include in serial.c +- Update hw_compat_rhel_8_5 (from MR 66) + +Rebase notes (7.0.0): +- Remove downstream changes leftovers in hw/rtc/mc146818rtc.c +- Remove unnecessary change in hw/usb/hcd-uhci.c + +Merged patches (6.1.0): +- f2fb42a3c6 redhat: add missing entries in hw_compat_rhel_8_4 +- 1949ec258e hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- a3995e2eff Remove RHEL 7.0.0 machine type (only generic changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only generic changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only generic changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only generic changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only generic changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only generic changes) + +Merged patches (6.2.0): +- d687ac13d2 redhat: Define hw_compat_rhel_8_5 + +Merged patches (7.0.0): +- ef5afcc86d Fix virtio-net-pci* "vectors" compat +- 168f0d56e3 compat: Update hw_compat_rhel_8_5 with 6.2.0 RC2 changes +--- + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/core/machine.c | 186 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/rtl8139.c | 4 +- + hw/smbios/smbios.c | 46 ++++++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++++--- + hw/usb/hcd-xhci-pci.h | 1 + + include/hw/boards.h | 21 ++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + 14 files changed, 316 insertions(+), 25 deletions(-) + +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index fe5625d07a..28544e78c3 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -287,7 +287,7 @@ static bool vmstate_test_migrate_acpi_index(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { + .name = 
"piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d2e5ecd234..6a84031fd7 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1596,7 +1596,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_TYPE_64); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_TYPE_64); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 1e23fdc14b..ea430d844e 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,192 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 ++ */ ++GlobalProperty hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, ++}; ++const size_t 
hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", "x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { 
"usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { 
"memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-gpu-device", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* 
hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ + GlobalProperty hw_compat_6_2[] = { + { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, + }; +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 46abbc5653..505467059b 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -88,7 +88,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index b72c03d0a6..c797e98312 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + pcms->smbios_entry_point_type); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 1780f79bc1..b695f88c45 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + pcms->smbios_entry_point_type); + } + +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 6b65823b4b..75dacabc43 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = 
rtl8139_pre_save, +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 60349ee402..0edcc98434 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -639,7 +642,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, T2_BASE, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -914,7 +917,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -935,11 +941,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows 
driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. ++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) 
{ +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " 
+- "msi=off with this machine type.\n"); ++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +diff --git a/include/hw/boards.h b/include/hw/boards.h +index c92ac8815c..c90a19b4d1 100644 +--- 
a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -449,4 +449,25 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 4b7ad77a44..9acff96a86 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -272,7 +272,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 1a27de9c8b..91331059d9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -113,6 +113,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char 
*smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +-- +2.31.1 + diff --git a/0007-Add-aarch64-machine-types.patch b/0007-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..3c44b11 --- /dev/null +++ b/0007-Add-aarch64-machine-types.patch @@ -0,0 +1,352 @@ +From 697aaa43e3c0f20fc312f06be6c1093f1ba907e1 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina +--- +Rebase notes (6.1.0): +- Use CONFIG_TPM check when using TPM structures +- Add support for default_bus_bypass_iommu +- ea4c0b32d9 arm/virt: Register highmem and gic-version as class properties +- 895e1fa86a hw/arm/virt: Add 8.5 and 9.0 machine types and remove older ones + +Rebase notes (7.0.0): +- Added dtb-kaslr-seed option +- Set no_tcg_lpa2 to true + +Merged patches (6.2.0): +- 9a3d4fde0e hw/arm/virt: Remove 9.0 machine type +- f7d04d6695 hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type + +Merged patches (7.0.0): +- 3b82be3dd3 redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +- c354a86c9b hw/arm/virt: Register "iommu" as a class property +- c1a2630dc9 hw/arm/virt: Register "its" as a class property +- 9d8c61dc93 hw/arm/virt: Rename default_bus_bypass_iommu +- a1d1b6eeb6 hw/arm/virt: Expose the 'RAS' option +- 47f8fe1b82 hw/arm/virt: Add 9.0 machine type and remove 8.5 one +- ed2346788f hw/arm/virt: Check no_tcg_its and minor style changes +--- + hw/arm/virt.c | 234 +++++++++++++++++++++++++++++++++++++++++- + include/hw/arm/virt.h | 8 ++ + 2 files changed, 241 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a84031fd7..e06862d22a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -80,6 +80,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* 
Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -106,7 +107,48 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -2250,6 +2292,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2277,6 +2320,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem(Object *obj, Error **errp) + { +@@ -2402,6 +2446,7 @@ static void virt_set_ras(Object *obj, bool value, Error **errp) + vms->ras = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_mte(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2415,6 +2460,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2818,6 +2864,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -3206,3 +3253,188 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ 
object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ ++ object_class_property_add_bool(oc, "ras", virt_get_ras, ++ virt_set_ras); ++ object_class_property_set_description(oc, "ras", ++ "Set on/off to enable/disable reporting host memory errors " ++ "to a KVM guest using ACPI and guest external abort exceptions"); ++ ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." 
++ "The string may be up to 8 bytes in size"); ++ ++ object_class_property_add_bool(oc, "dtb-kaslr-seed", ++ virt_get_dtb_kaslr_seed, ++ virt_set_dtb_kaslr_seed); ++ object_class_property_set_description(oc, "dtb-kaslr-seed", ++ "Set off to disable passing of kaslr-seed " ++ "dtb node to guest"); ++} ++ ++static void rhel_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ ++ /* High memory is enabled by default */ ++ vms->highmem = true; ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ /* Supply a kaslr-seed by default */ ++ vms->dtb_kaslr_seed = true; ++ ++ vms->irqmap = a15irqmap; ++ ++ virt_flash_create(vms); ++ ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = 
(InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel900_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ ++ /* Disable FEAT_LPA2 since old kernels (<= v5.12) don't boot with that feature */ ++ vmc->no_tcg_lpa2 = true; ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(9, 0, 0) +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 7e76ee2619..9b1efe8f0e 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -179,9 +179,17 @@ struct VirtMachineState { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); + +-- +2.31.1 + diff --git a/0007-Machine-type-related-general-changes.patch b/0007-Machine-type-related-general-changes.patch new file mode 100644 index 0000000..f7bd665 --- /dev/null +++ b/0007-Machine-type-related-general-changes.patch @@ -0,0 +1,1071 @@ +From adca046d9db670637b9bf2b24f7a4349a9fe2628 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 11 Jan 2019 09:54:45 +0100 +Subject: Machine type related general changes + +This patch is first part of original "Add RHEL machine types" patch we +split to allow easier review. It contains changes not related to any +architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove e1000 device duplication changes to reflect upstream solution +- Rewrite machine compat properties to upstream solution + +Rebase changes (4.1.0): +- Removed optional flag for machine compat properties (upstream) +- Remove c3e002cb chunk from hw/net/e1000.c +- Reorder compat structures +- Use one format for compat structures +- Added compat for virtio-balloon-pci.any_layout for rhel71 + +Rebase changes (weekly-210303): +- Added rhel 8.4.0 compat based on 5.2 compat + +Rebase changes (weekly-211103): +- Do not duplicate minimal_version_id for piix4_pm + +Merged patches (4.0.0): +- d4c0957 compat: Generic HW_COMPAT_RHEL7_6 +- cbac773 virtio: Make disable-legacy/disable-modern compat properties optional + +Merged patches (4.1.0): +- 479ad30 redhat: fix cut'n'paste garbage in hw_compat comments +- f19738e compat: Generic hw_compat_rhel_8_0 + +Merged patches (4.2.0): +- 9f2bfaa machine types: Update hw_compat_rhel_8_0 from hw_compat_4_0 +- ca4a5e8 virtio: Make disable-legacy/disable-modern compat properties optional +- compat: Generic hw_compat_rhel_8_1 (patch 93040/92956) + +Merged patches (5.1.0): +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partially) +- 8f9f4d8 compat: disable 'edid' for virtio-gpu-ccw + +Merged patches (5.2.0 rc0): +- 8348642 redhat: define hw_compat_8_2 +- 45b8402 redhat: define hw_compat_8_2 +- 4effa71 redhat: Update hw_compat_8_2 +- 0e84dff virtio: skip legacy support check on machine types less than 5.1 (partially) + +Merged patches (6.0.0): +- fa0063ba67 redhat: Define hw_compat_8_3 +- d98e328c8d usb/hcd-xhci-pci: Fixup capabilities ordering (again) +- b8a2578117 virtio: move 'use-disabled-flag' property to hw_compat_4_2 +- f7940b04c8 virtio-pci: compat page aligned ATS + +Merged patches (weekly-210602): +- 26f25108c1 redhat: add missing entries in hw_compat_rhel_8_4 + +Merged patches (weekly-211006): +- 43c4b9bea6 redhat: Define
hw_compat_rhel_8_5 +--- + hw/acpi/ich9.c | 15 ++ + hw/acpi/piix4.c | 6 +- + hw/arm/virt.c | 2 +- + hw/char/serial.c | 16 +++ + hw/core/machine.c | 272 +++++++++++++++++++++++++++++++++++ + hw/display/vga-isa.c | 2 +- + hw/i386/pc_piix.c | 2 + + hw/i386/pc_q35.c | 2 + + hw/net/e1000e.c | 22 +++ + hw/net/rtl8139.c | 4 +- + hw/rtc/mc146818rtc.c | 6 + + hw/smbios/smbios.c | 46 +++++- + hw/timer/i8254_common.c | 2 +- + hw/usb/hcd-uhci.c | 4 +- + hw/usb/hcd-xhci-pci.c | 59 ++++++-- + hw/usb/hcd-xhci-pci.h | 1 + + hw/usb/hcd-xhci.c | 20 +++ + hw/usb/hcd-xhci.h | 2 + + include/hw/acpi/ich9.h | 3 + + include/hw/boards.h | 36 +++++ + include/hw/firmware/smbios.h | 5 +- + include/hw/i386/pc.h | 3 + + include/hw/usb.h | 3 + + migration/migration.c | 2 + + migration/migration.h | 5 + + 25 files changed, 514 insertions(+), 26 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index 381ef2ddcf..82bd805b55 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -433,6 +433,18 @@ static void ich9_pm_set_keep_pci_slot_hpc(Object *obj, bool value, Error **errp) + s->pm.keep_pci_slot_hpc = value; + } + ++static bool ich9_pm_get_force_rev1_fadt(Object *obj, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ return s->pm.force_rev1_fadt; ++} ++ ++static void ich9_pm_set_force_rev1_fadt(Object *obj, bool value, Error **errp) ++{ ++ ICH9LPCState *s = ICH9_LPC_DEVICE(obj); ++ s->pm.force_rev1_fadt = value; ++} ++ + void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + { + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; +@@ -457,6 +469,9 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + object_property_add_bool(obj, "cpu-hotplug-legacy", + ich9_pm_get_cpu_hotplug_legacy, + ich9_pm_set_cpu_hotplug_legacy); ++ object_property_add_bool(obj, "__com.redhat_force-rev1-fadt", ++ ich9_pm_get_force_rev1_fadt, ++ ich9_pm_set_force_rev1_fadt); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S3_DISABLED, + &pm->disable_s3, OBJ_PROP_FLAG_READWRITE); + 
object_property_add_uint8_ptr(obj, ACPI_PM_PROP_S4_DISABLED, +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index f0b5fac44a..8d6011c0a3 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -278,7 +278,7 @@ static bool piix4_vmstate_need_smbus(void *opaque, int version_id) + static const VMStateDescription vmstate_acpi = { + .name = "piix4_pm", + .version_id = 3, +- .minimum_version_id = 3, ++ .minimum_version_id = 2, + .post_load = vmstate_acpi_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, PIIX4PMState), +@@ -644,8 +644,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 30da05dfe0..5de4d9d73b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -1590,7 +1590,7 @@ static void virt_build_smbios(VirtMachineState *vms) + + smbios_set_defaults("QEMU", product, + vmc->smbios_old_sys_ver ? 
"1.0" : mc->name, false, +- true, SMBIOS_ENTRY_POINT_30); ++ true, NULL, NULL, SMBIOS_ENTRY_POINT_30); + + smbios_get_tables(MACHINE(vms), NULL, 0, + &smbios_tables, &smbios_tables_len, +diff --git a/hw/char/serial.c b/hw/char/serial.c +index 7061aacbce..fe8d0afbb0 100644 +--- a/hw/char/serial.c ++++ b/hw/char/serial.c +@@ -37,6 +37,7 @@ + #include "trace.h" + #include "hw/qdev-properties.h" + #include "hw/qdev-properties-system.h" ++#include "migration/migration.h" + + #define UART_LCR_DLAB 0x80 /* Divisor latch access bit */ + +@@ -689,6 +690,9 @@ static int serial_post_load(void *opaque, int version_id) + static bool serial_thr_ipending_needed(void *opaque) + { + SerialState *s = opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } + + if (s->ier & UART_IER_THRI) { + bool expected_value = ((s->iir & UART_IIR_ID) == UART_IIR_THRI); +@@ -770,6 +774,10 @@ static const VMStateDescription vmstate_serial_xmit_fifo = { + static bool serial_fifo_timeout_timer_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return timer_pending(s->fifo_timeout_timer); + } + +@@ -787,6 +795,10 @@ static const VMStateDescription vmstate_serial_fifo_timeout_timer = { + static bool serial_timeout_ipending_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->timeout_ipending != 0; + } + +@@ -804,6 +816,10 @@ static const VMStateDescription vmstate_serial_timeout_ipending = { + static bool serial_poll_needed(void *opaque) + { + SerialState *s = (SerialState *)opaque; ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->poll_msl >= 0; + } + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 53a99abc56..be4f9864cd 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,278 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * Mostly the same as hw_compat_6_0 ++ */ ++GlobalProperty 
hw_compat_rhel_8_5[] = { ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "gpex-pcihost", "allow-unmapped-accesses", "false" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "i8042", "extended-state", "false"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "nvme-ns", "eui64-default", "off"}, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "e1000e", "init-vet", "off" }, ++}; ++const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); ++ ++/* ++ * Mostly the same as hw_compat_5_2 ++ */ ++GlobalProperty hw_compat_rhel_8_4[] = { ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "ICH9-LPC", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "PIIX4_PM", "smm-compat", "on"}, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-blk-device", "report-discard-granularity", "off" }, ++ /* hw_compat_rhel_8_4 from hw_compat_5_2 */ ++ { "virtio-net-pci", "vectors", "3"}, ++}; ++const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); ++ ++/* ++ * Mostly the same as hw_compat_5_1 ++ */ ++GlobalProperty hw_compat_rhel_8_3[] = { ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-blk", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "vhost-user-scsi", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-blk-device", "num-queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-scsi-device", "num_queues", "1"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "nvme", "use-intel-id", "on"}, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 bz 1912846 */ ++ { "pci-xhci", "x-rh-late-msi-cap", "off" }, ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "virtio-pci", 
"x-ats-page-aligned", "off"}, ++}; ++const size_t hw_compat_rhel_8_3_len = G_N_ELEMENTS(hw_compat_rhel_8_3); ++ ++/* ++ * The same as hw_compat_4_2 + hw_compat_5_0 ++ */ ++GlobalProperty hw_compat_rhel_8_2[] = { ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "queue-size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "virtqueue_size", "128"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "x-enable-wce-if-config-wce", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-blk-device", "seg-max-adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-scsi-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "vhost-blk-device", "seg_max_adjust", "off"}, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-host", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "usb-redir", "suppress-remote-wake", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "qxl-vga", "revision", "4" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "fw_cfg", "acpi-mr-restore", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_4_2 */ ++ { "virtio-device", "use-disabled-flag", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "pci-host-bridge", "x-config-reg-migration-enabled", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "virtio-balloon-device", "page-poison", "false" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-read-set-eax", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-signal-unsupported-cmd", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-report-vmx-type", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { "vmport", "x-cmds-v2", "off" }, ++ /* hw_compat_rhel_8_2 from hw_compat_5_0 */ ++ { 
"virtio-device", "x-disable-legacy-check", "true" }, ++}; ++const size_t hw_compat_rhel_8_2_len = G_N_ELEMENTS(hw_compat_rhel_8_2); ++ ++/* ++ * The same as hw_compat_4_1 ++ */ ++GlobalProperty hw_compat_rhel_8_1[] = { ++ /* hw_compat_rhel_8_1 from hw_compat_4_1 */ ++ { "virtio-pci", "x-pcie-flr-init", "off" }, ++}; ++const size_t hw_compat_rhel_8_1_len = G_N_ELEMENTS(hw_compat_rhel_8_1); ++ ++/* The same as hw_compat_3_1 ++ * format of array has been changed by: ++ * 6c36bddf5340 ("machine: Use shorter format for GlobalProperty arrays") ++ */ ++GlobalProperty hw_compat_rhel_8_0[] = { ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-speed", "2_5" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "pcie-root-port", "x-width", "1" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-crb", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "tpm-tis", "ppi", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-kbd", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-mouse", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "usb-tablet", "serial", "42" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "discard", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 */ ++ { "virtio-blk-device", "write-zeroes", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "VGA", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "secondary-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "bochs-display", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-vga", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from 
hw_compat_4_0 */ ++ { "virtio-gpu-device", "edid", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_4_0 */ ++ { "virtio-device", "use-started", "false" }, ++ /* hw_compat_rhel_8_0 from hw_compat_3_1 - that was added in 4.1 */ ++ { "pcie-root-port-base", "disable-acs", "true" }, ++}; ++const size_t hw_compat_rhel_8_0_len = G_N_ELEMENTS(hw_compat_rhel_8_0); ++ ++/* The same as hw_compat_3_0 + hw_compat_2_12 ++ * except that ++ * there's nothing in 3_0 ++ * migration.decompress-error-check=off was in 7.5 from bz 1584139 ++ */ ++GlobalProperty hw_compat_rhel_7_6[] = { ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "hda-audio", "use-timer", "false" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "cirrus-vga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "VGA", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "vmware-svga", "global-vmstate", "true" }, ++ /* hw_compat_rhel_7_6 from hw_compat_2_12 */ ++ { "qxl-vga", "global-vmstate", "true" }, ++}; ++const size_t hw_compat_rhel_7_6_len = G_N_ELEMENTS(hw_compat_rhel_7_6); ++ ++/* The same as hw_compat_2_11 + hw_compat_2_10 */ ++GlobalProperty hw_compat_rhel_7_5[] = { ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "hpet", "hpet-offset-saved", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "virtio-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 */ ++ { "vhost-user-blk-pci", "vectors", "2" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_11 ++ bz 1608778 modified for our naming */ ++ { "e1000-82540em", "migrate_tso_props", "off" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-mouse-device", "wheel-axis", "false" }, ++ /* hw_compat_rhel_7_5 from hw_compat_2_10 */ ++ { "virtio-tablet-device", "wheel-axis", "false" }, ++ { "cirrus-vga", "vgamem_mb", "16" }, ++ { "migration", "decompress-error-check", "off" }, ++}; ++const size_t hw_compat_rhel_7_5_len = G_N_ELEMENTS(hw_compat_rhel_7_5); 
++ ++/* Mostly like hw_compat_2_9 except ++ * x-mtu-bypass-backend, x-migrate-msix has already been ++ * backported to RHEL7.4. shpc was already on in 7.4. ++ */ ++GlobalProperty hw_compat_rhel_7_4[] = { ++ { "intel-iommu", "pt", "off" }, ++}; ++ ++const size_t hw_compat_rhel_7_4_len = G_N_ELEMENTS(hw_compat_rhel_7_4); ++/* Mostly like HW_COMPAT_2_6 + HW_COMPAT_2_7 + HW_COMPAT_2_8 except ++ * disable-modern, disable-legacy, page-per-vq have already been ++ * backported to RHEL7.3 ++ */ ++GlobalProperty hw_compat_rhel_7_3[] = { ++ { "virtio-mmio", "format_transport_address", "off" }, ++ { "virtio-serial-device", "emergency-write", "off" }, ++ { "ioapic", "version", "0x11" }, ++ { "intel-iommu", "x-buggy-eim", "true" }, ++ { "virtio-pci", "x-ignore-backend-features", "on" }, ++ { "fw_cfg_mem", "x-file-slots", stringify(0x10) }, ++ { "fw_cfg_io", "x-file-slots", stringify(0x10) }, ++ { "pflash_cfi01", "old-multiple-chip-handling", "on" }, ++ { TYPE_PCI_DEVICE, "x-pcie-extcap-init", "off" }, ++ { "virtio-pci", "x-pcie-deverr-init", "off" }, ++ { "virtio-pci", "x-pcie-lnkctl-init", "off" }, ++ { "virtio-pci", "x-pcie-pm-init", "off" }, ++ { "virtio-net-device", "x-mtu-bypass-backend", "off" }, ++ { "e1000e", "__redhat_e1000e_7_3_intr_state", "on" }, ++}; ++const size_t hw_compat_rhel_7_3_len = G_N_ELEMENTS(hw_compat_rhel_7_3); ++ ++/* Mostly like hw_compat_2_4 + 2_3 but: ++ * we don't need "any_layout" as it has been backported to 7.2 ++ */ ++GlobalProperty hw_compat_rhel_7_2[] = { ++ { "virtio-blk-device", "scsi", "true" }, ++ { "e1000-82540em", "extra_mac_registers", "off" }, ++ { "virtio-pci", "x-disable-pcie", "on" }, ++ { "virtio-pci", "migrate-extra", "off" }, ++ { "fw_cfg_mem", "dma_enabled", "off" }, ++ { "fw_cfg_io", "dma_enabled", "off" }, ++ { "isa-fdc", "fallback", "144" }, ++ /* Optional because not all virtio-pci devices support legacy mode */ ++ { "virtio-pci", "disable-modern", "on", .optional = true }, ++ { "virtio-pci", "disable-legacy", "off", 
.optional = true }, ++ { TYPE_PCI_DEVICE, "x-pcie-lnksta-dllla", "off" }, ++ { "virtio-pci", "page-per-vq", "on" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "send-section-footer", "off" }, ++ /* hw_compat_rhel_7_2 - introduced with 2.10.0 */ ++ { "migration", "store-global-state", "off", ++ }, ++}; ++const size_t hw_compat_rhel_7_2_len = G_N_ELEMENTS(hw_compat_rhel_7_2); ++ ++/* Mostly like hw_compat_2_1 but: ++ * we don't need virtio-scsi-pci since 7.0 already had that on ++ * ++ * RH: Note, qemu-extended-regs should have been enabled in the 7.1 ++ * machine type, but was accidentally turned off in 7.2 onwards. ++ */ ++GlobalProperty hw_compat_rhel_7_1[] = { ++ { "intel-hda-generic", "old_msi_addr", "on" }, ++ { "VGA", "qemu-extended-regs", "off" }, ++ { "secondary-vga", "qemu-extended-regs", "off" }, ++ { "usb-mouse", "usb_version", stringify(1) }, ++ { "usb-kbd", "usb_version", stringify(1) }, ++ { "virtio-pci", "virtio-pci-bus-master-bug-migration", "on" }, ++ { "virtio-blk-pci", "any_layout", "off" }, ++ { "virtio-balloon-pci", "any_layout", "off" }, ++ { "virtio-serial-pci", "any_layout", "off" }, ++ { "virtio-9p-pci", "any_layout", "off" }, ++ { "virtio-rng-pci", "any_layout", "off" }, ++ /* HW_COMPAT_RHEL7_1 - introduced with 2.10.0 */ ++ { "migration", "send-configuration", "off" }, ++}; ++const size_t hw_compat_rhel_7_1_len = G_N_ELEMENTS(hw_compat_rhel_7_1); ++ + GlobalProperty hw_compat_6_1[] = { + { "vhost-user-vsock-device", "seqpacket", "off" }, + { "nvme-ns", "shared", "off" }, +diff --git a/hw/display/vga-isa.c b/hw/display/vga-isa.c +index 90851e730b..a91c5d7467 100644 +--- a/hw/display/vga-isa.c ++++ b/hw/display/vga-isa.c +@@ -85,7 +85,7 @@ static void vga_isa_realizefn(DeviceState *dev, Error **errp) + } + + static Property vga_isa_properties[] = { +- DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 8), ++ DEFINE_PROP_UINT32("vgamem_mb", ISAVGAState, state.vram_size_mb, 16), + 
DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 223dd3e05d..dda3f64f19 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -177,6 +177,8 @@ static void pc_init1(MachineState *machine, + smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index e1e100316d..235054a643 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -200,6 +200,8 @@ static void pc_q35_init(MachineState *machine) + smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + mc->name, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, ++ pcmc->smbios_stream_product, ++ pcmc->smbios_stream_version, + SMBIOS_ENTRY_POINT_21); + } + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index ac96f7665a..d35bc1f0b0 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -81,6 +81,12 @@ struct E1000EState { + + E1000ECore core; + bool init_vet; ++ ++ /* 7.3 had the intr_state field that was in the original e1000e code ++ * but that was removed prior to 2.7's release ++ */ ++ bool redhat_7_3_intr_state_enable; ++ uint32_t redhat_7_3_intr_state; + }; + + #define E1000E_MMIO_IDX 0 +@@ -96,6 +102,10 @@ struct E1000EState { + #define E1000E_MSIX_TABLE (0x0000) + #define E1000E_MSIX_PBA (0x2000) + ++/* Values as in RHEL 7.3 build and original upstream */ ++#define RH_E1000E_USE_MSI BIT(0) ++#define RH_E1000E_USE_MSIX BIT(1) ++ + static uint64_t + e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) + { +@@ -307,6 +317,8 @@ e1000e_init_msix(E1000EState *s) + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSIX; + } + } + } +@@ -478,6 +490,8 @@ static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) 
+ ret = msi_init(PCI_DEVICE(s), 0xD0, 1, true, false, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); ++ } else { ++ s->redhat_7_3_intr_state |= RH_E1000E_USE_MSI; + } + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, +@@ -605,6 +619,11 @@ static const VMStateDescription e1000e_vmstate_intr_timer = { + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + ++static bool rhel_7_3_check(void *opaque, int version_id) ++{ ++ return ((E1000EState *)opaque)->redhat_7_3_intr_state_enable; ++} ++ + static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, +@@ -616,6 +635,7 @@ static const VMStateDescription e1000e_vmstate = { + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), ++ VMSTATE_UINT32_TEST(redhat_7_3_intr_state, E1000EState, rhel_7_3_check), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, +@@ -664,6 +684,8 @@ static PropertyInfo e1000e_prop_disable_vnet, + + static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), ++ DEFINE_PROP_BOOL("__redhat_e1000e_7_3_intr_state", E1000EState, ++ redhat_7_3_intr_state_enable, false), + DEFINE_PROP_SIGNED("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_SIGNED("subsys_ven", E1000EState, subsys_ven, +diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c +index 90b4fc63ce..3ffb9dd22c 100644 +--- a/hw/net/rtl8139.c ++++ b/hw/net/rtl8139.c +@@ -3179,7 +3179,7 @@ static int rtl8139_pre_save(void *opaque) + + static const VMStateDescription vmstate_rtl8139 = { + .name = "rtl8139", +- .version_id = 5, ++ .version_id = 4, + .minimum_version_id = 3, + .post_load = rtl8139_post_load, + .pre_save = rtl8139_pre_save, +@@ -3260,7 +3260,9 @@ static const VMStateDescription vmstate_rtl8139 = { + VMSTATE_UINT32(tally_counters.TxMCol, RTL8139State), 
+ VMSTATE_UINT64(tally_counters.RxOkPhy, RTL8139State), + VMSTATE_UINT64(tally_counters.RxOkBrd, RTL8139State), ++#if 0 /* Disabled for Red Hat Enterprise Linux bz 1420195 */ + VMSTATE_UINT32_V(tally_counters.RxOkMul, RTL8139State, 5), ++#endif + VMSTATE_UINT16(tally_counters.TxAbt, RTL8139State), + VMSTATE_UINT16(tally_counters.TxUndrn, RTL8139State), + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 4fbafddb22..2f120c6e70 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -43,6 +43,7 @@ + #include "qapi/qapi-events-misc-target.h" + #include "qapi/visitor.h" + #include "hw/rtc/mc146818rtc_regs.h" ++#include "migration/migration.h" + + #ifdef TARGET_I386 + #include "qapi/qapi-commands-misc-target.h" +@@ -821,6 +822,11 @@ static int rtc_post_load(void *opaque, int version_id) + static bool rtc_irq_reinject_on_ack_count_needed(void *opaque) + { + RTCState *s = (RTCState *)opaque; ++ ++ if (migrate_pre_2_2) { ++ return false; ++ } ++ + return s->irq_reinject_on_ack_count != 0; + } + +diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c +index 7397e56737..3a4bb894ba 100644 +--- a/hw/smbios/smbios.c ++++ b/hw/smbios/smbios.c +@@ -57,6 +57,9 @@ static bool smbios_legacy = true; + static bool smbios_uuid_encoded = true; + /* end: legacy structures & constants for <= 2.0 machines */ + ++/* Set to true for modern Windows 10 HardwareID-6 compat */ ++static bool smbios_type2_required; ++ + + uint8_t *smbios_tables; + size_t smbios_tables_len; +@@ -619,7 +622,7 @@ static void smbios_build_type_1_table(void) + + static void smbios_build_type_2_table(void) + { +- SMBIOS_BUILD_TABLE_PRE(2, 0x200, false); /* optional */ ++ SMBIOS_BUILD_TABLE_PRE(2, 0x200, smbios_type2_required); + + SMBIOS_TABLE_SET_STR(2, manufacturer_str, type2.manufacturer); + SMBIOS_TABLE_SET_STR(2, product_str, type2.product); +@@ -888,7 +891,10 @@ void smbios_set_cpuid(uint32_t version, uint32_t features) + + void smbios_set_defaults(const char *manufacturer, const char 
*product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type) ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type) + { + smbios_have_defaults = true; + smbios_legacy = legacy_mode; +@@ -909,11 +915,45 @@ void smbios_set_defaults(const char *manufacturer, const char *product, + g_free(smbios_entries); + } + ++ /* ++ * If @stream_product & @stream_version are non-NULL, then ++ * we're following rules for new Windows driver support. ++ * The data we have to report is defined in this doc: ++ * ++ * https://docs.microsoft.com/en-us/windows-hardware/drivers/install/specifying-hardware-ids-for-a-computer ++ * ++ * The Windows drivers are written to expect use of the ++ * scheme documented as "HardwareID-6" against Windows 10, ++ * which uses SMBIOS System (Type 1) and Base Board (Type 2) ++ * tables and will match on ++ * ++ * System Manufacturer = Red Hat (@manufacturer) ++ * System SKU Number = 8.2.0 (@stream_version) ++ * Baseboard Manufacturer = Red Hat (@manufacturer) ++ * Baseboard Product = RHEL-AV (@stream_product) ++ * ++ * NB, SKU must be changed with each RHEL-AV release ++ * ++ * Other fields can be freely used by applications using ++ * QEMU. For example apps can use the "System product" ++ * and "System version" to identify themselves. 
++ * ++ * We get 'System Manufacturer' and 'Baseboard Manufacturer' ++ */ + SMBIOS_SET_DEFAULT(type1.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type1.product, product); + SMBIOS_SET_DEFAULT(type1.version, version); ++ SMBIOS_SET_DEFAULT(type1.family, "Red Hat Enterprise Linux"); ++ if (stream_version != NULL) { ++ SMBIOS_SET_DEFAULT(type1.sku, stream_version); ++ } + SMBIOS_SET_DEFAULT(type2.manufacturer, manufacturer); +- SMBIOS_SET_DEFAULT(type2.product, product); ++ if (stream_product != NULL) { ++ SMBIOS_SET_DEFAULT(type2.product, stream_product); ++ smbios_type2_required = true; ++ } else { ++ SMBIOS_SET_DEFAULT(type2.product, product); ++ } + SMBIOS_SET_DEFAULT(type2.version, version); + SMBIOS_SET_DEFAULT(type3.manufacturer, manufacturer); + SMBIOS_SET_DEFAULT(type3.version, version); +diff --git a/hw/timer/i8254_common.c b/hw/timer/i8254_common.c +index 050875b497..32935da46c 100644 +--- a/hw/timer/i8254_common.c ++++ b/hw/timer/i8254_common.c +@@ -231,7 +231,7 @@ static const VMStateDescription vmstate_pit_common = { + .pre_save = pit_dispatch_pre_save, + .post_load = pit_dispatch_post_load, + .fields = (VMStateField[]) { +- VMSTATE_UINT32_V(channels[0].irq_disabled, PITCommonState, 3), ++ VMSTATE_UINT32(channels[0].irq_disabled, PITCommonState), /* qemu-kvm's v2 had 'flags' here */ + VMSTATE_STRUCT_ARRAY(channels, PITCommonState, 3, 2, + vmstate_pit_channel, PITChannelState), + VMSTATE_INT64(channels[0].next_transition_time, +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index d1b5657d72..7930b868fa 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -1166,11 +1166,13 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + UHCIState *s = UHCI(dev); + uint8_t *pci_conf = s->dev.config; + int i; ++ int irq_pin; + + pci_conf[PCI_CLASS_PROG] = 0x00; + /* TODO: reset value should be 0. 
*/ + pci_conf[USB_SBRN] = USB_RELEASE_1; /* release number */ +- pci_config_set_interrupt_pin(pci_conf, u->info.irq_pin + 1); ++ irq_pin = u->info.irq_pin; ++ pci_config_set_interrupt_pin(pci_conf, irq_pin + 1); + s->irq = pci_allocate_irq(dev); + + if (s->masterbus) { +diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c +index e934b1a5b1..e18b05e528 100644 +--- a/hw/usb/hcd-xhci-pci.c ++++ b/hw/usb/hcd-xhci-pci.c +@@ -104,6 +104,33 @@ static int xhci_pci_vmstate_post_load(void *opaque, int version_id) + return 0; + } + ++/* RH bz 1912846 */ ++static bool usb_xhci_pci_add_msi(struct PCIDevice *dev, Error **errp) ++{ ++ int ret; ++ Error *err = NULL; ++ XHCIPciState *s = XHCI_PCI(dev); ++ ++ ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); ++ /* ++ * Any error other than -ENOTSUP(board's MSI support is broken) ++ * is a programming error ++ */ ++ assert(!ret || ret == -ENOTSUP); ++ if (ret && s->msi == ON_OFF_AUTO_ON) { ++ /* Can't satisfy user's explicit msi=on request, fail */ ++ error_append_hint(&err, "You have to use msi=auto (default) or " ++ "msi=off with this machine type.\n"); ++ error_propagate(errp, err); ++ return true; ++ } ++ assert(!err || s->msi == ON_OFF_AUTO_AUTO); ++ /* With msi=auto, we fall back to MSI off silently */ ++ error_free(err); ++ ++ return false; ++} ++ + static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + { + int ret; +@@ -125,23 +152,12 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + s->xhci.nec_quirks = true; + } + +- if (s->msi != ON_OFF_AUTO_OFF) { +- ret = msi_init(dev, 0x70, s->xhci.numintrs, true, false, &err); +- /* +- * Any error other than -ENOTSUP(board's MSI support is broken) +- * is a programming error +- */ +- assert(!ret || ret == -ENOTSUP); +- if (ret && s->msi == ON_OFF_AUTO_ON) { +- /* Can't satisfy user's explicit msi=on request, fail */ +- error_append_hint(&err, "You have to use msi=auto (default) or " +- "msi=off with this machine type.\n"); 
++ if (s->msi != ON_OFF_AUTO_OFF && s->rh_late_msi_cap) { ++ /* This gives the behaviour from 5.2.0 onwards, lspci shows 90,a0,70 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { + error_propagate(errp, err); + return; + } +- assert(!err || s->msi == ON_OFF_AUTO_AUTO); +- /* With msi=auto, we fall back to MSI off silently */ +- error_free(err); + } + pci_register_bar(dev, 0, + PCI_BASE_ADDRESS_SPACE_MEMORY | +@@ -154,6 +170,14 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, Error **errp) + assert(ret > 0); + } + ++ /* RH bz 1912846 */ ++ if (s->msi != ON_OFF_AUTO_OFF && !s->rh_late_msi_cap) { ++ /* This gives the older RH machine behaviour, lspci shows 90,70,a0 */ ++ if (usb_xhci_pci_add_msi(dev, &err)) { ++ error_propagate(errp, err); ++ return; ++ } ++ } + if (s->msix != ON_OFF_AUTO_OFF) { + /* TODO check for errors, and should fail when msix=on */ + msix_init(dev, s->xhci.numintrs, +@@ -198,11 +222,18 @@ static void xhci_instance_init(Object *obj) + qdev_alias_all_properties(DEVICE(&s->xhci), obj); + } + ++static Property xhci_pci_properties[] = { ++ /* RH bz 1912846 */ ++ DEFINE_PROP_BOOL("x-rh-late-msi-cap", XHCIPciState, rh_late_msi_cap, true), ++ DEFINE_PROP_END_OF_LIST() ++}; ++ + static void xhci_class_init(ObjectClass *klass, void *data) + { + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + ++ device_class_set_props(dc, xhci_pci_properties); + dc->reset = xhci_pci_reset; + dc->vmsd = &vmstate_xhci_pci; + set_bit(DEVICE_CATEGORY_USB, dc->categories); +diff --git a/hw/usb/hcd-xhci-pci.h b/hw/usb/hcd-xhci-pci.h +index c193f79443..086a1feb1e 100644 +--- a/hw/usb/hcd-xhci-pci.h ++++ b/hw/usb/hcd-xhci-pci.h +@@ -39,6 +39,7 @@ typedef struct XHCIPciState { + XHCIState xhci; + OnOffAuto msi; + OnOffAuto msix; ++ bool rh_late_msi_cap; /* bz 1912846 */ + } XHCIPciState; + + #endif +diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c +index e01700039b..d5ea13356c 100644 +--- a/hw/usb/hcd-xhci.c ++++ b/hw/usb/hcd-xhci.c 
+@@ -3494,9 +3494,27 @@ static const VMStateDescription vmstate_xhci_slot = { + } + }; + ++static int xhci_event_pre_save(void *opaque) ++{ ++ XHCIEvent *s = opaque; ++ ++ s->cve_2014_5263_a = ((uint8_t *)&s->type)[0]; ++ s->cve_2014_5263_b = ((uint8_t *)&s->type)[1]; ++ ++ return 0; ++} ++ ++bool migrate_cve_2014_5263_xhci_fields; ++ ++static bool xhci_event_cve_2014_5263(void *opaque, int version_id) ++{ ++ return migrate_cve_2014_5263_xhci_fields; ++} ++ + static const VMStateDescription vmstate_xhci_event = { + .name = "xhci-event", + .version_id = 1, ++ .pre_save = xhci_event_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT32(type, XHCIEvent), + VMSTATE_UINT32(ccode, XHCIEvent), +@@ -3505,6 +3523,8 @@ static const VMStateDescription vmstate_xhci_event = { + VMSTATE_UINT32(flags, XHCIEvent), + VMSTATE_UINT8(slotid, XHCIEvent), + VMSTATE_UINT8(epid, XHCIEvent), ++ VMSTATE_UINT8_TEST(cve_2014_5263_a, XHCIEvent, xhci_event_cve_2014_5263), ++ VMSTATE_UINT8_TEST(cve_2014_5263_b, XHCIEvent, xhci_event_cve_2014_5263), + VMSTATE_END_OF_LIST() + } + }; +diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h +index 98f598382a..50a7b6f6c4 100644 +--- a/hw/usb/hcd-xhci.h ++++ b/hw/usb/hcd-xhci.h +@@ -149,6 +149,8 @@ typedef struct XHCIEvent { + uint32_t flags; + uint8_t slotid; + uint8_t epid; ++ uint8_t cve_2014_5263_a; ++ uint8_t cve_2014_5263_b; + } XHCIEvent; + + typedef struct XHCIInterrupter { +diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h +index 7ca92843c6..21abfd8447 100644 +--- a/include/hw/acpi/ich9.h ++++ b/include/hw/acpi/ich9.h +@@ -68,6 +68,9 @@ typedef struct ICH9LPCPMRegs { + bool smm_compat; + bool enable_tco; + TCOIORegs tco_regs; ++ ++ /* RH addition, see bz 1489800 */ ++ bool force_rev1_fadt; + } ICH9LPCPMRegs; + + #define ACPI_PM_PROP_TCO_ENABLED "enable_tco" +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 9c1c190104..8bba96ef2b 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -441,4 +441,40 @@ extern 
const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_5[]; ++extern const size_t hw_compat_rhel_8_5_len; ++ ++extern GlobalProperty hw_compat_rhel_8_4[]; ++extern const size_t hw_compat_rhel_8_4_len; ++ ++extern GlobalProperty hw_compat_rhel_8_3[]; ++extern const size_t hw_compat_rhel_8_3_len; ++ ++extern GlobalProperty hw_compat_rhel_8_2[]; ++extern const size_t hw_compat_rhel_8_2_len; ++ ++extern GlobalProperty hw_compat_rhel_8_1[]; ++extern const size_t hw_compat_rhel_8_1_len; ++ ++extern GlobalProperty hw_compat_rhel_8_0[]; ++extern const size_t hw_compat_rhel_8_0_len; ++ ++extern GlobalProperty hw_compat_rhel_7_6[]; ++extern const size_t hw_compat_rhel_7_6_len; ++ ++extern GlobalProperty hw_compat_rhel_7_5[]; ++extern const size_t hw_compat_rhel_7_5_len; ++ ++extern GlobalProperty hw_compat_rhel_7_4[]; ++extern const size_t hw_compat_rhel_7_4_len; ++ ++extern GlobalProperty hw_compat_rhel_7_3[]; ++extern const size_t hw_compat_rhel_7_3_len; ++ ++extern GlobalProperty hw_compat_rhel_7_2[]; ++extern const size_t hw_compat_rhel_7_2_len; ++ ++extern GlobalProperty hw_compat_rhel_7_1[]; ++extern const size_t hw_compat_rhel_7_1_len; ++ + #endif +diff --git a/include/hw/firmware/smbios.h b/include/hw/firmware/smbios.h +index 5a0dd0c8cf..2cb1ec2bab 100644 +--- a/include/hw/firmware/smbios.h ++++ b/include/hw/firmware/smbios.h +@@ -278,7 +278,10 @@ void smbios_entry_add(QemuOpts *opts, Error **errp); + void smbios_set_cpuid(uint32_t version, uint32_t features); + void smbios_set_defaults(const char *manufacturer, const char *product, + const char *version, bool legacy_mode, +- bool uuid_encoded, SmbiosEntryPointType ep_type); ++ bool uuid_encoded, ++ const char *stream_product, ++ const char *stream_version, ++ SmbiosEntryPointType ep_type); + uint8_t *smbios_get_table_legacy(MachineState *ms, size_t *length); + void smbios_get_tables(MachineState *ms, + const struct 
smbios_phys_mem_area *mem_array, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9ab39e428f..7ccc9a1a07 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -107,6 +107,9 @@ struct PCMachineClass { + bool smbios_defaults; + bool smbios_legacy_mode; + bool smbios_uuid_encoded; ++ /* New fields needed for Windows HardwareID-6 matching */ ++ const char *smbios_stream_product; ++ const char *smbios_stream_version; + + /* RAM / address space compat: */ + bool gigabyte_align; +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 33668dd0a9..e6b2fe72da 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -582,4 +582,7 @@ void usb_pcap_init(FILE *fp); + void usb_pcap_ctrl(USBPacket *p, bool setup); + void usb_pcap_data(USBPacket *p, bool setup); + ++/* hcd-xhci.c -- rhel7.0.0 machine type compatibility */ ++extern bool migrate_cve_2014_5263_xhci_fields; ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index abaf6f9e3d..a87ff01b81 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -164,6 +164,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_X_COLO, + MIGRATION_CAPABILITY_VALIDATE_UUID); + ++bool migrate_pre_2_2; ++ + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add + dynamic creation of migration */ +diff --git a/migration/migration.h b/migration/migration.h +index 8130b703eb..d016cedd9d 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,11 @@ bool check_dirty_bitmap_mig_alias_map(const BitmapMigrationNodeAliasList *bbm, + void migrate_add_address(SocketAddress *address); + + int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); ++/* ++ * Disables a load of subsections that were added in 2.2/rh7.2 for backwards ++ * migration compatibility. 
++ */ ++extern bool migrate_pre_2_2; + + #define qemu_ram_foreach_block \ + #warning "Use foreach_not_ignored_block in migration code" +-- +2.27.0 + diff --git a/0008-Add-aarch64-machine-types.patch b/0008-Add-aarch64-machine-types.patch new file mode 100644 index 0000000..2e8c417 --- /dev/null +++ b/0008-Add-aarch64-machine-types.patch @@ -0,0 +1,405 @@ +From 670e90f5cbd92189155e079b8c6e2aafdf82d162 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 12:53:31 +0200 +Subject: Add aarch64 machine types + +Adding changes to add RHEL machine types for aarch64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (4.0.0): +- Use upstream compat handling + +Rebase notes (4.1.0-rc0): +- Removed a15memmap (upstream) +- Use virt_flash_create in rhel800_virt_instance_init + +Rebase notes (4.2.0-rc0): +- Set numa_mem_supported + +Rebase notes (4.2.0-rc3): +- aarch64: Add virt-rhel8.2.0 machine type for ARM (patch 92246) +- aarch64: virt: Allow more than 1TB of RAM (patch 92249) +- aarch64: virt: Allow PCDIMM instantiation (patch 92247) +- aarch64: virt: Enhance the comment related to gic-version (patch 92248) + +Rebase notes (5.0.0): +- Set default_ram_id in rhel_machine_class_init +- Added setting acpi properties + +Rebase notes (5.1.0): +- Added ras property +- Added to virt_machine_device_unplug_cb to machine type (upstream) +- added mte property (upstream) + +Rebase notes (weekly-210210): +- Added support for oem fields to machine type + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (6.0.0-rc2): +- renamed oem-id and oem-table-id to x-oem-id and x-oem-table-id + +Rebase notes (210623): +- Protect TPM functions by CONFIG_TPM ifdef + +Rebase notes (6.1.0-rc0): +- Add support for default_bus_bypass_iommu + +Merged patches (4.0.0): +- 7bfdb4c aarch64: Add virt-rhel8.0.0 machine type for ARM +- 3433e69 aarch64: Set virt-rhel8.0.0 max_cpus to 512 +- 4d20863 aarch64: Use 256MB ECAM region by default + +Merged 
patches (4.1.0): +- c3e39ef aarch64: Add virt-rhel8.1.0 machine type for ARM +- 59a46d1 aarch64: Allow ARM VIRT iommu option in RHEL8.1 machine + +Merged patches (5.2.0 rc0): +- 12990ad hw/arm: Changes to rhel820 machine +- 46d5a79 hw/arm: Introduce rhel_virt_instance_init() helper +- 098954a hw/arm: Add rhel830 machine type +- ee8e99d arm: Set correct max_cpus value on virt-rhel* machine types +- e5edd38 RHEL-only: arm/virt: Allow the TPM_TIS_SYSBUS device dynamic allocation in machvirt +- 6d7ba66 machine types/numa: set numa_mem_supported on old machine types (partialy) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partialy) + +Merged patches (6.0): +- 078fadb5da AArch64 machine types cleanup +- ea7b7425fa hw/arm/virt: Add 8.4 Machine type + +Merged patches (weekly-210609): +- 73b1578882 hw/arm/virt: Add 8.5 machine type +- 5333038d11 hw/arm/virt: Disable PL011 clock migration through hw_compat_rhel_8_3 +- 63adb8ae86 arm/virt: Register highmem and gic-version as class properties + +Merged patches (weekly-211027): +- 86e3057c0a hw: arm: virt: Add hw_compat_rhel_8_5 to 8.5 machine type +--- + hw/arm/virt.c | 226 +++++++++++++++++++++++++++++++++++++++++- + hw/core/machine.c | 2 + + include/hw/arm/virt.h | 8 ++ + 3 files changed, 235 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 5de4d9d73b..c77d26ab13 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -79,6 +79,7 @@ + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ + void *data) \ +@@ -105,7 +106,48 @@ + DEFINE_VIRT_MACHINE_LATEST(major, minor, true) + #define DEFINE_VIRT_MACHINE(major, minor) \ + DEFINE_VIRT_MACHINE_LATEST(major, minor, false) +- ++#endif /* disabled for RHEL */ ++ ++#define DEFINE_RHEL_MACHINE_LATEST(m, n, s, latest) \ ++ static void 
rhel##m##n##s##_virt_class_init(ObjectClass *oc, \ ++ void *data) \ ++ { \ ++ MachineClass *mc = MACHINE_CLASS(oc); \ ++ rhel##m##n##s##_virt_options(mc); \ ++ mc->desc = "RHEL " # m "." # n "." # s " ARM Virtual Machine"; \ ++ if (latest) { \ ++ mc->alias = "virt"; \ ++ mc->is_default = 1; \ ++ } \ ++ } \ ++ static const TypeInfo rhel##m##n##s##_machvirt_info = { \ ++ .name = MACHINE_TYPE_NAME("virt-rhel" # m "." # n "." # s), \ ++ .parent = TYPE_RHEL_MACHINE, \ ++ .class_init = rhel##m##n##s##_virt_class_init, \ ++ }; \ ++ static void rhel##m##n##s##_machvirt_init(void) \ ++ { \ ++ type_register_static(&rhel##m##n##s##_machvirt_info); \ ++ } \ ++ type_init(rhel##m##n##s##_machvirt_init); ++ ++#define DEFINE_RHEL_MACHINE_AS_LATEST(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, true) ++#define DEFINE_RHEL_MACHINE(major, minor, subminor) \ ++ DEFINE_RHEL_MACHINE_LATEST(major, minor, subminor, false) ++ ++/* This variable is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty arm_rhel_compat[] = { ++ { ++ .driver = "virtio-net-pci", ++ .property = "romfile", ++ .value = "", ++ }, ++}; ++const size_t arm_rhel_compat_len = G_N_ELEMENTS(arm_rhel_compat); + + /* Number of external interrupt lines to configure the GIC with */ + #define NUM_IRQS 256 +@@ -2180,6 +2222,7 @@ static void machvirt_init(MachineState *machine) + qemu_add_machine_init_done_notifier(&vms->machine_done); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_secure(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2207,6 +2250,7 @@ static void virt_set_virt(Object *obj, bool value, Error **errp) + + vms->virt = value; + } ++#endif /* disabled for RHEL */ + + static bool virt_get_highmem(Object *obj, Error **errp) + { +@@ -2304,6 +2348,7 @@ static void virt_set_acpi(Object *obj, Visitor *v, const char *name, + visit_type_OnOffAuto(v, name, &vms->acpi, errp); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_ras(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2331,6 +2376,7 @@ static void virt_set_mte(Object *obj, bool value, Error **errp) + + vms->mte = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_gic_version(Object *obj, Error **errp) + { +@@ -2666,6 +2712,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str) + return fixed_ipa ? 
0 : requested_pa_size; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void virt_machine_class_init(ObjectClass *oc, void *data) + { + MachineClass *mc = MACHINE_CLASS(oc); +@@ -3031,3 +3078,180 @@ static void virt_machine_2_6_options(MachineClass *mc) + vmc->no_pmu = true; + } + DEFINE_VIRT_MACHINE(2, 6) ++#endif /* disabled for RHEL */ ++ ++static void rhel_machine_class_init(ObjectClass *oc, void *data) ++{ ++ MachineClass *mc = MACHINE_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ ++ mc->family = "virt-rhel-Z"; ++ mc->init = machvirt_init; ++ /* Maximum supported VCPU count for all virt-rhel* machines */ ++ mc->max_cpus = 384; ++#ifdef CONFIG_TPM ++ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); ++#endif ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->pci_allow_0_address = true; ++ /* We know we will never create a pre-ARMv7 CPU which needs 1K pages */ ++ mc->minimum_page_bits = 12; ++ mc->possible_cpu_arch_ids = virt_possible_cpu_arch_ids; ++ mc->cpu_index_to_instance_props = virt_cpu_index_to_props; ++ mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a57"); ++ mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; ++ mc->kvm_type = virt_kvm_type; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = virt_machine_get_hotplug_handler; ++ hc->pre_plug = virt_machine_device_pre_plug_cb; ++ hc->plug = virt_machine_device_plug_cb; ++ hc->unplug_request = virt_machine_device_unplug_request_cb; ++ hc->unplug = virt_machine_device_unplug_cb; ++ mc->nvdimm_supported = true; ++ mc->auto_enable_numa_with_memhp = true; ++ mc->auto_enable_numa_with_memdev = true; ++ mc->default_ram_id = "mach-virt.ram"; ++ ++ object_class_property_add(oc, "acpi", "OnOffAuto", ++ virt_get_acpi, virt_set_acpi, ++ NULL, NULL); ++ object_class_property_set_description(oc, "acpi", ++ "Enable ACPI"); ++ ++ object_class_property_add_bool(oc, "highmem", virt_get_highmem, ++ virt_set_highmem); ++ 
object_class_property_set_description(oc, "highmem", ++ "Set on/off to enable/disable using " ++ "physical address space above 32 bits"); ++ ++ object_class_property_add_str(oc, "gic-version", virt_get_gic_version, ++ virt_set_gic_version); ++ object_class_property_set_description(oc, "gic-version", ++ "Set GIC version. " ++ "Valid values are 2, 3, host and max"); ++ ++ object_class_property_add_str(oc, "x-oem-id", ++ virt_get_oem_id, ++ virt_set_oem_id); ++ object_class_property_set_description(oc, "x-oem-id", ++ "Override the default value of field OEMID " ++ "in ACPI table header." ++ "The string may be up to 6 bytes in size"); ++ ++ object_class_property_add_str(oc, "x-oem-table-id", ++ virt_get_oem_table_id, ++ virt_set_oem_table_id); ++ object_class_property_set_description(oc, "x-oem-table-id", ++ "Override the default value of field OEM Table ID " ++ "in ACPI table header." ++ "The string may be up to 8 bytes in size"); ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ ++} ++ ++static void rhel_virt_instance_init(Object *obj) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(obj); ++ VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ ++ /* EL3 is disabled by default and non-configurable for RHEL */ ++ vms->secure = false; ++ ++ /* EL2 is disabled by default and non-configurable for RHEL */ ++ vms->virt = false; ++ ++ /* High memory is enabled by default */ ++ vms->highmem = true; ++ vms->gic_version = VIRT_GIC_VERSION_NOSEL; ++ ++ vms->highmem_ecam = !vmc->no_highmem_ecam; ++ ++ if (vmc->no_its) { ++ vms->its = false; ++ } else { ++ /* Default allows ITS instantiation */ ++ vms->its = true; ++ object_property_add_bool(obj, "its", virt_get_its, ++ virt_set_its); ++ object_property_set_description(obj, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ } ++ ++ /* Default disallows iommu instantiation */ ++ vms->iommu = VIRT_IOMMU_NONE; ++ 
object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); ++ object_property_set_description(obj, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ /* Default disallows RAS instantiation and is non-configurable for RHEL */ ++ vms->ras = false; ++ ++ /* MTE is disabled by default and non-configurable for RHEL */ ++ vms->mte = false; ++ ++ vms->default_bus_bypass_iommu = false; ++ vms->irqmap = a15irqmap; ++ ++ virt_flash_create(vms); ++ vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); ++ vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); ++ ++} ++ ++static const TypeInfo rhel_machine_info = { ++ .name = TYPE_RHEL_MACHINE, ++ .parent = TYPE_MACHINE, ++ .abstract = true, ++ .instance_size = sizeof(VirtMachineState), ++ .class_size = sizeof(VirtMachineClass), ++ .class_init = rhel_machine_class_init, ++ .instance_init = rhel_virt_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_HOTPLUG_HANDLER }, ++ { } ++ }, ++}; ++ ++static void rhel_machine_init(void) ++{ ++ type_register_static(&rhel_machine_info); ++} ++type_init(rhel_machine_init); ++ ++static void rhel850_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++ ++static void rhel840_virt_options(MachineClass *mc) ++{ ++ rhel850_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_RHEL_MACHINE(8, 4, 0) ++ ++static void rhel830_virt_options(MachineClass *mc) ++{ ++ VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ ++ rhel840_virt_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ vmc->no_kvm_steal_time = true; ++} ++DEFINE_RHEL_MACHINE(8, 3, 0) ++ ++static void rhel820_virt_options(MachineClass *mc) ++{ ++ rhel830_virt_options(mc); ++ 
compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++ mc->numa_mem_supported = true; ++ mc->auto_enable_numa_with_memdev = false; ++} ++DEFINE_RHEL_MACHINE(8, 2, 0) +diff --git a/hw/core/machine.c b/hw/core/machine.c +index be4f9864cd..62febde5aa 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -87,6 +87,8 @@ GlobalProperty hw_compat_rhel_8_3[] = { + { "nvme", "use-intel-id", "on"}, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ + { "pvpanic", "events", "1"}, /* PVPANIC_PANICKED */ ++ /* hw_compat_rhel_8_3 from hw_compat_5_1 */ ++ { "pl011", "migrate-clk", "off" }, + /* hw_compat_rhel_8_3 bz 1912846 */ + { "pci-xhci", "x-rh-late-msi-cap", "off" }, + /* hw_compat_rhel_8_3 from hw_compat_5_1 */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index dc6b66ffc8..9364628847 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -175,9 +175,17 @@ struct VirtMachineState { + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) + ++#if 0 /* disabled for Red Hat Enterprise Linux */ + #define TYPE_VIRT_MACHINE MACHINE_TYPE_NAME("virt") + OBJECT_DECLARE_TYPE(VirtMachineState, VirtMachineClass, VIRT_MACHINE) + ++#else ++#define TYPE_RHEL_MACHINE MACHINE_TYPE_NAME("virt-rhel") ++typedef struct VirtMachineClass VirtMachineClass; ++typedef struct VirtMachineState VirtMachineState; ++DECLARE_OBJ_CHECKERS(VirtMachineState, VirtMachineClass, VIRT_MACHINE, TYPE_RHEL_MACHINE) ++#endif ++ + void virt_acpi_setup(VirtMachineState *vms); + bool virt_is_acpi_enabled(VirtMachineState *vms); + +-- +2.27.0 + diff --git a/0008-Add-ppc64-machine-types.patch b/0008-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..860e803 --- /dev/null +++ b/0008-Add-ppc64-machine-types.patch @@ -0,0 +1,528 @@ +From f61b3d7dc000886e23943457ee9baf1d4cae43b4 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL 
machine types for ppc64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (6.2.0): +- Fixed rebase conflict relicts +- Update machine type compat for 6.2 (from MR 66) + +Merged patches (6.1.0): +- c438c25ac3 redhat: Define pseries-rhel8.5.0 machine type +- a3995e2eff Remove RHEL 7.0.0 machine type (only ppc64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only ppc64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only ppc64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only ppc64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only ppc64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only ppc64 changes) +--- + hw/ppc/spapr.c | 243 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 +++ + include/hw/ppc/spapr.h | 4 + + target/ppc/compat.c | 13 ++- + target/ppc/cpu.h | 1 + + target/ppc/kvm.c | 27 +++++ + target/ppc/kvm_ppc.h | 13 +++ + 7 files changed, 313 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a4372ba189..5fdf8b506d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1622,6 +1622,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3317,6 +3320,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); 
+@@ -3395,6 +3412,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4652,6 +4675,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4703,6 +4727,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-7.0 + */ +@@ -4830,6 +4855,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4849,6 +4875,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + return true; + } ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5176,6 +5204,221 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overriden here for all pseries-rhel* machines. 
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ ++/* ++ * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, 
G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ 
++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty ++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialized ++ * yet. Postpone this to machine init (see default_caps_with_cpu()).
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index fcb5dfe792..ab8fb5bf62 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -25,6 +25,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -259,6 +260,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -270,6 +272,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + /* Set time-base frequency to 512 MHz. vhyp must be set first. 
*/ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return false; ++ } ++ + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index f5c33dcc86..4a68e0a901 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + ++ bool has_power9_support; + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +@@ -241,6 +242,9 @@ struct SpaprMachineState { + /* Set by -boot */ + char *boot_device; + ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; ++ + /*< public >*/ + char *kvm_type; + char *host_model; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7949a24f5a..f207a9ba01 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h 
b/target/ppc/cpu.h +index 047b24ba50..79c5ac50b9 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1462,6 +1462,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM 
implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.31.1 + diff --git a/0009-Add-ppc64-machine-types.patch b/0009-Add-ppc64-machine-types.patch new file mode 100644 index 0000000..f5ce09a --- /dev/null +++ b/0009-Add-ppc64-machine-types.patch @@ -0,0 +1,714 @@ +From 3c65320ce5b8ad3bb8c0d8fd13a88c464d5c5845 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:27:13 +0200 +Subject: Add ppc64 machine types + +Adding changes to add RHEL machine types for ppc64 architecture. 
+ +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- remove instance options and use upstream solution +- Use upstream compat handling +- Replace SPAPR_PCI_2_7_MMIO_WIN_SIZE with value (changed upstream) +- re-add handling of instance_options (removed upstream) +- Use p8 as default for rhel machine types (p9 default upstream) +- sPAPRMachineClass renamed to SpaprMachineClass (upstream) + +Rebase changes (4.1.0): +- Update format for compat structures + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (4.0.0): +- 467d59a redhat: define pseries-rhel8.0.0 machine type + +Merged patches (4.1.0): +- f21757edc target/ppc/spapr: Enable mitigations by default for pseries-4.0 machine type +- 2511c63 redhat: sync pseries-rhel7.6.0 with rhel-av-8.0.1 +- 89f01da redhat: define pseries-rhel8.1.0 machine type + +Merged patches (4.2.0): +- bcba728 redhat: update pseries-rhel8.1.0 machine type +- redhat: update pseries-rhel-7.6.0 machine type (patch 93039) +- redhat: define pseries-rhel8.2.0 machine type (patch 93041) + +Merged patches (5.1.0): +- eb121ff spapr: Enable DD2.3 accelerated count cache flush in pseries-5.0 machine (partial) + +Merged patches (5.2.0 rc0): +- 311a20f redhat: define pseries-rhel8.3.0 machine type +- 1284167 ppc: Set correct max_cpus value on spapr-rhel* machine types +- 1ab8783 redhat: update pseries-rhel8.2.0 machine type +- b162af531a target/ppc: Add experimental option for enabling secure guests + +Merged patches (weekly-201216): +- 943c936df3 redhat: Add spapr_machine_rhel_default_class_options() +- 030b5e6fba redhat: Define pseries-rhel8.4.0 machine type + +Merged patches (weekly-210602): +- b7128d8ef7 redhat: Define pseries-rhel8.5.0 machine type + +Merged patches (weekly-211006): +- c8f68b47e9 redhat: Update pseries-rhel8.5.0 +--- + hw/ppc/spapr.c | 382 ++++++++++++++++++++++++++++++++++++++++ + hw/ppc/spapr_cpu_core.c | 13 ++ + include/hw/ppc/spapr.h | 4 + + target/ppc/compat.c | 13 +- + target/ppc/cpu.h | 
1 + + target/ppc/kvm.c | 27 +++ + target/ppc/kvm_ppc.h | 13 ++ + 7 files changed, 452 insertions(+), 1 deletion(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 3b5fd749be..cace86028d 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -1593,6 +1593,9 @@ static void spapr_machine_reset(MachineState *machine) + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); ++ if (spapr->svm_allowed) { ++ kvmppc_svm_allow(&error_fatal); ++ } + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && +@@ -3288,6 +3291,20 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) + spapr->host_serial = g_strdup(value); + } + ++static bool spapr_get_svm_allowed(Object *obj, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ return spapr->svm_allowed; ++} ++ ++static void spapr_set_svm_allowed(Object *obj, bool value, Error **errp) ++{ ++ SpaprMachineState *spapr = SPAPR_MACHINE(obj); ++ ++ spapr->svm_allowed = value; ++} ++ + static void spapr_instance_init(Object *obj) + { + SpaprMachineState *spapr = SPAPR_MACHINE(obj); +@@ -3366,6 +3383,12 @@ static void spapr_instance_init(Object *obj) + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); ++ object_property_add_bool(obj, "x-svm-allowed", ++ spapr_get_svm_allowed, ++ spapr_set_svm_allowed); ++ object_property_set_description(obj, "x-svm-allowed", ++ "Allow the guest to become a Secure Guest" ++ " (experimental only)"); + } + + static void spapr_machine_finalizefn(Object *obj) +@@ -4614,6 +4637,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; ++ smc->has_power9_support = true; + } + + static const TypeInfo spapr_machine_info = { +@@ -4665,6 
+4689,7 @@ static void spapr_machine_latest_class_options(MachineClass *mc) + } \ + type_init(spapr_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* + * pseries-6.2 + */ +@@ -4781,6 +4806,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc) + } + + DEFINE_SPAPR_MACHINE(4_1, "4.1", false); ++#endif + + /* + * pseries-4.0 +@@ -4800,6 +4826,8 @@ static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + *nv2atsd = 0; + return true; + } ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_4_0_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -4958,6 +4986,7 @@ DEFINE_SPAPR_MACHINE(2_8, "2.8", false); + /* + * pseries-2.7 + */ ++#endif + + static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, +@@ -5013,6 +5042,7 @@ static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index, + return true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void spapr_machine_2_7_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); +@@ -5127,6 +5157,358 @@ static void spapr_machine_2_1_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); + } + DEFINE_SPAPR_MACHINE(2_1, "2.1", false); ++#endif ++ ++static void spapr_machine_rhel_default_class_options(MachineClass *mc) ++{ ++ /* ++ * Defaults for the latest behaviour inherited from the base class ++ * can be overridden here for all pseries-rhel* machines.
++ */ ++ ++ /* Maximum supported VCPU count */ ++ mc->max_cpus = 384; ++} ++ ++/* ++ * pseries-rhel8.5.0 ++ * like pseries-6.0 ++ */ ++ ++static void spapr_machine_rhel850_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++ ++/* ++ * pseries-rhel8.4.0 ++ * like pseries-5.2 ++ */ ++ ++static void spapr_machine_rhel840_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel840, "rhel8.4.0", false); ++ ++/* ++ * pseries-rhel8.3.0 ++ * like pseries-5.1 ++ */ ++ ++static void spapr_machine_rhel830_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel840_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ ++ /* from pseries-5.1 */ ++ smc->pre_5_2_numa_associativity = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel830, "rhel8.3.0", false); ++ ++/* ++ * pseries-rhel8.2.0 ++ * like pseries-4.2 + pseries-5.0 ++ * except SPAPR_CAP_CCF_ASSIST that has been backported to pseries-rhel8.1.0 ++ */ ++ ++static void spapr_machine_rhel820_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ /* from pseries-5.0 */ ++ static GlobalProperty compat[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, ++ }; ++ ++ spapr_machine_rhel830_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; ++ smc->rma_limit = 16 * GiB; ++ 
mc->nvdimm_supported = false; ++ ++ /* from pseries-5.0 */ ++ mc->numa_mem_supported = true; ++ smc->pre_5_1_assoc_refpoints = true; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel820, "rhel8.2.0", false); ++ ++/* ++ * pseries-rhel8.1.0 ++ * like pseries-4.1 ++ */ ++ ++static void spapr_machine_rhel810_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ static GlobalProperty compat[] = { ++ /* Only allow 4kiB and 64kiB IOMMU pagesizes */ ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, ++ }; ++ ++ spapr_machine_rhel820_class_options(mc); ++ ++ /* from pseries-4.1 */ ++ smc->linux_pci_probe = false; ++ smc->smp_threads_vsmt = false; ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, ++ hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); ++ ++ /* from pseries-4.2 */ ++ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel810, "rhel8.1.0", false); ++ ++/* ++ * pseries-rhel8.0.0 ++ * like pseries-3.1 and pseries-4.0 ++ * except SPAPR_CAP_CFPC, SPAPR_CAP_SBBC and SPAPR_CAP_IBS ++ * that have been backported to pseries-rhel8.0.0 ++ */ ++ ++static void spapr_machine_rhel800_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel810_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, ++ hw_compat_rhel_8_0_len); ++ ++ /* pseries-4.0 */ ++ smc->phb_placement = phb_placement_4_0; ++ smc->irq = &spapr_irq_xics; ++ smc->pre_4_1_migration = true; ++ ++ /* pseries-3.1 */ ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); ++ smc->update_dt_enabled = false; ++ smc->dr_phb_enabled = false; ++ smc->broken_host_serial_model = true; ++ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel800, "rhel8.0.0", false); ++ ++/* ++ * pseries-rhel7.6.0 ++ * like spapr_compat_2_12 and spapr_compat_3_0 ++ * spapr_compat_0 is empty 
++ */ ++GlobalProperty spapr_compat_rhel7_6[] = { ++ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" }, ++ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_6_len = G_N_ELEMENTS(spapr_compat_rhel7_6); ++ ++ ++static void spapr_machine_rhel760_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel800_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_6, spapr_compat_rhel7_6_len); ++ ++ /* from spapr_machine_3_0_class_options() */ ++ smc->legacy_irq_allocation = true; ++ smc->nr_xirqs = 0x400; ++ smc->irq = &spapr_irq_xics_legacy; ++ ++ /* from spapr_machine_2_12_class_options() */ ++ /* We depend on kvm_enabled() to choose a default value for the ++ * hpt-max-page-size capability. Of course we can't do it here ++ * because this is too early and the HW accelerator isn't initialized ++ * yet. Postpone this to machine init (see default_caps_with_cpu()).
++ */ ++ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0; ++ ++ /* SPAPR_CAP_WORKAROUND enabled in pseries-rhel800 by ++ * f21757edc554 ++ * "Enable mitigations by default for pseries-4.0 machine type") ++ */ ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760, "rhel7.6.0", false); ++ ++/* ++ * pseries-rhel7.6.0-sxxm ++ * ++ * pseries-rhel7.6.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel760sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel760_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel760sxxm, "rhel7.6.0-sxxm", false); ++ ++static void spapr_machine_rhel750_class_options(MachineClass *mc) ++{ ++ spapr_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750, "rhel7.5.0", false); ++ ++/* ++ * pseries-rhel7.5.0-sxxm ++ * ++ * pseries-rhel7.5.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel750sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel750sxxm, "rhel7.5.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.4.0 ++ * like spapr_compat_2_9 ++ */ ++GlobalProperty spapr_compat_rhel7_4[] = { ++ { 
TYPE_POWERPC_CPU, "pre-2.10-migration", "on" }, ++}; ++const size_t spapr_compat_rhel7_4_len = G_N_ELEMENTS(spapr_compat_rhel7_4); ++ ++static void spapr_machine_rhel740_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel750_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_4, spapr_compat_rhel7_4_len); ++ smc->has_power9_support = false; ++ smc->pre_2_10_has_unused_icps = true; ++ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED; ++ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740, "rhel7.4.0", false); ++ ++/* ++ * pseries-rhel7.4.0-sxxm ++ * ++ * pseries-rhel7.4.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel740sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel740_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel740sxxm, "rhel7.4.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.3.0 ++ * like spapr_compat_2_6/_2_7/_2_8 but "ddw" has been backported to RHEL7_3 ++ */ ++GlobalProperty spapr_compat_rhel7_3[] = { ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0" }, ++ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" }, ++}; ++const size_t spapr_compat_rhel7_3_len = G_N_ELEMENTS(spapr_compat_rhel7_3); ++ ++static void spapr_machine_rhel730_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ 
spapr_machine_rhel740_class_options(mc); ++ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); ++ mc->default_machine_opts = "modern-hotplug-events=off"; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_3, spapr_compat_rhel7_3_len); ++ ++ smc->phb_placement = phb_placement_2_7; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730, "rhel7.3.0", false); ++ ++/* ++ * pseries-rhel7.3.0-sxxm ++ * ++ * pseries-rhel7.3.0 with speculative execution exploit mitigations enabled by default ++ */ ++ ++static void spapr_machine_rhel730sxxm_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; ++ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD; ++} ++ ++DEFINE_SPAPR_MACHINE(rhel730sxxm, "rhel7.3.0-sxxm", false); ++ ++/* ++ * pseries-rhel7.2.0 ++ */ ++/* Should be like spapr_compat_2_5 + 2_4 + 2_3, but "dynamic-reconfiguration" ++ * has been backported to RHEL7_2 so we don't need it here. 
++ */ ++ ++GlobalProperty spapr_compat_rhel7_2[] = { ++ { "spapr-vlan", "use-rx-buffer-pools", "off" }, ++ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, ++}; ++const size_t spapr_compat_rhel7_2_len = G_N_ELEMENTS(spapr_compat_rhel7_2); ++ ++static void spapr_machine_rhel720_class_options(MachineClass *mc) ++{ ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ ++ spapr_machine_rhel730_class_options(mc); ++ smc->use_ohci_by_default = true; ++ mc->has_hotpluggable_cpus = NULL; ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(mc->compat_props, spapr_compat_rhel7_2, spapr_compat_rhel7_2_len); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel720, "rhel7.2.0", false); + + static void spapr_machine_register_types(void) + { +diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c +index 8ba34f6a1d..78eca1c04a 100644 +--- a/hw/ppc/spapr_cpu_core.c ++++ b/hw/ppc/spapr_cpu_core.c +@@ -24,6 +24,7 @@ + #include "sysemu/reset.h" + #include "sysemu/hw_accel.h" + #include "qemu/error-report.h" ++#include "cpu-models.h" + + static void spapr_reset_vcpu(PowerPCCPU *cpu) + { +@@ -250,6 +251,7 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + { + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); ++ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; +@@ -261,6 +263,17 @@ static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + ++ if (!smc->has_power9_support && ++ (((spapr->max_compat_pvr && ++ ppc_compat_cmp(spapr->max_compat_pvr, ++ CPU_POWERPC_LOGICAL_3_00) >= 0)) || ++ (!spapr->max_compat_pvr && ++ ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, 0)))) { ++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, ++ "POWER9 CPU is not supported by this machine class"); ++ return false; ++ } ++ + if (spapr_irq_cpu_intc_create(spapr, cpu, 
errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; +diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h +index ee7504b976..37a014d59c 100644 +--- a/include/hw/ppc/spapr.h ++++ b/include/hw/ppc/spapr.h +@@ -154,6 +154,7 @@ struct SpaprMachineClass { + bool pre_5_2_numa_associativity; + bool pre_6_2_numa_affinity; + ++ bool has_power9_support; + bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, +@@ -237,6 +238,9 @@ struct SpaprMachineState { + + /* Set by -boot */ + char *boot_device; ++ ++ /* Secure Guest support via x-svm-allowed */ ++ bool svm_allowed; + + /*< public >*/ + char *kvm_type; +diff --git a/target/ppc/compat.c b/target/ppc/compat.c +index 7949a24f5a..f207a9ba01 100644 +--- a/target/ppc/compat.c ++++ b/target/ppc/compat.c +@@ -114,8 +114,19 @@ static const CompatInfo *compat_by_pvr(uint32_t pvr) + return NULL; + } + ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2) ++{ ++ const CompatInfo *compat1 = compat_by_pvr(pvr1); ++ const CompatInfo *compat2 = compat_by_pvr(pvr2); ++ ++ g_assert(compat1); ++ g_assert(compat2); ++ ++ return compat1 - compat2; ++} ++ + static bool pcc_compat(PowerPCCPUClass *pcc, uint32_t compat_pvr, +- uint32_t min_compat_pvr, uint32_t max_compat_pvr) ++ uint32_t min_compat_pvr, uint32_t max_compat_pvr) + { + const CompatInfo *compat = compat_by_pvr(compat_pvr); + const CompatInfo *min = compat_by_pvr(min_compat_pvr); +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index e946da5f3a..23e8b76c85 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1401,6 +1401,7 @@ static inline int cpu_mmu_index(CPUPPCState *env, bool ifetch) + + /* Compatibility modes */ + #if defined(TARGET_PPC64) ++long ppc_compat_cmp(uint32_t pvr1, uint32_t pvr2); + bool ppc_check_compat(PowerPCCPU *cpu, uint32_t compat_pvr, + uint32_t min_compat_pvr, uint32_t max_compat_pvr); + bool ppc_type_check_compat(const char *cputype, uint32_t compat_pvr, +diff 
--git a/target/ppc/kvm.c b/target/ppc/kvm.c +index dc93b99189..154888cce5 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -90,6 +90,7 @@ static int cap_ppc_nested_kvm_hv; + static int cap_large_decr; + static int cap_fwnmi; + static int cap_rpt_invalidate; ++static int cap_ppc_secure_guest; + + static uint32_t debug_inst_opcode; + +@@ -137,6 +138,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT); + kvmppc_get_cpu_characteristics(s); + cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV); ++ cap_ppc_secure_guest = kvm_vm_check_extension(s, KVM_CAP_PPC_SECURE_GUEST); + cap_large_decr = kvmppc_get_dec_bits(); + cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI); + /* +@@ -2563,6 +2565,16 @@ int kvmppc_has_cap_rpt_invalidate(void) + return cap_rpt_invalidate; + } + ++bool kvmppc_has_cap_secure_guest(void) ++{ ++ return !!cap_ppc_secure_guest; ++} ++ ++int kvmppc_enable_cap_secure_guest(void) ++{ ++ return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); ++} ++ + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void) + { + uint32_t host_pvr = mfpvr(); +@@ -2959,3 +2971,18 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++void kvmppc_svm_allow(Error **errp) ++{ ++ if (!kvm_enabled()) { ++ error_setg(errp, "No PEF support in tcg, try x-svm-allowed=off"); ++ return; ++ } ++ ++ if (!kvmppc_has_cap_secure_guest()) { ++ error_setg(errp, "KVM implementation does not support secure guests, " ++ "try x-svm-allowed=off"); ++ } else if (kvmppc_enable_cap_secure_guest() < 0) { ++ error_setg(errp, "Error enabling x-svm-allowed, try x-svm-allowed=off"); ++ } ++} +diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h +index ee9325bf9a..20dbb95989 100644 +--- a/target/ppc/kvm_ppc.h ++++ b/target/ppc/kvm_ppc.h +@@ -40,6 +40,7 @@ int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu); + target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu, + bool 
radix, bool gtse, + uint64_t proc_tbl); ++void kvmppc_svm_allow(Error **errp); + #ifndef CONFIG_USER_ONLY + bool kvmppc_spapr_use_multitce(void); + int kvmppc_spapr_enable_inkernel_multitce(void); +@@ -74,6 +75,8 @@ int kvmppc_get_cap_large_decr(void); + int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable); + int kvmppc_has_cap_rpt_invalidate(void); + int kvmppc_enable_hwrng(void); ++bool kvmppc_has_cap_secure_guest(void); ++int kvmppc_enable_cap_secure_guest(void); + int kvmppc_put_books_sregs(PowerPCCPU *cpu); + PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void); + void kvmppc_check_papr_resize_hpt(Error **errp); +@@ -393,6 +396,16 @@ static inline int kvmppc_has_cap_rpt_invalidate(void) + return false; + } + ++static inline bool kvmppc_has_cap_secure_guest(void) ++{ ++ return false; ++} ++ ++static inline int kvmppc_enable_cap_secure_guest(void) ++{ ++ return -1; ++} ++ + static inline int kvmppc_enable_hwrng(void) + { + return -1; +-- +2.27.0 + diff --git a/0009-Add-s390x-machine-types.patch b/0009-Add-s390x-machine-types.patch new file mode 100644 index 0000000..2d8b554 --- /dev/null +++ b/0009-Add-s390x-machine-types.patch @@ -0,0 +1,186 @@ +From 680f343e58a50a99d17bc7dedd3ee90980912023 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. 
+ +Signed-off-by: Miroslav Rezanina +-- +Merged patches (6.1.0): +- 64a9a5c971 hw/s390x: Remove the RHEL7-only machine type +- 395516d62b redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (6.2.0): +- 3bf66f4520 redhat: Add s390x machine type compatibility update for 6.1 rebase + +Merged patches (7.0.0): +- e6ff4de4f7 redhat: Add s390x machine type compatibility handling for the rebase to v6.2 +- 4b0efa7e21 redhat: Add rhel8.6.0 and rhel9.0.0 machine types for s390x +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +--- + hw/core/machine.c | 6 +++ + hw/s390x/s390-virtio-ccw.c | 104 ++++++++++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + 3 files changed, 111 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index ea430d844e..77202a3570 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,12 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++/* ++ * RHEL only: machine types for previous major releases are deprecated ++ */ ++const char *rhel_old_machine_deprecation = ++ "machine types for previous major releases are deprecated"; ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 90480e7cf9..ec4176a1e0 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_7_0_instance_options(MachineState *machine) + { + } +@@ -1115,6 
+1116,107 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel900_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel900_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); ++ ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++ /* Note: The -rhel8.6.0 and -rhel9.0.0 machines are technically identical */ ++ ccw_machine_rhel900_instance_options(machine); ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel900_class_options(mc); ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ mc->deprecation_reason = rhel_old_machine_deprecation; ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", false); ++ ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ ccw_machine_rhel860_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel860_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ 
ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel840_instance_options(machine); ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, hw_compat_rhel_8_2_len); ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); + + static void ccw_machine_register_types(void) + { +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 
c90a19b4d1..bf59275f18 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -470,4 +470,6 @@ extern const size_t hw_compat_rhel_8_0_len; + extern GlobalProperty hw_compat_rhel_7_6[]; + extern const size_t hw_compat_rhel_7_6_len; + ++extern const char *rhel_old_machine_deprecation; ++ + #endif +-- +2.31.1 + diff --git a/0010-Add-s390x-machine-types.patch b/0010-Add-s390x-machine-types.patch new file mode 100644 index 0000000..fbb8841 --- /dev/null +++ b/0010-Add-s390x-machine-types.patch @@ -0,0 +1,165 @@ +From 4ad9a0d0582eef78946b47563eb2c5b7ddf0cbb0 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:47:32 +0200 +Subject: Add s390x machine types + +Adding changes to add RHEL machine types for s390x architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (weekly-4.1.0): +- Use upstream compat handling + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Merged patches (3.1.0): +- 29df663 s390x/cpumodel: default enable bpb and ppa15 for z196 and later + +Merged patches (4.1.0): +- 6c200d665b hw/s390x/s390-virtio-ccw: Add machine types for RHEL8.0.0 + +Merged patches (4.2.0): +- fb192e5 redhat: s390x: Rename s390-ccw-virtio-rhel8.0.0 to s390-ccw-virtio-rhel8.1.0 +- a9b22e8 redhat: s390x: Add proper compatibility options for the -rhel7.6.0 machine +- hw/s390x: Add the s390-ccw-virtio-rhel8.2.0 machine types (patch 92954) + +Merged patches (weekly-201216): +- a6ae745cce redhat: s390x: add rhel-8.4.0 compat machine + +Merged patches (weekly-210602): +- 50835d3429 redhat: s390x: add rhel-8.5.0 compat machine + +Merged patches (weekly-211006): +- a3bcde27fe redhat: Add s390x machine type compatibility update for 6.1 rebase +--- + hw/s390x/s390-virtio-ccw.c | 99 +++++++++++++++++++++++++++++++++++++- + 1 file changed, 98 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 653587ea62..181856e6cf 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ 
b/hw/s390x/s390-virtio-ccw.c +@@ -767,7 +767,7 @@ bool css_migration_enabled(void) + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + ccw_machine_##suffix##_class_options(mc); \ +- mc->desc = "VirtIO-ccw based S390 machine v" verstr; \ ++ mc->desc = "VirtIO-ccw based S390 machine " verstr; \ + if (latest) { \ + mc->alias = "s390-ccw-virtio"; \ + mc->is_default = true; \ +@@ -791,6 +791,7 @@ bool css_migration_enabled(void) + } \ + type_init(ccw_machine_register_##suffix) + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void ccw_machine_6_2_instance_options(MachineState *machine) + { + } +@@ -1100,6 +1101,102 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + } + DEFINE_CCW_MACHINE(2_4, "2.4", false); ++#endif ++ ++static void ccw_machine_rhel850_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel850_class_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++} ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++ ++static void ccw_machine_rhel840_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel850_instance_options(machine); ++} ++ ++static void ccw_machine_rhel840_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel850_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_4, hw_compat_rhel_8_4_len); ++} ++DEFINE_CCW_MACHINE(rhel840, "rhel8.4.0", false); ++ ++static void ccw_machine_rhel820_instance_options(MachineState *machine) ++{ ++ ccw_machine_rhel840_instance_options(machine); ++} ++ ++static void ccw_machine_rhel820_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel840_class_options(mc); ++ mc->fixup_ram_size = s390_fixup_ram_size; ++ /* we did not publish a rhel8.3.0 machine */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_3, hw_compat_rhel_8_3_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_2, 
hw_compat_rhel_8_2_len); ++} ++DEFINE_CCW_MACHINE(rhel820, "rhel8.2.0", false); ++ ++static void ccw_machine_rhel760_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V3_1 }; ++ ++ ccw_machine_rhel820_instance_options(machine); ++ ++ s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); ++ ++ /* The multiple-epoch facility was not available with rhel7.6.0 on z14GA1 */ ++ s390_cpudef_featoff(14, 1, S390_FEAT_MULTIPLE_EPOCH); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QSIE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_QTOUE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOE); ++ s390_cpudef_featoff(14, 1, S390_FEAT_PTFF_STOUE); ++} ++ ++static void ccw_machine_rhel760_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel820_class_options(mc); ++ /* We never published the s390x version of RHEL-AV 8.0 and 8.1, so add this here */ ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++} ++DEFINE_CCW_MACHINE(rhel760, "rhel7.6.0", false); ++ ++static void ccw_machine_rhel750_instance_options(MachineState *machine) ++{ ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V2_11 }; ++ ccw_machine_rhel760_instance_options(machine); ++ ++ /* before 2.12 we emulated the very first z900, and RHEL 7.5 is ++ based on 2.10 */ ++ s390_set_qemu_cpu_model(0x2064, 7, 1, qemu_cpu_feat); ++ ++ /* bpb and ppa15 were only in the full model in RHEL 7.5 */ ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_PPA15); ++ s390_cpudef_featoff_greater(11, 1, S390_FEAT_BPB); ++} ++ ++GlobalProperty ccw_compat_rhel_7_5[] = { ++ { ++ .driver = TYPE_SCLP_EVENT_FACILITY, ++ .property = "allow_all_mask_sizes", ++ .value = "off", ++ }, ++}; ++const size_t ccw_compat_rhel_7_5_len = G_N_ELEMENTS(ccw_compat_rhel_7_5); ++ ++static void 
ccw_machine_rhel750_class_options(MachineClass *mc) ++{ ++ ccw_machine_rhel760_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(mc->compat_props, ccw_compat_rhel_7_5, ccw_compat_rhel_7_5_len); ++ S390_CCW_MACHINE_CLASS(mc)->hpage_1m_allowed = false; ++} ++DEFINE_CCW_MACHINE(rhel750, "rhel7.5.0", false); + + static void ccw_machine_register_types(void) + { +-- +2.27.0 + diff --git a/0010-Add-x86_64-machine-types.patch b/0010-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..7c48967 --- /dev/null +++ b/0010-Add-x86_64-machine-types.patch @@ -0,0 +1,714 @@ +From 427a575ca57966bc72e1ebf218081da530d435d7 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (6.1.0): +- Update qemu64 cpu spec + +Rebase notes (7.0.0): +- Reset alias for all machine-types except latest one + +Merged patches (6.1.0): +- 59c284ad3b x86: Add x86 rhel8.5 machine types +- a8868b42fe redhat: x86: Enable 'kvm-asyncpf-int' by default +- a3995e2eff Remove RHEL 7.0.0 machine type (only x86_64 changes) +- ad3190a79b Remove RHEL 7.1.0 machine type (only x86_64 changes) +- 84bbe15d4e Remove RHEL 7.2.0 machine type (only x86_64 changes) +- 0215eb3356 Remove RHEL 7.3.0 machine types (only x86_64 changes) +- af69d1ca6e Remove RHEL 7.4.0 machine types (only x86_64 changes) +- 8f7a74ab78 Remove RHEL 7.5.0 machine types (only x86_64 changes) + +Merged patches (7.0.0): +- eae7d8dd3c x86/rhel machine types: Add pc_rhel_8_5_compat +- 6762f56469 x86/rhel machine types: Wire compat into q35 and i440fx +- 5762101438 rhel machine types/x86: set prefer_sockets +- 9ba9ddc632 x86: Add q35 RHEL 8.6.0 machine type +- 6110d865e5 x86: Add q35 RHEL 9.0.0 machine type +- dcc64971bf RHEL: mark old machine types as deprecated (partialy) +- 6b396f182b 
RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 +--- + hw/core/machine.c | 10 ++ + hw/i386/pc.c | 135 +++++++++++++++++++++- + hw/i386/pc_piix.c | 79 ++++++++++++- + hw/i386/pc_q35.c | 227 ++++++++++++++++++++++++++++++++++++- + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 5 + + include/hw/i386/pc.h | 24 ++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 484 insertions(+), 7 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 77202a3570..28989b6e7b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -43,6 +43,16 @@ + const char *rhel_old_machine_deprecation = + "machine types for previous major releases are deprecated"; + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2065589 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need to disable it downstream on the latest hw_compat_rhel_8. ++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index fd55fc725c..263d882af6 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -375,6 +375,137 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type.
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", 
"off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * 
x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. ++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. ++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. 
++ */ ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +@@ -1738,6 +1869,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -1748,7 +1880,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c797e98312..0cacc0d623 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/xen.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include "hw/xen/xen_pt.h" +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,7 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). 
+ */ + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -967,3 +969,76 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ m->is_default = 1; ++ m->smp_props.prefer_sockets = true; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ ++ /* All RHEL machines for prior major releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, 
++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index b695f88c45..157160e069 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void 
pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -631,3 +632,225 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 710; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel900(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel900_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL"; ++ pcmc->smbios_stream_version = "9.0.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, ++ pc_q35_machine_rhel900_options); ++ ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel900_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ ++ /* All RHEL machines for prior major 
releases are deprecated */ ++ m->deprecation_reason = rhel_old_machine_deprecation; ++ ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel860_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = 
PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ 
++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index ec4176a1e0..465a2a09d2 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1136,6 +1136,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { + ccw_machine_rhel900_class_options(mc); ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + + /* All RHEL machines for prior major releases are deprecated */ + mc->deprecation_reason = 
rhel_old_machine_deprecation; +diff --git a/include/hw/boards.h b/include/hw/boards.h +index bf59275f18..d1555665df 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -266,6 +266,8 @@ struct MachineClass { + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + bool ignore_boot_device_suffixes; + bool smbus_no_migration_support; + bool nvdimm_supported; +@@ -449,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 91331059d9..419a6ec24b 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -289,6 +289,30 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 5eb955ce9a..74c1396a93 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -137,6 +137,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 9cf8e03669..6d1e009443 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3488,6 +3488,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3822,6 +3823,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.31.1 + diff --git a/0011-Add-x86_64-machine-types.patch b/0011-Add-x86_64-machine-types.patch new file mode 100644 index 0000000..2702772 --- /dev/null 
+++ b/0011-Add-x86_64-machine-types.patch @@ -0,0 +1,1276 @@ +From c2b3564ce466bc5069bf9f5b0694025c68b0858d Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Fri, 19 Oct 2018 13:10:31 +0200 +Subject: Add x86_64 machine types + +Adding changes to add RHEL machine types for x86_64 architecture. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (qemu-4.0.0): +- Use upstream compat handling + +Rebase notes (3.1.0): +- Removed xsave changes + +Rebase notes (4.1.0): +- Updated format for compat structures + +Rebase notes (4.2.0-rc2): +- Use X86MachineClass for save_tsc_khz (upstream change) + +Rebase notes (weekly-210303): +- Use rhel-8.4.0 hw compat + +Rebase notes (weekly-210519): +- kvm_default_props moved to new file (upstream) + +Rebase notes (6.2.0-rc0): +- linuxboot_dma_enabled moved to X86MachineState + +Merged patches (4.1.0): +- f4dc802 pc: 7.5 compat entries +- 456ed3e pc: PC_RHEL7_6_COMPAT +- 04119ee pc: Add compat for pc-i440fx-rhel7.6.0 machine type +- b3b3687 pc: Add pc-q35-8.0.0 machine type +- 8d46fc6 pc: Add x-migrate-smi-count=off to PC_RHEL7_6_COMPAT +- 1de7949 kvm: clear out KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT for older machine types +- 18cf0d7 target/i386: Disable MPX support on named CPU models (partially) +- 2660667 rhel: Set host-phys-bits-limit=48 on rhel machine-types + +Merged patches (4.2.0): +- 7d5c2ef pc: Don't make die-id mandatory unless necessary +- e42808c x86 machine types: pc_rhel_8_0_compat +- 9de83a8 x86 machine types: q35: Fixup units_per_default_bus +- 6df1559 x86 machine types: Fixup dynamic sysbus entries +- 0784125 x86 machine types: add pc-q35-rhel8.1.0 +- machines/x86: Add rhel 8.2 machine type (patch 92959) + +Merged patches (5.1.0): +- 481357e RHEL: hw/i386: disable nested PERF_GLOBAL_CTRL MSR support +- e6c3fbf hw/smbios: set new default SMBIOS fields for Windows driver support (partially) + +Merged patches (5.2.0 rc0): +- b02c9f5 x86: Add 8.3.0 x86_64 machine type +- f2edc4f q35: Set max_cpus to 512 +- 6d7ba66
machine types/numa: set numa_mem_supported on old machine types (partially) +- 25c5644 machine_types/numa: compatibility for auto_enable_numa_with_memdev (partially) +- e2d3209 x86: lpc9: let firmware negotiate 'CPU hotplug with SMI' features (partially) + +Merged patches (weekly-210120): +- d0afeaa0c4 RHEL: Switch pvpanic test to q35 +- e19cdad83c 8.4 x86 machine type + +Merged patches (weekly-210203): +- 96f8781bd6 q35: Increase max_cpus to 710 on pc-q35-rhel8* machine types + +Merged patches (weekly-210224): +- 70d3924521 redhat: Add some devices for exporting upstream machine types + - machine type chunks only + +Merged patches (6.0.0 rc0): +- 031c690804 i386/acpi: restore device paths for pre-5.1 vms + +Merged patches (weekly-210623): +- 64c350696f x86: Add x86 rhel8.5 machine types +- 1c8fe5e164 redhat: x86: Enable 'kvm-asyncpf-int' by default + +Merged patches (weekly-210714): +- 618e2424ed redhat: Expose upstream machines pc-4.2 and pc-2.11 +- c4d1aa8bf2 redhat: Enable FDC device for upstream machines too +- 66882f9a32 redhat: Add hw_compat_4_2_extra and apply to upstream machines + +Fix machine type +--- + hw/block/fdc.c | 5 +- + hw/i386/acpi-build.c | 3 + + hw/i386/pc.c | 298 ++++++++++++++++++++++++++++++++++++- + hw/i386/pc_piix.c | 274 +++++++++++++++++++++++++++++++++- + hw/i386/pc_q35.c | 234 ++++++++++++++++++++++++++++- + include/hw/boards.h | 2 + + include/hw/i386/pc.h | 45 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + target/i386/kvm/kvm.c | 4 + + tests/qtest/pvpanic-test.c | 5 +- + 10 files changed, 862 insertions(+), 9 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 97fa6de423..63042ef030 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,7 +2341,10 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.")) { ++ if (!strstr(mc->name, "-rhel7.") && ++ /*
Exported two upstream machine types allows FDC too */ ++ strcmp(mc->name, "pc-i440fx-4.2") && ++ strcmp(mc->name, "pc-i440fx-2.11")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a99c6e4fe3..447ea35275 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -230,6 +230,9 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) + pm->fadt.reset_reg = r; + pm->fadt.reset_val = 0xf; + pm->fadt.flags |= 1 << ACPI_FADT_F_RESET_REG_SUP; ++ if (object_property_get_bool(lpc, ++ "__com.redhat_force-rev1-fadt", NULL)) ++ pm->fadt.rev = 1; + pm->cpu_hp_io_base = ICH9_CPU_HOTPLUG_IO_BASE; + pm->smi_on_cpuhp = + !!(smi_features & BIT_ULL(ICH9_LPC_SMI_F_CPU_HOTPLUG_BIT)); +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a2ef40ecbc..e8109954ca 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -371,6 +371,296 @@ GlobalProperty pc_compat_1_4[] = { + }; + const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + ++/* This macro is for changes to properties that are RHEL specific, ++ * different to the current upstream and to be applied to the latest ++ * machine type. 
++ */ ++GlobalProperty pc_rhel_compat[] = { ++ { TYPE_X86_CPU, "host-phys-bits", "on" }, ++ { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, ++ { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, ++ { TYPE_X86_CPU, "vmx-exit-load-perf-global-ctrl", "off" }, ++ /* bz 1508330 */ ++ { "vfio-pci", "x-no-geforce-quirks", "on" }, ++ /* bz 1941397 */ ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "on" }, ++}; ++const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); ++ ++GlobalProperty pc_rhel_8_4_compat[] = { ++ /* pc_rhel_8_4_compat from pc_compat_5_2 */ ++ { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, ++ { TYPE_X86_CPU, "kvm-asyncpf-int", "off" }, ++}; ++const size_t pc_rhel_8_4_compat_len = G_N_ELEMENTS(pc_rhel_8_4_compat); ++ ++GlobalProperty pc_rhel_8_3_compat[] = { ++ /* pc_rhel_8_3_compat from pc_compat_5_1 */ ++ { "ICH9-LPC", "x-smi-cpu-hotplug", "off" }, ++}; ++const size_t pc_rhel_8_3_compat_len = G_N_ELEMENTS(pc_rhel_8_3_compat); ++ ++GlobalProperty pc_rhel_8_2_compat[] = { ++ /* pc_rhel_8_2_compat from pc_compat_4_2 */ ++ { "mch", "smbase-smram", "off" }, ++}; ++const size_t pc_rhel_8_2_compat_len = G_N_ELEMENTS(pc_rhel_8_2_compat); ++ ++/* pc_rhel_8_1_compat is empty since pc_4_1_compat is */ ++GlobalProperty pc_rhel_8_1_compat[] = { }; ++const size_t pc_rhel_8_1_compat_len = G_N_ELEMENTS(pc_rhel_8_1_compat); ++ ++GlobalProperty pc_rhel_8_0_compat[] = { ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "intel-iommu", "dma-drain", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G4" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "off" }, ++ /* 
pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Opteron_G5" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "npt", "off" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "EPYC-IBPB" "-" TYPE_X86_CPU, "nrip-save", "off" }, ++ /** The mpx=on entries from pc_compat_3_1 are in pc_rhel_7_6_compat **/ ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "stepping", "5" }, ++ /* pc_rhel_8_0_compat from pc_compat_3_1 */ ++ { TYPE_X86_CPU, "x-intel-pt-auto-level", "off" }, ++}; ++const size_t pc_rhel_8_0_compat_len = G_N_ELEMENTS(pc_rhel_8_0_compat); ++ ++/* Similar to PC_COMPAT_3_0 + PC_COMPAT_2_12, but: ++ * all of the 2_12 stuff was already in 7.6 from bz 1481253 ++ * x-migrate-smi-count comes from PC_COMPAT_2_11 but ++ * is really tied to kernel version so keep it off on 7.x ++ * machine types irrespective of host. 
++ */ ++GlobalProperty pc_rhel_7_6_compat[] = { ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { TYPE_X86_CPU, "x-hv-synic-kvm-only", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_3_0 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "pku", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { TYPE_X86_CPU, "x-migrate-smi-count", "off" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Client-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Skylake-Server-IBRS" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Cascadelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Client" "-" TYPE_X86_CPU, "mpx", "on" }, ++ /* pc_rhel_7_6_compat from pc_compat_2_11 */ ++ { "Icelake-Server" "-" TYPE_X86_CPU, "mpx", "on" }, ++}; ++const size_t pc_rhel_7_6_compat_len = G_N_ELEMENTS(pc_rhel_7_6_compat); ++ ++/* Similar to PC_COMPAT_2_11 + PC_COMPAT_2_10, but: ++ * - x-hv-max-vps was backported to 7.5 ++ * - x-pci-hole64-fix was backported to 7.5 ++ */ ++GlobalProperty pc_rhel_7_5_compat[] = { ++ /* pc_rhel_7_5_compat from pc_compat_2_11 */ ++ { "Skylake-Server" "-" TYPE_X86_CPU, "clflushopt", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "legacy-cache", "on" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { TYPE_X86_CPU, "topoext", "off" }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++ /* pc_rhel_7_5_compat from pc_compat_2_12 */ ++ { "EPYC-IBPB-" TYPE_X86_CPU, "xlevel", stringify(0x8000000a) }, ++}; ++const size_t 
pc_rhel_7_5_compat_len = G_N_ELEMENTS(pc_rhel_7_5_compat); ++ ++GlobalProperty pc_rhel_7_4_compat[] = { ++ /* pc_rhel_7_4_compat from pc_compat_2_9 */ ++ { "mch", "extended-tseg-mbytes", stringify(0) }, ++ /* bz 1489800 */ ++ { "ICH9-LPC", "__com.redhat_force-rev1-fadt", "on" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "i440FX-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { "q35-pcihost", "x-pci-hole64-fix", "off" }, ++ /* pc_rhel_7_4_compat from pc_compat_2_10 */ ++ { TYPE_X86_CPU, "x-hv-max-vps", "0x40" }, ++}; ++const size_t pc_rhel_7_4_compat_len = G_N_ELEMENTS(pc_rhel_7_4_compat); ++ ++GlobalProperty pc_rhel_7_3_compat[] = { ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "kvmclock", "x-mach-use-reliable-get-clock", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "l3-cache", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { TYPE_X86_CPU, "full-cpuid-auto-level", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "family", "15" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "Opteron_G3" "-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_7 */ ++ { "isa-pcspk", "migrate", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_6 */ ++ { TYPE_X86_CPU, "cpuid-0xb", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "ICH9-LPC", "x-smi-broadcast", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { TYPE_X86_CPU, "vmware-cpuid-freq", "off" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_8 */ ++ { "Haswell-" TYPE_X86_CPU, "stepping", "1" }, ++ /* pc_rhel_7_3_compat from pc_compat_2_3 added in 2.9*/ ++ { TYPE_X86_CPU, "kvm-no-smi-migration", "on" }, ++}; ++const size_t pc_rhel_7_3_compat_len = G_N_ELEMENTS(pc_rhel_7_3_compat); ++ ++GlobalProperty pc_rhel_7_2_compat[] = { ++ { "phenom" "-" 
TYPE_X86_CPU, "rdtscp", "off"}, ++ { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, ++ { "Haswell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Haswell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, ++ { "Broadwell-noTSX-IBRS" "-" TYPE_X86_CPU, "abm", "off" }, ++ { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, ++ { TYPE_X86_CPU, "check", "off" }, ++ { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, ++ { TYPE_X86_CPU, "arat", "off" }, ++ { "usb-redir", "streams", "off" }, ++ { TYPE_X86_CPU, "fill-mtrr-mask", "off" }, ++ { "apic-common", "legacy-instance-id", "on" }, ++}; ++const size_t pc_rhel_7_2_compat_len = G_N_ELEMENTS(pc_rhel_7_2_compat); ++ ++GlobalProperty pc_rhel_7_1_compat[] = { ++ { "kvm64" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "kvm32" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Penryn" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "SandyBridge-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "vme", "off" }, ++ { "Haswell" "-" 
TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Haswell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Haswell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "f16c", "off" }, ++ { "Broadwell" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "Broadwell-IBRS" "-" TYPE_X86_CPU, "rdrand", "off" }, ++ { "coreduo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "core2duo" "-" TYPE_X86_CPU, "vmx", "on" }, ++ { "qemu64" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "kvm64" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "pentium3" "-" TYPE_X86_CPU, "min-level", stringify(2) }, ++ { "n270" "-" TYPE_X86_CPU, "min-level", stringify(5) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-level", stringify(4) }, ++ { "n270" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Penryn" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Conroe" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Nehalem" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Westmere" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "SandyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "IvyBridge" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Haswell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++ { "Broadwell-noTSX" "-" TYPE_X86_CPU, "min-xlevel", stringify(0x8000000a) }, ++}; ++const size_t pc_rhel_7_1_compat_len = G_N_ELEMENTS(pc_rhel_7_1_compat); ++ ++/* ++ * The PC_RHEL_*_COMPAT serve the same purpose for RHEL-7 machine ++ * types as the PC_COMPAT_* do for upstream types. 
++ * PC_RHEL_7_*_COMPAT apply both to i440fx and q35 types. ++ */ ++ ++/* ++ * RHEL-7 is based on QEMU 1.5.3, so this needs the PC_COMPAT_* ++ * between our base and 1.5, less stuff backported to RHEL-7.0 ++ * (usb-device.msos-desc), less stuff for devices we changed ++ * (qemu64-x86_64-cpu) or don't support (hpet, pci-serial-2x, ++ * pci-serial-4x) in 7.0. ++ */ ++GlobalProperty pc_rhel_7_0_compat[] = { ++ { "virtio-scsi-pci", "any_layout", "off" }, ++ { "PIIX4_PM", "memory-hotplug-support", "off" }, ++ { "apic", "version", stringify(0x11) }, ++ { "nec-usb-xhci", "superspeed-ports-first", "off" }, ++ { "nec-usb-xhci", "force-pcie-endcap", "on" }, ++ { "pci-serial", "prog_if", stringify(0) }, ++ { "virtio-net-pci", "guest_announce", "off" }, ++ { "ICH9-LPC", "memory-hotplug-support", "off" }, ++ { "xio3130-downstream", COMPAT_PROP_PCP, "off" }, ++ { "ioh3420", COMPAT_PROP_PCP, "off" }, ++ { "PIIX4_PM", "acpi-pci-hotplug-with-bridge-support", "off" }, ++ { "e1000", "mitigation", "off" }, ++ { "virtio-net-pci", "ctrl_guest_offloads", "off" }, ++ { "Conroe" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Penryn" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Nehalem-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Westmere-IBRS" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G1" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G2" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G3" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G4" "-" TYPE_X86_CPU, "x2apic", "on" }, ++ { "Opteron_G5" "-" TYPE_X86_CPU, "x2apic", "on" }, ++}; ++const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); ++ ++/* ++ * RHEL: These properties only apply to the RHEL exported machine types ++ * pc-4.2/2.11 for the purpose to have a limited upstream machines support ++ * which can be migrated to RHEL. 
Let's avoid touching hw_compat_4_2 directly ++ * so that we can have some isolation against the upstream code. ++ */ ++GlobalProperty hw_compat_4_2_extra[] = { ++ /* By default enlarge the default virtio-net-pci ROM to 512KB. */ ++ { "virtio-net-pci", "romsize", "0x80000" }, ++}; ++const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); ++ + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +@@ -904,7 +1194,8 @@ void pc_memory_init(PCMachineState *pcms, + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); +- if (pcmc->pci_enabled) { ++ /* RH difference: See bz 1489800, explicitly make ROM ro */ ++ if (pcmc->pc_rom_ro) { + memory_region_set_readonly(option_rom_mr, true); + } + memory_region_add_subregion_overlap(rom_memory, +@@ -1694,6 +1985,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->pvh_enabled = true; + pcmc->kvmclock_create_always = true; + assert(!mc->get_hotplug_handler); ++ pcmc->pc_rom_ro = true; ++ mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +@@ -1704,7 +1997,8 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->has_hotpluggable_cpus = true; + mc->default_boot_order = "cad"; + mc->block_default_type = IF_IDE; +- mc->max_cpus = 255; ++ /* 240: max CPU count for RHEL */ ++ mc->max_cpus = 240; + mc->reset = pc_machine_reset; + mc->wakeup = pc_machine_wakeup; + hc->pre_plug = pc_machine_device_pre_plug_cb; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index dda3f64f19..2885edffe9 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -50,6 +50,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/xen.h" ++#include "migration/migration.h" + #ifdef CONFIG_XEN + #include + #include 
"hw/xen/xen_pt.h" +@@ -174,8 +175,8 @@ static void pc_init1(MachineState *machine, + if (pcmc->smbios_defaults) { + MachineClass *mc = MACHINE_GET_CLASS(machine); + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -314,6 +315,15 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + ++/* ++ * NOTE! Not all the upstream machine types are disabled for RHEL. For ++ * providing a very limited support for upstream machine types, pc machines ++ * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros ++ * a bit messed up, but please read this comment first so that we can have a ++ * rough understanding of what we're going to do. ++ */ ++ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { + X86MachineState *x86ms = X86_MACHINE(machine); +@@ -389,6 +399,8 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -424,8 +436,10 @@ static void pc_i440fx_6_2_machine_options(MachineClass *m) + pcmc->default_cpu_version = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_2, "pc-i440fx-6.2", NULL, + pc_i440fx_6_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_1_machine_options(MachineClass *m) + { +@@ -437,8 +451,10 @@ static void pc_i440fx_6_1_machine_options(MachineClass *m) + m->smp_props.prefer_sockets = true; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + 
DEFINE_I440FX_MACHINE(v6_1, "pc-i440fx-6.1", NULL, + pc_i440fx_6_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_6_0_machine_options(MachineClass *m) + { +@@ -449,8 +465,10 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -461,8 +479,10 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -477,8 +497,10 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -491,8 +513,10 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -501,8 +525,21 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + 
compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); ++ ++ /* ++ * RHEL: Mark all upstream machines as deprecated because they're not ++ * supported by RHEL, even if exported. ++ */ ++ m->deprecation_reason = "Not supported by RHEL"; ++ /* ++ * RHEL: Specific compat properties to have limited support for upstream ++ * machines exported. ++ */ ++ compat_props_add(m->compat_props, hw_compat_4_2_extra, ++ hw_compat_4_2_extra_len); + } + ++/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -515,8 +552,10 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -529,8 +568,10 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -546,8 +587,10 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -556,8 +599,10 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + ++#if 0 /* Disabled 
for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -566,8 +611,10 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); ++#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -576,9 +623,11 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + ++/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +@@ -951,3 +1000,224 @@ static void xenfv_3_1_machine_options(MachineClass *m) + DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init, + xenfv_3_1_machine_options); + #endif ++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel7 machine type */ ++static void pc_machine_rhel7_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ m->family = "pc_piix_Y"; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ pcmc->default_nic_model = "e1000"; ++ pcmc->pci_root_uid = 0; ++ m->default_display = "std"; ++ m->no_parallel = 1; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++ m->alias = "pc"; ++ 
m->is_default = 1; ++} ++ ++static void pc_init_rhel760(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel760_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel7_options(m); ++ m->desc = "RHEL 7.6.0 PC (i440FX + PIIX, 1996)"; ++ m->async_pf_vmexit_disable = true; ++ m->smbus_no_migration_support = true; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ pcmc->kvmclock_create_always = false; ++ /* From pc_i440fx_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel760, "pc-i440fx-rhel7.6.0", pc_init_rhel760, ++ pc_machine_rhel760_options); ++ ++static void pc_init_rhel750(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel750_options(MachineClass *m) ++{ ++ pc_machine_rhel760_options(m); ++ 
m->alias = NULL; ++ m->is_default = 0; ++ m->desc = "RHEL 7.5.0 PC (i440FX + PIIX, 1996)"; ++ m->auto_enable_numa_with_memhp = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel750, "pc-i440fx-rhel7.5.0", pc_init_rhel750, ++ pc_machine_rhel750_options); ++ ++static void pc_init_rhel740(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_machine_rhel750_options(m); ++ m->desc = "RHEL 7.4.0 PC (i440FX + PIIX, 1996)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel740, "pc-i440fx-rhel7.4.0", pc_init_rhel740, ++ pc_machine_rhel740_options); ++ ++static void pc_init_rhel730(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_machine_rhel740_options(m); ++ m->desc = "RHEL 7.3.0 PC (i440FX + PIIX, 1996)"; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel730, "pc-i440fx-rhel7.3.0", pc_init_rhel730, ++ pc_machine_rhel730_options); ++ ++ ++static void pc_init_rhel720(MachineState *machine) ++{ ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel720_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ X86MachineClass *x86mc = 
X86_MACHINE_CLASS(m); ++ pc_machine_rhel730_options(m); ++ m->desc = "RHEL 7.2.0 PC (i440FX + PIIX, 1996)"; ++ /* From pc_i440fx_2_5_machine_options */ ++ x86mc->save_tsc_khz = false; ++ m->legacy_fw_cfg_order = 1; ++ /* Note: broken_reserved_end was already in 7.2 */ ++ /* From pc_i440fx_2_6_machine_options */ ++ pcmc->legacy_cpu_hotplug = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_2, hw_compat_rhel_7_2_len); ++ compat_props_add(m->compat_props, pc_rhel_7_2_compat, pc_rhel_7_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel720, "pc-i440fx-rhel7.2.0", pc_init_rhel720, ++ pc_machine_rhel720_options); ++ ++static void pc_compat_rhel710(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ /* From pc_compat_2_2 */ ++ pcmc->rsdp_in_ram = false; ++ machine->suppress_vmdesc = true; ++ ++ /* From pc_compat_2_1 */ ++ pcmc->smbios_uuid_encoded = false; ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->enforce_aligned_dimm = false; ++ ++ /* Disable all the extra subsections that were added in 2.2 */ ++ migrate_pre_2_2 = true; ++ ++ /* From pc_i440fx_2_4_machine_options */ ++ pcmc->broken_reserved_end = true; ++} ++ ++static void pc_init_rhel710(MachineState *machine) ++{ ++ pc_compat_rhel710(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel710_options(MachineClass *m) ++{ ++ pc_machine_rhel720_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.1.0 PC (i440FX + PIIX, 1996)"; ++ m->default_display = "cirrus"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_1, hw_compat_rhel_7_1_len); ++ compat_props_add(m->compat_props, pc_rhel_7_1_compat, pc_rhel_7_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel710, "pc-i440fx-rhel7.1.0", pc_init_rhel710, ++ pc_machine_rhel710_options); ++ ++static void pc_compat_rhel700(MachineState *machine) ++{ ++ PCMachineState *pcms = PC_MACHINE(machine); ++ 
PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); ++ ++ pc_compat_rhel710(machine); ++ ++ /* Upstream enables it for everyone, we're a little more selective */ ++ x86_cpu_change_kvm_default("x2apic", NULL); ++ x86_cpu_change_kvm_default("svm", NULL); ++ pcmc->legacy_acpi_table_size = 6418; /* see pc_compat_2_0() */ ++ pcmc->smbios_legacy_mode = true; ++ pcmc->has_reserved_memory = false; ++ migrate_cve_2014_5263_xhci_fields = true; ++} ++ ++static void pc_init_rhel700(MachineState *machine) ++{ ++ pc_compat_rhel700(machine); ++ pc_init1(machine, TYPE_I440FX_PCI_HOST_BRIDGE, \ ++ TYPE_I440FX_PCI_DEVICE); ++} ++ ++static void pc_machine_rhel700_options(MachineClass *m) ++{ ++ pc_machine_rhel710_options(m); ++ m->family = "pc_piix_Y"; ++ m->desc = "RHEL 7.0.0 PC (i440FX + PIIX, 1996)"; ++ compat_props_add(m->compat_props, pc_rhel_7_0_compat, pc_rhel_7_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(rhel700, "pc-i440fx-rhel7.0.0", pc_init_rhel700, ++ pc_machine_rhel700_options); +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 235054a643..c67418b6a9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -197,8 +197,8 @@ static void pc_q35_init(MachineState *machine) + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", +- mc->name, pcmc->smbios_legacy_mode, ++ smbios_set_defaults("Red Hat", "KVM", ++ mc->desc, pcmc->smbios_legacy_mode, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version, +@@ -342,6 +342,7 @@ static void pc_q35_init(MachineState *machine) + DEFINE_PC_MACHINE(suffix, name, pc_init_##suffix, optionfn) + + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_q35_machine_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +@@ -620,3 +621,232 @@ static void pc_q35_2_4_machine_options(MachineClass *m) + + DEFINE_Q35_MACHINE(v2_4, "pc-q35-2.4", NULL, + pc_q35_2_4_machine_options); 
++#endif /* Disabled for Red Hat Enterprise Linux */ ++ ++/* Red Hat Enterprise Linux machine types */ ++ ++/* Options for the latest rhel q35 machine type */ ++static void pc_q35_machine_rhel_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pcmc->default_nic_model = "e1000e"; ++ pcmc->pci_root_uid = 0; ++ m->family = "pc_q35_Z"; ++ m->units_per_default_bus = 1; ++ m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_display = "std"; ++ m->no_floppy = 1; ++ m->no_parallel = 1; ++ pcmc->default_cpu_version = 1; ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_AMD_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_INTEL_IOMMU_DEVICE); ++ machine_class_allow_dynamic_sysbus_dev(m, TYPE_RAMFB_DEVICE); ++ m->alias = "q35"; ++ m->max_cpus = 710; ++ compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); ++} ++ ++static void pc_q35_init_rhel850(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel850_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.5.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, ++ pc_q35_machine_rhel850_options); ++ ++ ++static void pc_q35_init_rhel840(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel840_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel850_options(m); ++ m->desc = "RHEL-8.4.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.4.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_4, ++ hw_compat_rhel_8_4_len); ++ compat_props_add(m->compat_props, pc_rhel_8_4_compat, ++ pc_rhel_8_4_compat_len); ++} ++ 
++DEFINE_PC_MACHINE(q35_rhel840, "pc-q35-rhel8.4.0", pc_q35_init_rhel840, ++ pc_q35_machine_rhel840_options); ++ ++ ++static void pc_q35_init_rhel830(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel830_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel840_options(m); ++ m->desc = "RHEL-8.3.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.3.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_3, ++ hw_compat_rhel_8_3_len); ++ compat_props_add(m->compat_props, pc_rhel_8_3_compat, ++ pc_rhel_8_3_compat_len); ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->kvmclock_create_always = false; ++ /* From pc_q35_5_1_machine_options() */ ++ pcmc->pci_root_uid = 1; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel830, "pc-q35-rhel8.3.0", pc_q35_init_rhel830, ++ pc_q35_machine_rhel830_options); ++ ++static void pc_q35_init_rhel820(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel820_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel830_options(m); ++ m->desc = "RHEL-8.2.0 PC (Q35 + ICH9, 2009)"; ++ m->numa_mem_supported = true; ++ m->auto_enable_numa_with_memdev = false; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.2.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_2, ++ hw_compat_rhel_8_2_len); ++ compat_props_add(m->compat_props, pc_rhel_8_2_compat, ++ pc_rhel_8_2_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel820, "pc-q35-rhel8.2.0", pc_q35_init_rhel820, ++ pc_q35_machine_rhel820_options); ++ ++static void pc_q35_init_rhel810(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel810_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel820_options(m); ++ m->desc = "RHEL-8.1.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; 
++ pcmc->smbios_stream_product = NULL; ++ pcmc->smbios_stream_version = NULL; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_1, hw_compat_rhel_8_1_len); ++ compat_props_add(m->compat_props, pc_rhel_8_1_compat, pc_rhel_8_1_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel810, "pc-q35-rhel8.1.0", pc_q35_init_rhel810, ++ pc_q35_machine_rhel810_options); ++ ++static void pc_q35_init_rhel800(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel800_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel810_options(m); ++ m->desc = "RHEL-8.0.0 PC (Q35 + ICH9, 2009)"; ++ m->smbus_no_migration_support = true; ++ m->alias = NULL; ++ pcmc->pvh_enabled = false; ++ pcmc->default_cpu_version = CPU_VERSION_LEGACY; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_0, hw_compat_rhel_8_0_len); ++ compat_props_add(m->compat_props, pc_rhel_8_0_compat, pc_rhel_8_0_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel800, "pc-q35-rhel8.0.0", pc_q35_init_rhel800, ++ pc_q35_machine_rhel800_options); ++ ++static void pc_q35_init_rhel760(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel760_options(MachineClass *m) ++{ ++ pc_q35_machine_rhel800_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.6.0 PC (Q35 + ICH9, 2009)"; ++ m->async_pf_vmexit_disable = true; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_6, hw_compat_rhel_7_6_len); ++ compat_props_add(m->compat_props, pc_rhel_7_6_compat, pc_rhel_7_6_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel760, "pc-q35-rhel7.6.0", pc_q35_init_rhel760, ++ pc_q35_machine_rhel760_options); ++ ++static void pc_q35_init_rhel750(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel750_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel760_options(m); ++ m->alias = NULL; ++ m->desc = "RHEL-7.5.0 PC (Q35 + ICH9, 2009)"; ++ 
m->auto_enable_numa_with_memhp = false; ++ pcmc->default_nic_model = "e1000"; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_5, hw_compat_rhel_7_5_len); ++ compat_props_add(m->compat_props, pc_rhel_7_5_compat, pc_rhel_7_5_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel750, "pc-q35-rhel7.5.0", pc_q35_init_rhel750, ++ pc_q35_machine_rhel750_options); ++ ++static void pc_q35_init_rhel740(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel740_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel750_options(m); ++ m->desc = "RHEL-7.4.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->pc_rom_ro = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_4, hw_compat_rhel_7_4_len); ++ compat_props_add(m->compat_props, pc_rhel_7_4_compat, pc_rhel_7_4_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel740, "pc-q35-rhel7.4.0", pc_q35_init_rhel740, ++ pc_q35_machine_rhel740_options); ++ ++static void pc_q35_init_rhel730(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel730_options(MachineClass *m) ++{ ++ X86MachineClass *x86mc = X86_MACHINE_CLASS(m); ++ pc_q35_machine_rhel740_options(m); ++ m->desc = "RHEL-7.3.0 PC (Q35 + ICH9, 2009)"; ++ m->max_cpus = 255; ++ x86mc->fwcfg_dma_enabled = false; ++ compat_props_add(m->compat_props, hw_compat_rhel_7_3, hw_compat_rhel_7_3_len); ++ compat_props_add(m->compat_props, pc_rhel_7_3_compat, pc_rhel_7_3_compat_len); ++} ++ ++DEFINE_PC_MACHINE(q35_rhel730, "pc-q35-rhel7.3.0", pc_q35_init_rhel730, ++ pc_q35_machine_rhel730_options); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 8bba96ef2b..04e8759815 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -263,6 +263,8 @@ struct MachineClass { + strList *allowed_dynamic_sysbus_devices; + bool auto_enable_numa_with_memhp; + bool auto_enable_numa_with_memdev; ++ /* RHEL only */ ++ bool async_pf_vmexit_disable; + bool ignore_boot_device_suffixes; + 
bool smbus_no_migration_support; + bool nvdimm_supported; +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 7ccc9a1a07..d0544ee119 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -125,6 +125,9 @@ struct PCMachineClass { + + /* create kvmclock device even when KVM PV features are not exposed */ + bool kvmclock_create_always; ++ ++ /* RH only, see bz 1489800 */ ++ bool pc_rom_ro; + }; + + #define TYPE_PC_MACHINE "generic-pc-machine" +@@ -280,6 +283,48 @@ extern const size_t pc_compat_1_5_len; + extern GlobalProperty pc_compat_1_4[]; + extern const size_t pc_compat_1_4_len; + ++extern GlobalProperty pc_rhel_compat[]; ++extern const size_t pc_rhel_compat_len; ++ ++extern GlobalProperty pc_rhel_8_4_compat[]; ++extern const size_t pc_rhel_8_4_compat_len; ++ ++extern GlobalProperty pc_rhel_8_3_compat[]; ++extern const size_t pc_rhel_8_3_compat_len; ++ ++extern GlobalProperty pc_rhel_8_2_compat[]; ++extern const size_t pc_rhel_8_2_compat_len; ++ ++extern GlobalProperty pc_rhel_8_1_compat[]; ++extern const size_t pc_rhel_8_1_compat_len; ++ ++extern GlobalProperty pc_rhel_8_0_compat[]; ++extern const size_t pc_rhel_8_0_compat_len; ++ ++extern GlobalProperty pc_rhel_7_6_compat[]; ++extern const size_t pc_rhel_7_6_compat_len; ++ ++extern GlobalProperty pc_rhel_7_5_compat[]; ++extern const size_t pc_rhel_7_5_compat_len; ++ ++extern GlobalProperty pc_rhel_7_4_compat[]; ++extern const size_t pc_rhel_7_4_compat_len; ++ ++extern GlobalProperty pc_rhel_7_3_compat[]; ++extern const size_t pc_rhel_7_3_compat_len; ++ ++extern GlobalProperty pc_rhel_7_2_compat[]; ++extern const size_t pc_rhel_7_2_compat_len; ++ ++extern GlobalProperty pc_rhel_7_1_compat[]; ++extern const size_t pc_rhel_7_1_compat_len; ++ ++extern GlobalProperty pc_rhel_7_0_compat[]; ++extern const size_t pc_rhel_7_0_compat_len; ++ ++extern GlobalProperty hw_compat_4_2_extra[]; ++extern const size_t hw_compat_4_2_extra_len; ++ + /* Helper for setting model-id for CPU models that changed 
model-id + * depending on QEMU versions up to QEMU 2.4. + */ +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index d95028018e..7b004065ae 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -131,6 +131,7 @@ static PropValue kvm_default_props[] = { + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, ++ { "kvm-pv-unhalt", "on" }, + { NULL, NULL }, + }; + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5a698bde19..a668f521ac 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3336,6 +3336,7 @@ static int kvm_get_msrs(X86CPU *cpu) + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + + kvm_msr_buf_reset(cpu); + +@@ -3665,6 +3666,9 @@ static int kvm_get_msrs(X86CPU *cpu) + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; ++ if (mc->async_pf_vmexit_disable) { ++ env->async_pf_en_msr &= ~(1ULL << 2); ++ } + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; +diff --git a/tests/qtest/pvpanic-test.c b/tests/qtest/pvpanic-test.c +index 6dcad2db49..580c2c43d2 100644 +--- a/tests/qtest/pvpanic-test.c ++++ b/tests/qtest/pvpanic-test.c +@@ -17,7 +17,7 @@ static void test_panic_nopause(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=none"); ++ qts = qtest_init("-M q35 -device pvpanic -action panic=none"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +@@ -40,7 +40,8 @@ static void test_panic(void) + QDict *response, *data; + QTestState *qts; + +- qts = qtest_init("-device pvpanic -action panic=pause"); ++ /* RHEL: Use q35 */ ++ qts = qtest_init("-M q35 -device pvpanic -action panic=pause"); + + val = qtest_inb(qts, 0x505); + g_assert_cmpuint(val, ==, 3); +-- +2.27.0 + diff --git a/0011-Enable-make-check.patch b/0011-Enable-make-check.patch new file mode 
100644 index 0000000..832b38d --- /dev/null +++ b/0011-Enable-make-check.patch @@ -0,0 +1,186 @@ +From 5e419e5e0a721bdbbfa6d9b82c8be5c5b3d26a01 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina +--- +Rebase changes (6.1.0): +- removed unnecessary test changes + +Rebase changes (6.2.0): +- new way of disabling bios-table-test + +Rebase changes (7.0.0): +- Disable testing virtio-iommu-pci +- Rename default_bus_bypass_iommu property to default-bus-bypass-iommu +- Disable qtest-bios-table for aarch64 +- Removed redhat chunks for boot-serial-test.c, cdrom-test.c and cpu-plug-test.c qtests +- Do not disable boot-order-test, prom-env-test and boot-serial-test qtests +- Use rhel machine type for new intel hda qtest +- Remove unnecessary changes in iotest 051 +- Remove changes in bios-tables-test.c and prom-env-test.c qtests + +Merged patches (6.1.0): +- 2f129df7d3 redhat: Enable the 'test-block-iothread' test again +--- + .distro/qemu-kvm.spec.template | 5 ++--- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/intel-hda-test.c | 2 +- + tests/qtest/libqos/meson.build | 2 +- + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 4 ---- + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + tests/qtest/virtio-net-failover.c | 1 + + 9 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + 
+ qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/intel-hda-test.c b/tests/qtest/intel-hda-test.c +index a58c98e4d1..c8387e39ce 100644 +--- a/tests/qtest/intel-hda-test.c ++++ b/tests/qtest/intel-hda-test.c +@@ -38,7 +38,7 @@ static void test_issue542_ich6(void) + { + QTestState *s; + +- s = qtest_init("-nographic -nodefaults -M pc-q35-6.2 " ++ s = qtest_init("-nographic -nodefaults -M pc-q35-rhel9.0.0 " + "-device intel-hda,id=" HDA_ID CODEC_DEVICES); + + qtest_outl(s, 0xcf8, 0x80000804); +diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build +index e988d15791..46f7dcb81a 100644 +--- a/tests/qtest/libqos/meson.build ++++ b/tests/qtest/libqos/meson.build +@@ -41,7 +41,7 @@ libqos_srcs = files('../libqtest.c', + 'virtio-rng.c', + 'virtio-scsi.c', + 'virtio-serial.c', +- 'virtio-iommu.c', ++# 'virtio-iommu.c', + + # qgraph machines: + 'aarch64-xlnx-zcu102-machine.c', +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 
d25f82bb5a..67cd32def1 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -73,7 +73,6 @@ qtests_i386 = \ + config_all_devices.has_key('CONFIG_Q35') and \ + config_all_devices.has_key('CONFIG_VIRTIO_PCI') and \ + slirp.found() ? ['virtio-net-failover'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -86,7 +85,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -216,7 +214,6 @@ qtests_arm = \ + + # TODO: once aarch64 TCG is fixed on ARM 32 bit host, make bios-tables-test unconditional + qtests_aarch64 = \ +- (cpu != 'arm' and unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-test'] : []) + \ + (config_all_devices.has_key('CONFIG_TPM_TIS_SYSBUS') ? ['tpm-tis-device-swtpm-test'] : []) + \ + (config_all_devices.has_key('CONFIG_XLNX_ZYNQMP_ARM') ? ['xlnx-can-test', 'fuzz-xlnx-dp-test'] : []) + \ +@@ -231,7 +228,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? 
['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +diff --git a/tests/qtest/virtio-net-failover.c b/tests/qtest/virtio-net-failover.c +index 78811f1c92..44de8af00c 100644 +--- a/tests/qtest/virtio-net-failover.c ++++ b/tests/qtest/virtio-net-failover.c +@@ -25,6 +25,7 @@ + #define PCI_SEL_BASE 0x0010 + + #define BASE_MACHINE "-M q35 -nodefaults " \ ++ "-global ICH9-LPC.acpi-pci-hotplug-with-bridge-support=on " \ + "-device pcie-root-port,id=root0,addr=0x1,bus=pcie.0,chassis=1 " \ + "-device pcie-root-port,id=root1,addr=0x2,bus=pcie.0,chassis=2 " + +-- +2.31.1 + diff --git a/0012-Enable-make-check.patch b/0012-Enable-make-check.patch new file mode 100644 index 0000000..b2ff35a --- /dev/null +++ b/0012-Enable-make-check.patch @@ -0,0 +1,407 @@ +From 740a2dd943a2e0fcd41a9cd8eb94a136f8f49fa2 Mon Sep 17 00:00:00 2001 +From: Miroslav 
Rezanina +Date: Wed, 2 Sep 2020 09:39:41 +0200 +Subject: Enable make check + +Fixing tests after device disabling and machine types changes and enabling +make check run during build. + +Signed-off-by: Miroslav Rezanina + +Rebase changes (4.0.0): +- Remove testing for pseries-2.7 in endianness test +- Disable device-plug-test on s390x as it uses a disabled device +- Do not run cpu-plug-tests on 7.3 and older machine types + +Rebase changes (4.1.0-rc0): +- removed iotests 068 + +Rebase changes (4.1.0-rc1): +- remove all 205 tests (unstable) + +Rebase changes (4.2.0-rc0): +- partially disable hd-geo-test (requires lsi53c895a) + +Rebase changes (5.1.0-rc1): +- Disable qtest/q35-test (uses upstream machine types) +- Do not run iotests on make check +- Enabled iotests 071 and 099 + +Rebase changes (5.2.0 rc0): +- Disable cdrom tests (unsupported devices) on x86_64 +- disable fuzz test + +Rebase changes (6.0.0): +- Disabled xlnx-can-test +- Disable pxb-pcie subtest for bios-table-test +- Replace qtest usage of upstream q35 machine type with pc-q35-rhel8.4.0 +- Do not run cdrom-test on aarch64 + +Rebase changes (6.1.0): +- Remove unnecessary test disabling changes + +Rebase changes (weekly-211006): +- New handling for bios-table-test (disabled downstream) + +Merged patches (4.0.0): +- f7ffd13 Remove 7 qcow2 and luks iotests that are taking > 25 sec to run during the fast train build proce + +Merged patches (4.1.0-rc0): +- 41288ff redhat: Remove raw iotest 205 +--- + redhat/qemu-kvm.spec.template | 2 +- + tests/qemu-iotests/051 | 8 ++++---- + tests/qtest/bios-tables-test.c | 5 ++++- + tests/qtest/boot-serial-test.c | 6 +++++- + tests/qtest/cdrom-test.c | 4 ++++ + tests/qtest/cpu-plug-test.c | 4 ++-- + tests/qtest/fuzz-e1000e-test.c | 2 +- + tests/qtest/fuzz-virtio-scsi-test.c | 2 +- + tests/qtest/hd-geo-test.c | 4 ++++ + tests/qtest/lpc-ich9-test.c | 2 +- + tests/qtest/meson.build | 13 ++++--------- + tests/qtest/prom-env-test.c | 4 ++++ + tests/qtest/test-x86-cpuid-compat.c | 2
++ + tests/qtest/usb-hcd-xhci-test.c | 4 ++++ + 14 files changed, 41 insertions(+), 21 deletions(-) + +diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 +index 1d2fa93a11..c8a2815f54 100755 +--- a/tests/qemu-iotests/051 ++++ b/tests/qemu-iotests/051 +@@ -174,9 +174,9 @@ run_qemu -drive if=virtio + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive if=none,id=disk -device ide-cd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive if=none,id=disk -device ide-hd,drive=disk +- run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +@@ -225,9 +225,9 @@ run_qemu -drive file="$TEST_IMG",if=virtio,readonly=on + case "$QEMU_DEFAULT_MACHINE" in + pc) + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-cd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-cd,drive=disk + run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device ide-hd,drive=disk +- run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk ++# run_qemu -drive file="$TEST_IMG",if=none,id=disk,readonly=on -device lsi53c895a -device scsi-hd,drive=disk + ;; + *) + ;; +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 258874167e..16d8304cde 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1372,6 +1372,7 @@ static void test_acpi_virt_tcg_numamem(void) + + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_acpi_virt_tcg_pxb(void) + { + test_data data = { +@@ -1403,6 +1404,7 @@ static void 
test_acpi_virt_tcg_pxb(void) + + free_test_data(&data); + } ++#endif + + static void test_acpi_tcg_acpi_hmat(const char *machine) + { +@@ -1644,7 +1646,8 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/virt", test_acpi_virt_tcg); + qtest_add_func("acpi/virt/numamem", test_acpi_virt_tcg_numamem); + qtest_add_func("acpi/virt/memhp", test_acpi_virt_tcg_memhp); +- qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); ++ /* Disabled for Red Hat Enterprise Linux ++ qtest_add_func("acpi/virt/pxb", test_acpi_virt_tcg_pxb); */ + qtest_add_func("acpi/virt/oem-fields", test_acpi_oem_fields_virt); + } + } +diff --git a/tests/qtest/boot-serial-test.c b/tests/qtest/boot-serial-test.c +index 83828ba270..294476b959 100644 +--- a/tests/qtest/boot-serial-test.c ++++ b/tests/qtest/boot-serial-test.c +@@ -148,19 +148,23 @@ static testdef_t tests[] = { + { "ppc", "g3beige", "", "PowerPC,750" }, + { "ppc", "mac99", "", "PowerPC,G4" }, + { "ppc", "sam460ex", "-m 256", "DRAM: 256 MiB" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "ppce500", "", "U-Boot" }, + { "ppc64", "40p", "-m 192", "Memory: 192M" }, + { "ppc64", "mac99", "", "PowerPC,970FX" }, ++#endif + { "ppc64", "pseries", + "-machine " PSERIES_DEFAULT_CAPABILITIES, + "Open Firmware" }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "ppc64", "powernv8", "", "OPAL" }, + { "ppc64", "powernv9", "", "OPAL" }, + { "ppc64", "sam460ex", "-device e1000", "8086 100e" }, ++#endif + { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "i386", "pc", "-device sga", "SGABIOS" }, + { "i386", "q35", "-device sga", "SGABIOS" }, +- { "x86_64", "isapc", "-cpu qemu32 -device sga", "SGABIOS" }, ++ { "x86_64", "pc", "-cpu qemu32 -device sga", "SGABIOS" }, + { "x86_64", "q35", "-device sga", "SGABIOS" }, + { "sparc", "LX", "", "TMS390S10" }, + { "sparc", "SS-4", "", "MB86904" }, +diff --git a/tests/qtest/cdrom-test.c b/tests/qtest/cdrom-test.c +index 5af944a5fb..69d9bac38a 100644 +--- 
a/tests/qtest/cdrom-test.c ++++ b/tests/qtest/cdrom-test.c +@@ -140,6 +140,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/isapc", "-M isapc " + "-drive if=ide,media=cdrom,file=", test_cdboot); + } ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_data_func("cdrom/boot/am53c974", + "-device am53c974 -device scsi-cd,drive=cd1 " + "-drive if=none,id=cd1,format=raw,file=", test_cdboot); +@@ -155,6 +156,7 @@ static void add_x86_tests(void) + qtest_add_data_func("cdrom/boot/megasas-gen2", "-M q35 " + "-device megasas-gen2 -device scsi-cd,drive=cd1 " + "-blockdev file,node-name=cd1,filename=", test_cdboot); ++#endif + } + + static void add_s390x_tests(void) +@@ -220,6 +222,7 @@ int main(int argc, char **argv) + "magnum", "malta", "pica61", NULL + }; + add_cdrom_param_tests(mips64machines); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } else if (g_str_equal(arch, "arm") || g_str_equal(arch, "aarch64")) { + const char *armmachines[] = { + "realview-eb", "realview-eb-mpcore", "realview-pb-a8", +@@ -227,6 +230,7 @@ int main(int argc, char **argv) + "vexpress-a9", "virt", NULL + }; + add_cdrom_param_tests(armmachines); ++#endif + } else { + const char *nonemachine[] = { "none", NULL }; + add_cdrom_param_tests(nonemachine); +diff --git a/tests/qtest/cpu-plug-test.c b/tests/qtest/cpu-plug-test.c +index a1c689414b..a8f076711c 100644 +--- a/tests/qtest/cpu-plug-test.c ++++ b/tests/qtest/cpu-plug-test.c +@@ -110,8 +110,8 @@ static void add_pseries_test_case(const char *mname) + char *path; + PlugTestData *data; + +- if (!g_str_has_prefix(mname, "pseries-") || +- (g_str_has_prefix(mname, "pseries-2.") && atoi(&mname[10]) < 7)) { ++ if (!g_str_has_prefix(mname, "pseries-rhel") || ++ (g_str_has_prefix(mname, "pseries-rhel7.") && atoi(&mname[14]) < 4)) { + return; + } + data = g_new(PlugTestData, 1); +diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c +index 66229e6096..947fba73b7 100644 +--- 
a/tests/qtest/fuzz-e1000e-test.c ++++ b/tests/qtest/fuzz-e1000e-test.c +@@ -17,7 +17,7 @@ static void test_lp1879531_eth_get_rss_ex_dst_addr(void) + { + QTestState *s; + +- s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0"); ++ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-rhel8.4.0"); + + qtest_outl(s, 0xcf8, 0x80001010); + qtest_outl(s, 0xcfc, 0xe1020000); +diff --git a/tests/qtest/fuzz-virtio-scsi-test.c b/tests/qtest/fuzz-virtio-scsi-test.c +index aaf6d10e18..43727d62ac 100644 +--- a/tests/qtest/fuzz-virtio-scsi-test.c ++++ b/tests/qtest/fuzz-virtio-scsi-test.c +@@ -19,7 +19,7 @@ static void test_mmio_oob_from_memory_region_cache(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.2 -display none -m 512M " ++ s = qtest_init("-M pc-q35-rhel8.4.0 -display none -m 512M " + "-device virtio-scsi,num_queues=8,addr=03.0 "); + + qtest_outl(s, 0xcf8, 0x80001811); +diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c +index 113126ae06..999ef2aace 100644 +--- a/tests/qtest/hd-geo-test.c ++++ b/tests/qtest/hd-geo-test.c +@@ -737,6 +737,7 @@ static void test_override_ide(void) + test_override(args, expected); + } + ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + static void test_override_scsi(void) + { + TestArgs *args = create_args(); +@@ -781,6 +782,7 @@ static void test_override_scsi_2_controllers(void) + add_scsi_disk(args, 3, 1, 0, 1, 2, 0, 1, 0); + test_override(args, expected); + } ++#endif + + static void test_override_virtio_blk(void) + { +@@ -960,9 +962,11 @@ int main(int argc, char **argv) + qtest_add_func("hd-geo/ide/device/user/chst", test_ide_device_user_chst); + if (have_qemu_img()) { + qtest_add_func("hd-geo/override/ide", test_override_ide); ++#if 0 /* Require lsi53c895a - not supported on RHEL */ + qtest_add_func("hd-geo/override/scsi", test_override_scsi); + qtest_add_func("hd-geo/override/scsi_2_controllers", + test_override_scsi_2_controllers); ++#endif + 
qtest_add_func("hd-geo/override/virtio_blk", test_override_virtio_blk); + qtest_add_func("hd-geo/override/zero_chs", test_override_zero_chs); + qtest_add_func("hd-geo/override/scsi_hot_unplug", +diff --git a/tests/qtest/lpc-ich9-test.c b/tests/qtest/lpc-ich9-test.c +index fe0bef9980..7a9d51579b 100644 +--- a/tests/qtest/lpc-ich9-test.c ++++ b/tests/qtest/lpc-ich9-test.c +@@ -15,7 +15,7 @@ static void test_lp1878642_pci_bus_get_irq_level_assert(void) + { + QTestState *s; + +- s = qtest_init("-M pc-q35-5.0 " ++ s = qtest_init("-M pc-q35-rhel8.4.0 " + "-nographic -monitor none -serial none"); + + qtest_outl(s, 0xcf8, 0x8000f840); /* PMBASE */ +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index c9d8458062..049e06c057 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -68,7 +68,6 @@ qtests_i386 = \ + (config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \ + (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \ + (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ +- (unpack_edk2_blobs ? ['bios-tables-test'] : []) + \ + qtests_pci + \ + ['fdc-test', + 'ide-test', +@@ -81,7 +80,6 @@ qtests_i386 = \ + 'drive_del-test', + 'tco-test', + 'cpu-plug-test', +- 'q35-test', + 'vmgenid-test', + 'migration-test', + 'test-x86-cpuid-compat', +@@ -130,17 +128,15 @@ qtests_mips64el = \ + + qtests_ppc = \ + (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) + \ +- (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + \ +- ['boot-order-test', 'prom-env-test', 'boot-serial-test'] \ ++ (config_all_devices.has_key('CONFIG_M48T59') ? ['m48t59-test'] : []) + + qtests_ppc64 = \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) + \ + (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) + \ + (config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) + \ +- (slirp.found() ? 
['pxe-test', 'test-netfilter'] : []) + \ ++ (slirp.found() ? ['pxe-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : []) + \ + (config_all_devices.has_key('CONFIG_USB_XHCI_NEC') ? ['usb-hcd-xhci-test'] : []) + \ +- (config_host.has_key('CONFIG_POSIX') ? ['test-filter-mirror'] : []) + \ + qtests_pci + ['migration-test', 'numa-test', 'cpu-plug-test', 'drive_del-test'] + + qtests_sh4 = (config_all_devices.has_key('CONFIG_ISA_TESTDEV') ? ['endianness-test'] : []) +@@ -186,8 +182,8 @@ qtests_aarch64 = \ + ['arm-cpu-features', + 'numa-test', + 'boot-serial-test', +- 'xlnx-can-test', +- 'fuzz-xlnx-dp-test', ++# 'xlnx-can-test', ++# 'fuzz-xlnx-dp-test', + 'migration-test'] + + qtests_s390x = \ +@@ -196,7 +192,6 @@ qtests_s390x = \ + (config_host.has_key('CONFIG_POSIX') ? ['test-filter-redirector'] : []) + \ + ['boot-serial-test', + 'drive_del-test', +- 'device-plug-test', + 'virtio-ccw-test', + 'cpu-plug-test', + 'migration-test'] +diff --git a/tests/qtest/prom-env-test.c b/tests/qtest/prom-env-test.c +index f41d80154a..f8dc478ce8 100644 +--- a/tests/qtest/prom-env-test.c ++++ b/tests/qtest/prom-env-test.c +@@ -89,10 +89,14 @@ int main(int argc, char *argv[]) + if (!strcmp(arch, "ppc")) { + add_tests(ppc_machines); + } else if (!strcmp(arch, "ppc64")) { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + add_tests(ppc_machines); + if (g_test_slow()) { ++#endif + qtest_add_data_func("prom-env/pseries", "pseries", test_machine); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + } ++#endif + } else if (!strcmp(arch, "sparc")) { + add_tests(sparc_machines); + } else if (!strcmp(arch, "sparc64")) { +diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c +index f28848e06e..6b2fd398a2 100644 +--- a/tests/qtest/test-x86-cpuid-compat.c ++++ b/tests/qtest/test-x86-cpuid-compat.c +@@ -300,6 +300,7 @@ int main(int argc, char **argv) + "-cpu 486,xlevel2=0xC0000002,xstore=on", + "xlevel2", 0xC0000002); + 
++#if 0 /* Disabled in Red Hat Enterprise Linux */ + /* Check compatibility of old machine-types that didn't + * auto-increase level/xlevel/xlevel2: */ + +@@ -350,6 +351,7 @@ int main(int argc, char **argv) + add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", + "-machine pc-i440fx-2.4 -cpu SandyBridge,svm=on,npt=on", + "xlevel", 0x80000008); ++#endif + + /* Test feature parsing */ + add_feature_test("x86/cpuid/features/plus", +diff --git a/tests/qtest/usb-hcd-xhci-test.c b/tests/qtest/usb-hcd-xhci-test.c +index 10ef9d2a91..3855873050 100644 +--- a/tests/qtest/usb-hcd-xhci-test.c ++++ b/tests/qtest/usb-hcd-xhci-test.c +@@ -21,6 +21,7 @@ static void test_xhci_hotplug(void) + usb_test_hotplug(global_qtest, "xhci", "1", NULL); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void test_usb_uas_hotplug(void) + { + QTestState *qts = global_qtest; +@@ -36,6 +37,7 @@ static void test_usb_uas_hotplug(void) + qtest_qmp_device_del(qts, "scsihd"); + qtest_qmp_device_del(qts, "uas"); + } ++#endif + + static void test_usb_ccid_hotplug(void) + { +@@ -56,7 +58,9 @@ int main(int argc, char **argv) + + qtest_add_func("/xhci/pci/init", test_xhci_init); + qtest_add_func("/xhci/pci/hotplug", test_xhci_hotplug); ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + qtest_add_func("/xhci/pci/hotplug/usb-uas", test_usb_uas_hotplug); ++#endif + qtest_add_func("/xhci/pci/hotplug/usb-ccid", test_usb_ccid_hotplug); + + qtest_start("-device nec-usb-xhci,id=xhci" +-- +2.27.0 + diff --git a/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..c9e42b2 --- /dev/null +++ b/0012-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,104 @@ +From c358fd4c224a9c3f64b4a8fff34cc6b1dc201fa0 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: 
<1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 +RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Count of slots increased to 509 later so we could increase limit +to 64 as some usecases require more than 32 devices. + +Signed-off-by: Bandan Das +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 67a183f17b..1e20f9fd59 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -45,6 +45,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2810,9 +2813,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3249,6 +3273,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 64777516d1..e0fe6ca97e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.31.1 + diff --git a/0013-Add-support-statement-to-help-output.patch b/0013-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..4826ea4 --- /dev/null +++ b/0013-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From ba0c7a5f6b9a1f75666db6b3b795ddf03695dc26 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> 
+Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + softmmu/vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 6f646531a0..9d5dab43d2 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -831,9 +831,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + g_get_prgname()); +@@ -859,6 +867,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.31.1 + diff --git a/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch new file mode 100644 index 0000000..d9c8d42 --- /dev/null +++ b/0013-vfio-cap-number-of-devices-that-can-be-assigned.patch @@ -0,0 +1,110 @@ +From e9ebc159a9acf108e1ec6f622be3f256cf14aba7 Mon Sep 17 00:00:00 2001 +From: Bandan Das +Date: Tue, 3 Dec 2013 20:05:13 +0100 +Subject: vfio: cap number of devices that can be assigned + +RH-Author: Bandan Das +Message-id: <1386101113-31560-3-git-send-email-bsd@redhat.com> +Patchwork-id: 55984 +O-Subject: [PATCH RHEL7 qemu-kvm v2 2/2] vfio: cap number of devices that can be assigned +Bugzilla: 678368 
+RH-Acked-by: Alex Williamson +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Michael S. Tsirkin + +Go through all groups to get count of total number of devices +active to enforce limit + +Reasoning from Alex for the limit(32) - Assuming 3 slots per +device, with 125 slots (number of memory slots for RHEL 7), +we can support almost 40 devices and still have few slots left +for other uses. Stepping down a bit, the number 32 arbitrarily +matches the number of slots on a PCI bus and is also a nice power +of two. + +Signed-off-by: Bandan Das + +Rebase notes (2.8.0): +- removed return value for vfio_realize (commit 1a22aca) + +Merged patches (2.9.0): +- 17eb774 vfio: Use error_setg when reporting max assigned device overshoot + + Merged patches (4.1.0-rc3): +- 2b89558 vfio: increase the cap on number of assigned devices to 64 +--- + hw/vfio/pci.c | 29 ++++++++++++++++++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7b45353ce2..eb725a3aee 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -45,6 +45,9 @@ + + #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug" + ++/* RHEL only: Set once for the first assigned dev */ ++static uint16_t device_limit; ++ + static void vfio_disable_interrupts(VFIOPCIDevice *vdev); + static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); + +@@ -2807,9 +2810,30 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ssize_t len; + struct stat st; + int groupid; +- int i, ret; ++ int ret, i = 0; + bool is_mdev; + ++ if (device_limit && device_limit != vdev->assigned_device_limit) { ++ error_setg(errp, "Assigned device limit has been redefined. 
" ++ "Old:%d, New:%d", ++ device_limit, vdev->assigned_device_limit); ++ return; ++ } else { ++ device_limit = vdev->assigned_device_limit; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { ++ i++; ++ } ++ } ++ ++ if (i >= vdev->assigned_device_limit) { ++ error_setg(errp, "Maximum supported vfio devices (%d) " ++ "already attached", vdev->assigned_device_limit); ++ return; ++ } ++ + if (!vdev->vbasedev.sysfsdev) { + if (!(~vdev->host.domain || ~vdev->host.bus || + ~vdev->host.slot || ~vdev->host.function)) { +@@ -3246,6 +3270,9 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false), + DEFINE_PROP_BOOL("x-no-geforce-quirks", VFIOPCIDevice, + no_geforce_quirks, false), ++ /* RHEL only */ ++ DEFINE_PROP_UINT16("x-assigned-device-limit", VFIOPCIDevice, ++ assigned_device_limit, 64), + DEFINE_PROP_BOOL("x-no-kvm-ioeventfd", VFIOPCIDevice, no_kvm_ioeventfd, + false), + DEFINE_PROP_BOOL("x-no-vfio-ioeventfd", VFIOPCIDevice, no_vfio_ioeventfd, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 64777516d1..e0fe6ca97e 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -139,6 +139,7 @@ struct VFIOPCIDevice { + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); ++ uint16_t assigned_device_limit; + uint32_t vendor_id; + uint32_t device_id; + uint32_t sub_vendor_id; +-- +2.27.0 + diff --git a/0014-Add-support-statement-to-help-output.patch b/0014-Add-support-statement-to-help-output.patch new file mode 100644 index 0000000..2259e13 --- /dev/null +++ b/0014-Add-support-statement-to-help-output.patch @@ -0,0 +1,55 @@ +From b736b0c41dd62ed6f874a7b33ca1d4f9ceab4573 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Wed, 4 Dec 2013 18:53:17 +0100 +Subject: Add support statement to -help output + +RH-Author: Eduardo Habkost +Message-id: <1386183197-27761-1-git-send-email-ehabkost@redhat.com> 
+Patchwork-id: 55994 +O-Subject: [qemu-kvm RHEL7 PATCH] Add support statement to -help output +Bugzilla: 972773 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: knoel@redhat.com +RH-Acked-by: Paolo Bonzini + +Add support statement to -help output, reporting direct qemu-kvm usage +as unsupported by Red Hat, and advising users to use libvirt instead. + +Signed-off-by: Eduardo Habkost +--- + softmmu/vl.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/softmmu/vl.c b/softmmu/vl.c +index 620a1f1367..d46b8fb4ab 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -827,9 +827,17 @@ static void version(void) + QEMU_COPYRIGHT "\n"); + } + ++static void print_rh_warning(void) ++{ ++ printf("\nWARNING: Direct use of qemu-kvm from the command line is not supported by Red Hat.\n" ++ "WARNING: Use libvirt as the stable management interface.\n" ++ "WARNING: Some command line options listed here may not be available in future releases.\n\n"); ++} ++ + static void help(int exitcode) + { + version(); ++ print_rh_warning(); + printf("usage: %s [options] [disk_image]\n\n" + "'disk_image' is a raw hard disk image for IDE hard disk 0\n\n", + error_get_progname()); +@@ -855,6 +863,7 @@ static void help(int exitcode) + "\n" + QEMU_HELP_BOTTOM "\n"); + ++ print_rh_warning(); + exit(exitcode); + } + +-- +2.27.0 + diff --git a/0014-globally-limit-the-maximum-number-of-CPUs.patch b/0014-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..6764a84 --- /dev/null +++ b/0014-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,45 @@ +From 9ebfd2f6cfa8e79c92e58fd169f90cc768fb865a Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. +Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. 
+ +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5f1377ca04..fdf0e4d429 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2430,6 +2430,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86. ++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.31.1 + diff --git a/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..722484d --- /dev/null +++ b/0015-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,61 @@ +From 4b6c8cdc52fdf94d4098d278defb3833dce1d189 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change.
Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina +--- + docs/defs.rst.inc | 4 ++-- + qemu-options.hx | 10 +++++----- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. |I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/qemu-options.hx b/qemu-options.hx +index 34e9b32a5c..924f61ab6d 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3233,11 +3233,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. +-- +2.31.1 + diff --git a/0015-globally-limit-the-maximum-number-of-CPUs.patch b/0015-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..31d9643 --- /dev/null +++ b/0015-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,65 @@ +From 9a7621819821ee88d2f99d6b629fd87aa9a07758 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Tue, 21 Jan 2014 10:46:52 +0100 +Subject: globally limit the maximum number of CPUs + +We now globally limit the number of VCPUs. 
+Especially, there is no way one can specify more than +max_cpus VCPUs for a VM. + +This allows us to restore the ppc max_cpus limitation to the upstream +default and minimize the ppc hack in kvm-all.c. + +Signed-off-by: David Hildenbrand +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo Cesar Lemes de Paula + +Rebase notes (2.11.0): +- Removed CONFIG_RHV reference +- Update commit log + +Merged patches (2.11.0): +- 92fef14623 redhat: remove manual max_cpus limitations for ppc +- bb722e9eff redhat: globally limit the maximum number of CPUs +- fdeef3c1c7 RHEL: Set vcpus hard limit to 240 for Power +- 0584216921 Match POWER max cpus to x86 + +Signed-off-by: Andrew Jones + +Merged patches (5.1.0): +- redhat: globally limit the maximum number of CPUs +- redhat: remove manual max_cpus limitations for ppc +- use recommended max vcpu count + +Merged patches (5.2.0 rc0): +- f8a4123 vl: Remove downstream-only MAX_RHEL_CPUS code +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index eecd8031cf..8f2a53438f 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2423,6 +2423,18 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + ++#ifdef HOST_PPC64 ++ /* ++ * On POWER, the kernel advertises a soft limit based on the ++ * number of CPU threads on the host. We want to allow exceeding ++ * this for testing purposes, so we don't want to set hard limit ++ * to soft limit as on x86.
++ */ ++#else ++ /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ ++ hard_vcpus_limit = soft_vcpus_limit; ++#endif ++ + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.27.0 + diff --git a/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch new file mode 100644 index 0000000..9eda7c3 --- /dev/null +++ b/0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch @@ -0,0 +1,126 @@ +From 0d3fc0b4c5773c6cabb0a58c064475f76eb6ac1e Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 8 Jul 2020 08:35:50 +0200 +Subject: Use qemu-kvm in documentation instead of qemu-system- + +Patchwork-id: 62380 +O-Subject: [RHEV-7.1 qemu-kvm-rhev PATCHv4] Use qemu-kvm in documentation instead of qemu-system-i386 +Bugzilla: 1140620 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Markus Armbruster +RH-Acked-by: Stefan Hajnoczi + +From: Miroslav Rezanina + +We change the name and location of qemu-kvm binaries. Update documentation +to reflect this change. Only architectures available in RHEL are updated. + +Signed-off-by: Miroslav Rezanina + +Rebase notes (5.1.0 rc0): + - qemu-block-drivers.texi converted to qemu-block-drivers.rst (upstream) + +Rebase notes (5.2.0 rc0): + - rewrite patch to new docs structure +--- + docs/defs.rst.inc | 4 ++-- + docs/tools/qemu-trace-stap.rst | 14 +++++++------- + qemu-options.hx | 10 +++++----- + 3 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/docs/defs.rst.inc b/docs/defs.rst.inc +index 52d6454b93..d74dbdeca9 100644 +--- a/docs/defs.rst.inc ++++ b/docs/defs.rst.inc +@@ -9,7 +9,7 @@ + but the manpages will end up misrendered with following normal text + incorrectly in boldface. + +-.. |qemu_system| replace:: qemu-system-x86_64 +-.. |qemu_system_x86| replace:: qemu-system-x86_64 ++.. |qemu_system| replace:: qemu-kvm ++.. |qemu_system_x86| replace:: qemu-kvm + .. 
|I2C| replace:: I\ :sup:`2`\ C + .. |I2S| replace:: I\ :sup:`2`\ S +diff --git a/docs/tools/qemu-trace-stap.rst b/docs/tools/qemu-trace-stap.rst +index d53073b52b..9e93df084f 100644 +--- a/docs/tools/qemu-trace-stap.rst ++++ b/docs/tools/qemu-trace-stap.rst +@@ -46,19 +46,19 @@ The following commands are valid: + any of the listed names. If no *PATTERN* is given, the all possible + probes will be listed. + +- For example, to list all probes available in the ``qemu-system-x86_64`` ++ For example, to list all probes available in the ``qemu-kvm`` + binary: + + :: + +- $ qemu-trace-stap list qemu-system-x86_64 ++ $ qemu-trace-stap list qemu-kvm + + To filter the list to only cover probes related to QEMU's cryptographic + subsystem, in a binary outside ``$PATH`` + + :: + +- $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-system-x86_64 'qcrypto*' ++ $ qemu-trace-stap list /opt/qemu/4.0.0/bin/qemu-kvm 'qcrypto*' + + .. option:: run OPTIONS BINARY PATTERN... + +@@ -90,18 +90,18 @@ The following commands are valid: + Restrict the tracing session so that it only triggers for the process + identified by *PID*. 
+ +- For example, to monitor all processes executing ``qemu-system-x86_64`` ++ For example, to monitor all processes executing ``qemu-kvm`` + as found on ``$PATH``, displaying all I/O related probes: + + :: + +- $ qemu-trace-stap run qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run qemu-kvm 'qio*' + + To monitor only the QEMU process with PID 1732 + + :: + +- $ qemu-trace-stap run --pid=1732 qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap run --pid=1732 qemu-kvm 'qio*' + + To monitor QEMU processes running an alternative binary outside of + ``$PATH``, displaying verbose information about setup of the +@@ -109,7 +109,7 @@ The following commands are valid: + + :: + +- $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-system-x86_64 'qio*' ++ $ qemu-trace-stap -v run /opt/qemu/4.0.0/qemu-kvm 'qio*' + + See also + -------- +diff --git a/qemu-options.hx b/qemu-options.hx +index ae2c6dbbfc..94c4a8dbaf 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -3150,11 +3150,11 @@ SRST + + :: + +- qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ +- -numa node,memdev=mem \ +- -chardev socket,id=chr0,path=/path/to/socket \ +- -netdev type=vhost-user,id=net0,chardev=chr0 \ +- -device virtio-net-pci,netdev=net0 ++ qemu-kvm -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,share=on \ ++ -numa node,memdev=mem \ ++ -chardev socket,id=chr0,path=/path/to/socket \ ++ -netdev type=vhost-user,id=net0,chardev=chr0 \ ++ -device virtio-net-pci,netdev=net0 + + ``-netdev vhost-vdpa,vhostdev=/path/to/dev`` + Establish a vhost-vdpa netdev. 
+-- +2.27.0 + diff --git a/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..9f08024 --- /dev/null +++ b/0016-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From b72e04cb7e417d9e1c973223747ab3a27abda8b4 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> +Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. Even if it were, the changes would be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involves even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs have been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L.
de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 34a968ecfb..7f6da33a8a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert. ++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.31.1 + diff --git a/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..2bc687c --- /dev/null +++ b/0017-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 64a06662cdea0ff62efb122be4eab506b2a842d9 Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. 
We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index 655ab856a0..6aa7f93df9 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -329,12 +329,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.31.1 + diff --git a/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch new file mode 100644 index 0000000..6b60efc --- /dev/null +++ b/0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch @@ -0,0 +1,66 @@ +From d95768c039a2bf6b68422f83a8d55dad41bd3181 Mon Sep 17 00:00:00 2001 +From: Fam Zheng +Date: Wed, 14 Jun 2017 15:37:01 +0200 +Subject: virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] + +RH-Author: Fam Zheng +Message-id: <20170614153701.14757-1-famz@redhat.com> 
+Patchwork-id: 75613 +O-Subject: [RHV-7.4 qemu-kvm-rhev PATCH v3] virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only] +Bugzilla: 1378816 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Max Reitz + +We need a fix for RHEL 7.4 and 7.3.z, but unfortunately upstream isn't +ready. Even if it were, the changes would be too invasive. To have an idea: + +https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg05400.html + +is an incomplete attempt to fix part of the issue, and the remaining +work unfortunately involves even more complex changes. + +As a band-aid, this partially reverts the effect of ef8875b +(virtio-scsi: Remove op blocker for dataplane, since v2.7). We cannot +simply revert that commit as a whole because we already shipped it in +qemu-kvm-rhev 7.3, since when, block jobs have been possible. We should +only block what has been broken. Also, faithfully reverting the above +commit means adding back the removed op blocker, but that is not enough, +because it still crashes when inserting media into an initially empty +scsi-cd. + +All in all, scsi-cd on virtio-scsi-dataplane has basically been unusable +unless the scsi-cd never enters an empty state, so, disable it +altogether. Otherwise it would be much more difficult to avoid +crashing. + +Signed-off-by: Fam Zheng +Signed-off-by: Miroslav Rezanina +Signed-off-by: Danilo C. L. de Paula +--- + hw/scsi/virtio-scsi.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 51fd09522a..a35257c35a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -896,6 +896,15 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + ++ /* XXX: Remove this check once block backend is capable of handling ++ * AioContext change upon eject/insert.
++ * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if ++ * data plane is not used, both cases are safe for scsi-cd. */ ++ if (s->ctx && s->ctx != qemu_get_aio_context() && ++ object_dynamic_cast(OBJECT(dev), "scsi-cd")) { ++ error_setg(errp, "scsi-cd is not supported by data plane"); ++ return; ++ } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.27.0 + diff --git a/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch new file mode 100644 index 0000000..e07746d --- /dev/null +++ b/0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch @@ -0,0 +1,60 @@ +From 92bb62c47eab021f8dabecd09b5fbc1706e6a29c Mon Sep 17 00:00:00 2001 +From: David Gibson +Date: Wed, 6 Feb 2019 03:58:56 +0000 +Subject: BZ1653590: Require at least 64kiB pages for downstream guests & hosts + +RH-Author: David Gibson +Message-id: <20190206035856.19058-1-dgibson@redhat.com> +Patchwork-id: 84246 +O-Subject: [RHELAV-8.0/rhel qemu-kvm PATCH] BZ1653590: Require at least 64kiB pages for downstream guests & hosts +Bugzilla: 1653590 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Serhii Popovych +RH-Acked-by: Thomas Huth + +Most current POWER guests require 64kiB page support, so that's the default +for the cap-hpt-max-pagesize option in qemu which limits available guest +page sizes. We warn if the value is set smaller than that, but don't +outright fail upstream, because we need to allow for the possibility of +guest (and/or host) kernels configured for 4kiB page sizes. + +Downstream, however, we simply don't support 4kiB pagesize configured +kernels in guest or host, so we can have qemu simply error out in this +situation. + +Testing: Attempted to start a guest with cap-hpt-max-page-size=4k and verified + it failed immediately with a qemu error + +Signed-off-by: David Gibson +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/ppc/spapr_caps.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c +index ed7c077a0d..48a8efe678 100644 +--- a/hw/ppc/spapr_caps.c ++++ b/hw/ppc/spapr_caps.c +@@ -332,12 +332,19 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) + { ++#if 0 /* disabled for RHEL */ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } ++#else /* Only page sizes >=64kiB supported for RHEL */ ++ if (val < 16) { ++ error_setg(errp, "Require at least 64kiB hpt-max-page-size"); ++ return; ++ } ++#endif + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); + } +-- +2.27.0 + diff --git a/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch new file mode 100644 index 0000000..d7401d5 --- /dev/null +++ b/0018-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch @@ -0,0 +1,77 @@ +From 54f9157a918e1404f2f17ce89a9c8b9088c1bc06 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 20 Aug 2021 18:25:12 +0200 +Subject: qcow2: Deprecation warning when opening v2 images rw +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 37: qcow2: Deprecation warning when opening v2 images rw +RH-Commit: [1/1] f450d0ae32d35063b28c72c4f2d2ebb9e6d8db3e (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 1951814 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz +RH-Acked-by: Thomas Huth +RH-Acked-by: Philippe Mathieu-Daudé + +qcow2 v3 has been around for a long time (since QEMU 1.1/RHEL 7), so +there is no real reason any more to use v2. People still using v2 might +do so unintentionally.
Warn about it and suggest upgrading during the +RHEL 9 timeframe so that the code can possibly be disabled in RHEL 10. + +The warning is restricted to read-write mode and the system emulator. +The primary motivation for not having it in qemu-img is that 'qemu-img +amend' for upgrades would warn otherwise. It also avoids having to make +too many changes to the test suite. + +bdrv_uses_whitelist() is used as a proxy for deciding whether we are +running in a tool or the system emulator. This is not entirely clean, +but it's what is available and the same function qcow2_do_open() already +uses it this way for another warning. + +Signed-off-by: Kevin Wolf + +patch_name: kvm-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +present_in_specfile: true +location_in_specfile: 116 +--- +Rebase notes (6.1.0): +- Replace bs->read_only with bdrv_is_read_only +--- + block/qcow2.c | 6 ++++++ + tests/qemu-iotests/common.filter | 1 + + 2 files changed, 7 insertions(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index b5c47931ef..a795e457ac 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -1337,6 +1337,12 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, + ret = -ENOTSUP; + goto fail; + } ++ if (header.version < 3 && !bdrv_is_read_only(bs) && bdrv_uses_whitelist()) { ++ warn_report_once("qcow2 v2 images are deprecated and may not be " ++ "supported in future versions. 
Please consider " ++ "upgrading the image with 'qemu-img amend " ++ "-o compat=v3'."); ++ } + + s->qcow_version = header.version; + +diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter +index cc9f1a5891..6a13757177 100644 +--- a/tests/qemu-iotests/common.filter ++++ b/tests/qemu-iotests/common.filter +@@ -83,6 +83,7 @@ _filter_qemu() + { + gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \ + -e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \ ++ -e "/qcow2 v2 images are deprecated/d" \ + -e $'s#\r##' # QEMU monitor uses \r\n line endings + } + +-- +2.31.1 + diff --git a/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch new file mode 100644 index 0000000..285cd6b --- /dev/null +++ b/0019-WRB-Introduce-RHEL-9.0.0-hw-compat-structure.patch @@ -0,0 +1,135 @@ +From 1d6439527aa6ccabb58208c94417778ccc19de39 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Wed, 9 Feb 2022 04:16:25 -0500 +Subject: WRB: Introduce RHEL 9.0.0 hw compat structure + +General compatibility structure for post RHEL 9.0.0 rebase. 
+ +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 9 +++++++++ + hw/i386/pc.c | 6 ++++++ + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + hw/s390x/s390-virtio-ccw.c | 2 ++ + include/hw/boards.h | 3 +++ + include/hw/i386/pc.h | 3 +++ + 7 files changed, 31 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 28989b6e7b..dffc3ef4ab 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -53,6 +53,15 @@ GlobalProperty hw_compat_rhel_8_6[] = { + }; + const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); + ++/* ++ * Mostly the same as hw_compat_6_2 ++ */ ++GlobalProperty hw_compat_rhel_9_0[] = { ++ /* hw_compat_rhel_9_0 from hw_compat_6_2 */ ++ { "PIIX4_PM", "x-not-migrate-acpi-index", "on"}, ++}; ++const size_t hw_compat_rhel_9_0_len = G_N_ELEMENTS(hw_compat_rhel_9_0); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 263d882af6..0886cfe3fe 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -391,6 +391,12 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_0_compat[] = { ++ /* pc_rhel_9_0_compat from pc_compat_6_2 */ ++ { "virtio-mem", "unplugged-inaccessible", "off" }, ++}; ++const size_t pc_rhel_9_0_compat_len = G_N_ELEMENTS(pc_rhel_9_0_compat); ++ + GlobalProperty pc_rhel_8_5_compat[] = { + /* pc_rhel_8_5_compat from pc_compat_6_0 */ + { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 0cacc0d623..dc987fe93b 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1014,6 +1014,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ 
pc_rhel_9_0_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_6, + hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 157160e069..52c253c570 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -669,6 +669,10 @@ static void pc_q35_machine_rhel900_options(MachineClass *m) + m->desc = "RHEL-9.0.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.0.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_9_0, ++ hw_compat_rhel_9_0_len); ++ compat_props_add(m->compat_props, pc_rhel_9_0_compat, ++ pc_rhel_9_0_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel900, "pc-q35-rhel9.0.0", pc_q35_init_rhel900, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 465a2a09d2..08e0f6a79b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1118,12 +1118,14 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++ + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_9_0, hw_compat_rhel_9_0_len); + } + DEFINE_CCW_MACHINE(rhel900, "rhel9.0.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index d1555665df..635e45dd71 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -451,6 +451,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_9_0[]; ++extern const size_t hw_compat_rhel_9_0_len; ++ + extern GlobalProperty hw_compat_rhel_8_6[]; + extern const size_t hw_compat_rhel_8_6_len; + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 419a6ec24b..a492c420b5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h 
+@@ -292,6 +292,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_0_compat[]; ++extern const size_t pc_rhel_9_0_compat_len; ++ + extern GlobalProperty pc_rhel_8_5_compat[]; + extern const size_t pc_rhel_8_5_compat_len; + +-- +2.31.1 + diff --git a/0019-compat-Update-hw_compat_rhel_8_5.patch b/0019-compat-Update-hw_compat_rhel_8_5.patch new file mode 100644 index 0000000..6d2b7c3 --- /dev/null +++ b/0019-compat-Update-hw_compat_rhel_8_5.patch @@ -0,0 +1,53 @@ +From a9b5da617c29f48199cbea08d6a1c083877dce10 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:22:29 +0100 +Subject: compat: Update hw_compat_rhel_8_5 + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [1/2] 232f2ad2b29d250fbdb8fcea9d814704c575ba2b +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +Add properties from hw_compat_6_1 as it already includes the ones from +hw_compat_6_0. Add a lately added property from 6.0 too. 
+ +Signed-off-by: Laurent Vivier +-- +Rebase notes (6.2.0 rc3): +- Included compat changes introduced in RC2 +--- + hw/core/machine.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 62febde5aa..736c765c30 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -38,7 +38,7 @@ + #include "hw/virtio/virtio-pci.h" + + /* +- * Mostly the same as hw_compat_6_0 ++ * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ + GlobalProperty hw_compat_rhel_8_5[] = { + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ +@@ -51,6 +51,12 @@ GlobalProperty hw_compat_rhel_8_5[] = { + { "e1000", "init-vet", "off" }, + /* hw_compat_rhel_8_5 from hw_compat_6_0 */ + { "e1000e", "init-vet", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_0 */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "vhost-user-vsock-device", "seqpacket", "off" }, ++ /* hw_compat_rhel_8_5 from hw_compat_6_1 */ ++ { "nvme-ns", "shared", "off" }, + }; + const size_t hw_compat_rhel_8_5_len = G_N_ELEMENTS(hw_compat_rhel_8_5); + +-- +2.27.0 + diff --git a/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch b/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch new file mode 100644 index 0000000..af8e9dd --- /dev/null +++ b/0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch @@ -0,0 +1,43 @@ +From 82358c35f04f026820b3907069a6c19cd95b654d Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 15 Nov 2021 14:25:33 +0100 +Subject: redhat: Update pseries-rhel8.5.0 machine type + +RH-Author: Laurent Vivier +RH-MergeRequest: 66: redhat: Update pseries-rhel8.5.0 machine type +RH-Commit: [2/2] 36f7ad1ea56baaaecb139875ad0a90a6470196be +RH-Bugzilla: 2022608 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Greg Kurz + +We don't introduce a new machine type for rhel8.6.0 but we need +to keep compatibility with rhel8.5.0 machine type.
+ +Signed-off-by: Laurent Vivier +--- + hw/ppc/spapr.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index cace86028d..2f27888d8a 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5177,10 +5177,14 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + + static void spapr_machine_rhel850_class_options(MachineClass *mc) + { ++ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); ++ + /* The default machine type must apply the RHEL specific defaults */ + spapr_machine_rhel_default_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); ++ smc->pre_6_2_numa_affinity = true; ++ mc->smp_props.prefer_sockets = true; + } + + DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); +-- +2.27.0 + diff --git a/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch new file mode 100644 index 0000000..d3b91d0 --- /dev/null +++ b/0020-redhat-Update-s390x-machine-type-compatibility-for-r.patch @@ -0,0 +1,38 @@ +From c8ad21ca31892f8798cf82508c2b2c61bf3b9895 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 4 Apr 2022 12:15:50 +0200 +Subject: redhat: Update s390x machine type compatibility for rebase to QEMU + 7.0.0 + +RH-Author: Thomas Huth +RH-MergeRequest: 143: Update machine type compatibility for QEMU 7.0.0 update [s390x] +RH-Commit: [23/23] 0ecf97d7bdddc50565b5779c64744b353f715cbd +RH-Bugzilla: 2064782 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +No s390x-specific machine class property updates required this time, +only an update to the default qemu cpu model. 
+ +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 08e0f6a79b..4a491d4988 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1121,6 +1121,9 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel900_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_2 }; ++ ++ s390_set_qemu_cpu_model(0x3906, 14, 2, qemu_cpu_feat); + } + + static void ccw_machine_rhel900_class_options(MachineClass *mc) +-- +2.31.1 + diff --git a/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch new file mode 100644 index 0000000..f9535a8 --- /dev/null +++ b/0021-pc-Move-s3-s4-suspend-disabling-to-compat.patch @@ -0,0 +1,70 @@ +From 38b89dc24551258b630f09d1c654b6c72b265c79 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 14 Apr 2022 14:58:43 +0100 +Subject: pc: Move s3/s4 suspend disabling to compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 155: 7.0 machine type fixes (x86) +RH-Commit: [26/26] 7d666032d5f5dab1444ebba085f92f2de4e86699 +RH-Bugzilla: 2064771 + +Our downstream patches currently have tweaks in the C code to disable +s3/s4; Thomas pointed out we can just set the property. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/acpi/ich9.c | 4 ++-- + hw/acpi/piix4.c | 4 ++-- + hw/i386/pc.c | 6 ++++++ + 3 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c +index de1e401cdf..bd9bbade70 100644 +--- a/hw/acpi/ich9.c ++++ b/hw/acpi/ich9.c +@@ -435,8 +435,8 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm) + static const uint32_t gpe0_len = ICH9_PMIO_GPE0_LEN; + pm->acpi_memory_hotplug.is_enabled = true; + pm->cpu_hotplug_legacy = true; +- pm->disable_s3 = 1; +- pm->disable_s4 = 1; ++ pm->disable_s3 = 0; ++ pm->disable_s4 = 0; + pm->s4_val = 2; + pm->use_acpi_hotplug_bridge = true; + pm->keep_pci_slot_hpc = true; +diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c +index 28544e78c3..2fb2b43248 100644 +--- a/hw/acpi/piix4.c ++++ b/hw/acpi/piix4.c +@@ -653,8 +653,8 @@ static void piix4_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + + static Property piix4_pm_properties[] = { + DEFINE_PROP_UINT32("smb_io_base", PIIX4PMState, smb_io_base, 0), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 1), +- DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 1), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S3_DISABLED, PIIX4PMState, disable_s3, 0), ++ DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_DISABLED, PIIX4PMState, disable_s4, 0), + DEFINE_PROP_UINT8(ACPI_PM_PROP_S4_VAL, PIIX4PMState, s4_val, 2), + DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, PIIX4PMState, + use_acpi_hotplug_bridge, true), +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0886cfe3fe..f98f842f80 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -380,6 +380,12 @@ const size_t pc_compat_1_4_len = G_N_ELEMENTS(pc_compat_1_4); + * machine type. 
+ */ + GlobalProperty pc_rhel_compat[] = { ++ /* we don't support s3/s4 suspend */ ++ { "PIIX4_PM", "disable_s3", "1" }, ++ { "PIIX4_PM", "disable_s4", "1" }, ++ { "ICH9-LPC", "disable_s3", "1" }, ++ { "ICH9-LPC", "disable_s4", "1" }, ++ + { TYPE_X86_CPU, "host-phys-bits", "on" }, + { TYPE_X86_CPU, "host-phys-bits-limit", "48" }, + { TYPE_X86_CPU, "vmx-entry-load-perf-global-ctrl", "off" }, +-- +2.31.1 + diff --git a/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch b/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch new file mode 100644 index 0000000..3bcf4e0 --- /dev/null +++ b/0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch @@ -0,0 +1,51 @@ +From ce73e939b993cc6be170cdb5d3f2068270593f2b Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 16 Nov 2021 17:03:07 +0100 +Subject: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU + 6.2.0 update + +RH-Author: Eric Auger +RH-MergeRequest: 75: redhat: virt-rhel8.5.0: Update machine type compatibility for QEMU 6.2.0 update +RH-Commit: [21/21] f027d13654944e3d34e3356affe7af952eec2bed +RH-Bugzilla: 2022607 +RH-Acked-by: Gavin Shan +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +To keep compatibility with 8.5-AV machine type we need to +turn few new options on by default: +smp_props.prefer_sockets, no_cpu_topology, no_tcg_its + +TESTED: migrate from rhel-av-8.5.0 to rhel-8.6.0 and vice-versa +with upstream fix: 33a0c404fb hw/intc/arm_gicv3_its: Revert version +increments in vmstate_its + +Signed-off-by: Eric Auger +Signed-off-by: Miroslav Rezanina +--- + hw/arm/virt.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c77d26ab13..e8941afd01 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3225,8 +3225,13 @@ type_init(rhel_machine_init); + + static void rhel850_virt_options(MachineClass *mc) + { ++ VirtMachineClass *vmc = 
VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); ++ + compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; ++ vmc->no_cpu_topology = true; ++ vmc->no_tcg_its = true; + } + DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) + +-- +2.27.0 + diff --git a/0022-Fix-virtio-net-pci-vectors-compat.patch b/0022-Fix-virtio-net-pci-vectors-compat.patch new file mode 100644 index 0000000..b484ea1 --- /dev/null +++ b/0022-Fix-virtio-net-pci-vectors-compat.patch @@ -0,0 +1,45 @@ +From f9643b6934657292aae0b830627b1e5f9b8cbaa1 Mon Sep 17 00:00:00 2001 +From: Eduardo Habkost +Date: Tue, 19 Oct 2021 13:17:06 -0400 +Subject: Fix virtio-net-pci* "vectors" compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [21/23] 8ad581932275d2698a99f31bec40b14f1dbd3d2e +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +hw_compat_rhel_8_4 has an issue: it affects only "virtio-net-pci" +but not "virtio-net-pci-transitional" and +"virtio-net-pci-non-transitional". The solution is to use the +"virtio-net-pci-base" type in compat_props. + +An equivalent fix will be submitted for hw_compat_5_2 upstream. 
+ +Signed-off-by: Eduardo Habkost +(cherry picked from commit d45823ab0d0138b2fbaf2ed1e1896d2052f3ccb3) +Signed-off-by: Miroslav Rezanina +--- + hw/core/machine.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 736c765c30..024b025fc2 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -71,7 +71,11 @@ GlobalProperty hw_compat_rhel_8_4[] = { + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ + { "virtio-blk-device", "report-discard-granularity", "off" }, + /* hw_compat_rhel_8_4 from hw_compat_5_2 */ +- { "virtio-net-pci", "vectors", "3"}, ++ /* ++ * Upstream incorrectly had "virtio-net-pci" instead of "virtio-net-pci-base", ++ * (https://bugzilla.redhat.com/show_bug.cgi?id=1999141) ++ */ ++ { "virtio-net-pci-base", "vectors", "3"}, + }; + const size_t hw_compat_rhel_8_4_len = G_N_ELEMENTS(hw_compat_rhel_8_4); + +-- +2.27.0 + diff --git a/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch b/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch new file mode 100644 index 0000000..8572d61 --- /dev/null +++ b/0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch @@ -0,0 +1,73 @@ +From 7ad8814e583dcc7dc23e3e8398570243b8f176a1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 17:57:42 +0000 +Subject: x86/rhel machine types: Add pc_rhel_8_5_compat + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [22/23] 8bf555c5d78f344b97ffd5c888c7a7bed592d9d0 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Add pc_rhel_8_5_compat as the merge of pc_compat_6_1 and pc_compat_6_0 +(since 8.5 was based on 6.0). + +Note, x-keep-pci-slot-hpc flipped back and forward, leaving it out +looks like it leaves us with the original. + +Signed-off-by: Dr. 
David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc.c | 21 +++++++++++++++++++++ + include/hw/i386/pc.h | 3 +++ + 2 files changed, 24 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index e8109954ca..4c08a1971c 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -387,6 +387,27 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_8_5_compat[] = { ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "family", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "model", "6" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "qemu64" "-" TYPE_X86_CPU, "stepping", "3" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { TYPE_X86_CPU, "x-vendor-cpuid-only", "off" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_0 */ ++ { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" }, ++ ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-build", "0x1bbc" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-major", "0x0006" }, ++ /* pc_rhel_8_5_compat from pc_compat_6_1 */ ++ { TYPE_X86_CPU, "hv-version-id-minor", "0x0001" }, ++}; ++const size_t pc_rhel_8_5_compat_len = G_N_ELEMENTS(pc_rhel_8_5_compat); ++ + GlobalProperty pc_rhel_8_4_compat[] = { + /* pc_rhel_8_4_compat from pc_compat_5_2 */ + { "ICH9-LPC", "x-smi-cpu-hotunplug", "off" }, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index d0544ee119..9e8bfb69f8 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -286,6 +286,9 @@ extern const size_t pc_compat_1_4_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_8_5_compat[]; ++extern const size_t pc_rhel_8_5_compat_len; ++ + extern GlobalProperty pc_rhel_8_4_compat[]; + extern const size_t pc_rhel_8_4_compat_len; + +-- +2.27.0 + diff --git 
a/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch b/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch new file mode 100644 index 0000000..4acfa88 --- /dev/null +++ b/0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch @@ -0,0 +1,54 @@ +From 7bd99eebadfdbea6a76585b526e7cab1ee8b1fde Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 23 Nov 2021 18:07:49 +0000 +Subject: x86/rhel machine types: Wire compat into q35 and i440fx + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 77: 8.6/6.2 mt fixes +RH-Commit: [23/23] fc3861aeccc943b434231193ef45ffbc0b3cf6c6 +RH-Bugzilla: 2026443 +RH-Acked-by: Miroslav Rezanina + +Wire the pc_rhel_8_5 compat data into both piix and q35 +to keep the existing machine types compatible. + +Signed-off-by: Dr. David Alan Gilbert +Signed-off-by: Miroslav Rezanina +--- + hw/i386/pc_piix.c | 4 ++++ + hw/i386/pc_q35.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 2885edffe9..37fab00733 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1040,6 +1040,10 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_4, + hw_compat_rhel_8_4_len); + compat_props_add(m->compat_props, pc_rhel_8_4_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index c67418b6a9..78876e1101 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,10 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; ++ compat_props_add(m->compat_props, 
hw_compat_rhel_8_5, ++ hw_compat_rhel_8_5_len); ++ compat_props_add(m->compat_props, pc_rhel_8_5_compat, ++ pc_rhel_8_5_compat_len); + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch b/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch new file mode 100644 index 0000000..1ae8a99 --- /dev/null +++ b/0025-redhat-Add-s390x-machine-type-compatibility-handling.patch @@ -0,0 +1,58 @@ +From 265a57f2955b7f0b65e3f57f89aa1ff2541d3f73 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 26 Nov 2021 09:37:11 +0100 +Subject: redhat: Add s390x machine type compatibility handling for the rebase + to v6.2 + +RH-Author: Thomas Huth +RH-MergeRequest: 80: Add s390x machine type compatibility handling for the rebase to v6.2 +RH-Commit: [26/26] c45cf594604f6dd23954696b9c84d2025e328d11 +RH-Bugzilla: 2022602 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cornelia Huck + +Add compatibility handling for the rhel8.5.0 machine type (and +recursively older, of course). 
+ +Based on the following upstream commits: + + 463e50da8b - s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 + 30e398f796 - s390x/cpumodel: Add more feature to gen16 default model + 4a0af2930a - machine: Prefer cores over sockets in smp parsing since 6.2 + 2b52619994 - machine: Move smp_prefer_sockets to struct SMPCompatProps + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 181856e6cf..cf13c457d6 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1105,11 +1105,21 @@ DEFINE_CCW_MACHINE(2_4, "2.4", false); + + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { ++ static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; ++ ++ s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); ++ ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_VECTOR_PACKED_DECIMAL_ENH2); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_BEAR_ENH); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_RDP); ++ s390_cpudef_featoff_greater(16, 1, S390_FEAT_PAI); + } + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); + +-- +2.27.0 + diff --git a/81-kvm-rhel.rules b/81-kvm-rhel.rules new file mode 100644 index 0000000..787cad6 --- /dev/null +++ b/81-kvm-rhel.rules @@ -0,0 +1 @@ +DEVPATH=="*/kvm", ACTION=="change", RUN+="/lib/udev/udev-kvm-check $env{COUNT} $env{EVENT}" diff --git a/85-kvm.preset b/85-kvm.preset new file mode 100644 index 0000000..8024052 --- /dev/null +++ b/85-kvm.preset @@ -0,0 +1,5 @@ +# Enable kvm-setup by default. 
This can have odd side effects on +# PowerNV systems that aren't intended as KVM hosts, but at present we +# only support RHEL on PowerNV for the purpose of being a RHEV host. + +enable kvm-setup.service diff --git a/95-kvm-memlock.conf b/95-kvm-memlock.conf new file mode 100644 index 0000000..fc59dbe --- /dev/null +++ b/95-kvm-memlock.conf @@ -0,0 +1,10 @@ +# The KVM HV implementation on Power can require a significant amount +# of unswappable memory (about half of which also needs to be host +# physically contiguous) to hold the guest's Hash Page Table (HPT) - +# roughly 1/64th of the guest's RAM size, minimum 16MiB. +# +# These limits allow unprivileged users to start smallish VMs, such as +# those used by libguestfs. +# +* hard memlock 65536 +* soft memlock 65536 diff --git a/99-qemu-guest-agent.rules b/99-qemu-guest-agent.rules new file mode 100644 index 0000000..8a290ab --- /dev/null +++ b/99-qemu-guest-agent.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="virtio-ports", ATTR{name}=="org.qemu.guest_agent.0", \ + TAG+="systemd" ENV{SYSTEMD_WANTS}="qemu-guest-agent.service" diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..15f93e6 --- /dev/null +++ b/README.rst @@ -0,0 +1,19 @@ +==================== +qemu-kvm development +==================== + +qemu-kvm is maintained in a `source tree`_ rather than directly in dist-git. +This provides a way to develop using a regular source code structure and provides +a way to generate an SRPM and build using the koji service. In addition, it supports local +builds using a CentOS 9 Stream specific configuration. + +Developers deliver all changes to source-git using merge requests. Only maintainers +will be pushing changes sent to source-git to dist-git. + +Each release in dist-git is tagged in the source repository so you can easily +check out the source tree for a build. The tags are in the format +name-version-release, but note release doesn't contain the dist tag since the +source can be built in different build roots (Fedora, CentOS, etc.)
+ +.. _source tree: https://gitlab.com/redhat/centos-stream/src/qemu-kvm + diff --git a/README.tests b/README.tests new file mode 100644 index 0000000..9932773 --- /dev/null +++ b/README.tests @@ -0,0 +1,39 @@ +qemu-kvm-tests README +===================== + +The qemu-kvm-tests rpm contains tests that can be used to verify the +functionality of the installed qemu-kvm package + +When installed, the files from this rpm will be arranged in the following +directory structure + +tests-src/ +├── README +├── scripts +│   ├── qemu.py +│   └── qmp +└── tests + ├── acceptance + ├── Makefile.include + └── qemu-iotests + +The tests/ directory within the tests-src/ directory is setup to remain a copy +of a subset of the tests/ directory from the QEMU source tree + +The avocado_qemu tests and qemu-iotests, along with files required for the +execution of the avocado_qemu tests (scripts/qemu.py and scripts/qmp/) will be +installed in a new location - /usr/lib64/qemu-kvm/tests-src/ + +avocado_qemu tests: +The avocado_qemu tests can be executed by running the following avocado command: +avocado run -p qemu_bin=/usr/libexec/qemu-kvm /usr/lib64/qemu-kvm/tests/acceptance/ +Avocado needs to be installed separately using either pip or from source as +Avocado is not being packaged for RHEL-8. + +qemu-iotests: +symlinks to corresponding binaries need to be created for QEMU_PROG, +QEMU_IO_PROG, QEMU_IMG_PROG, and QEMU_NBD_PROG before the iotests can be +executed. + +The primary purpose of this package is to make these tests available to be +executed as gating tests for the virt module in the RHEL-8 OSCI environment. 
diff --git a/bridge.conf b/bridge.conf new file mode 100644 index 0000000..a573665 --- /dev/null +++ b/bridge.conf @@ -0,0 +1 @@ +allow virbr0 diff --git a/gating.yaml b/gating.yaml new file mode 100644 index 0000000..8d17eb8 --- /dev/null +++ b/gating.yaml @@ -0,0 +1,9 @@ +# recipients: kvmqe-ci, yfu +--- !Policy +product_versions: + - rhel-9 +decision_context: osci_compose_gate +subject_type: brew-build +rules: + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-intel.brew-build.gating.tier1.functional} + - !PassingTestCaseRule {test_case_name: kvm-ci.qemu-kvm.x86_64-amd.brew-build.gating.tier1.functional} diff --git a/ksm.service b/ksm.service new file mode 100644 index 0000000..35c6f1d --- /dev/null +++ b/ksm.service @@ -0,0 +1,13 @@ +[Unit] +Description=Kernel Samepage Merging +ConditionPathExists=/sys/kernel/mm/ksm + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/sysconfig/ksm +ExecStart=/usr/libexec/ksmctl start +ExecStop=/usr/libexec/ksmctl stop + +[Install] +WantedBy=multi-user.target diff --git a/ksm.sysconfig b/ksm.sysconfig new file mode 100644 index 0000000..d99656d --- /dev/null +++ b/ksm.sysconfig @@ -0,0 +1,4 @@ +# The maximum number of unswappable kernel pages +# which may be allocated by ksm (0 for unlimited) +# If unset, defaults to half of total memory +# KSM_MAX_KERNEL_PAGES= diff --git a/ksmctl.c b/ksmctl.c new file mode 100644 index 0000000..af39591 --- /dev/null +++ b/ksmctl.c @@ -0,0 +1,77 @@ +/* Start/stop KSM, for systemd. + * Copyright (C) 2009, 2011 Red Hat, Inc. + * Written by Paolo Bonzini . + * Based on the original sysvinit script by Dan Kenigsberg + * This file is distributed under the GNU General Public License, version 2 + * or later. 
*/ + +#include +#include +#include +#include +#include +#include + +#define KSM_MAX_KERNEL_PAGES_FILE "/sys/kernel/mm/ksm/max_kernel_pages" +#define KSM_RUN_FILE "/sys/kernel/mm/ksm/run" + +char *program_name; + +int usage(void) +{ + fprintf(stderr, "Usage: %s {start|stop}\n", program_name); + return 1; +} + +int write_value(uint64_t value, char *filename) +{ + FILE *fp; + if (!(fp = fopen(filename, "w")) || + fprintf(fp, "%llu\n", (unsigned long long) value) == EOF || + fflush(fp) == EOF || + fclose(fp) == EOF) + return 1; + + return 0; +} + +uint64_t ksm_max_kernel_pages() +{ + char *var = getenv("KSM_MAX_KERNEL_PAGES"); + char *endptr; + uint64_t value; + if (var && *var) { + value = strtoll(var, &endptr, 0); + if (value < LLONG_MAX && !*endptr) + return value; + } + /* Unless KSM_MAX_KERNEL_PAGES is set, let KSM munch up to half of + * total memory. */ + return sysconf(_SC_PHYS_PAGES) / 2; +} + +int start(void) +{ + if (access(KSM_MAX_KERNEL_PAGES_FILE, R_OK) >= 0) + write_value(ksm_max_kernel_pages(), KSM_MAX_KERNEL_PAGES_FILE); + return write_value(1, KSM_RUN_FILE); +} + +int stop(void) +{ + return write_value(0, KSM_RUN_FILE); +} + +int main(int argc, char **argv) +{ + program_name = argv[0]; + if (argc < 2) { + return usage(); + } else if (!strcmp(argv[1], "start")) { + return start(); + } else if (!strcmp(argv[1], "stop")) { + return stop(); + } else { + return usage(); + } +} diff --git a/ksmtuned b/ksmtuned new file mode 100644 index 0000000..7bc5743 --- /dev/null +++ b/ksmtuned @@ -0,0 +1,139 @@ +#!/bin/bash +# +# Copyright 2009 Red Hat, Inc. and/or its affiliates. +# Released under the GPL +# +# Author: Dan Kenigsberg +# +# ksmtuned - a simple script that controls whether (and with what vigor) ksm +# should search for duplicated pages. +# +# starts ksm when memory committed to qemu processes exceeds a threshold, and +# make ksm work harder and harder until memory load falls below that +# threshold.
+# +# send SIGUSR1 to this process right after a new qemu process is started, or +# following its death, to retune ksm accordingly +# +# needs testing and ironing. contact danken@redhat.com if something breaks. + +if [ -f /etc/ksmtuned.conf ]; then + . /etc/ksmtuned.conf +fi + +debug() { + if [ -n "$DEBUG" ]; then + s="`/bin/date`: $*" + [ -n "$LOGFILE" ] && echo "$s" >> "$LOGFILE" || echo "$s" + fi +} + + +KSM_MONITOR_INTERVAL=${KSM_MONITOR_INTERVAL:-60} +KSM_NPAGES_BOOST=${KSM_NPAGES_BOOST:-300} +KSM_NPAGES_DECAY=${KSM_NPAGES_DECAY:--50} + +KSM_NPAGES_MIN=${KSM_NPAGES_MIN:-64} +KSM_NPAGES_MAX=${KSM_NPAGES_MAX:-1250} +# millisecond sleep between ksm scans for 16Gb server. Smaller servers sleep +# more, bigger sleep less. +KSM_SLEEP_MSEC=${KSM_SLEEP_MSEC:-10} + +KSM_THRES_COEF=${KSM_THRES_COEF:-20} +KSM_THRES_CONST=${KSM_THRES_CONST:-2048} + +total=`awk '/^MemTotal:/ {print $2}' /proc/meminfo` +debug total $total + +npages=0 +sleep=$[KSM_SLEEP_MSEC * 16 * 1024 * 1024 / total] +[ $sleep -le 10 ] && sleep=10 +debug sleep $sleep +thres=$[total * KSM_THRES_COEF / 100] +if [ $KSM_THRES_CONST -gt $thres ]; then + thres=$KSM_THRES_CONST +fi +debug thres $thres + +KSMCTL () { + case x$1 in + xstop) + echo 0 > /sys/kernel/mm/ksm/run + ;; + xstart) + echo $2 > /sys/kernel/mm/ksm/pages_to_scan + echo $3 > /sys/kernel/mm/ksm/sleep_millisecs + echo 1 > /sys/kernel/mm/ksm/run + ;; + esac +} + +committed_memory () { + # calculate how much memory is committed to running qemu processes + local pidlist + pidlist=$(pgrep -d ' ' -- '^qemu(-(kvm|system-.+)|:.{1,11})$') + if [ -n "$pidlist" ]; then + ps -p "$pidlist" -o rsz= + fi | awk '{ sum += $1 }; END { print 0+sum }' +} + +free_memory () { + awk '/^(MemFree|Buffers|Cached):/ {free += $2}; END {print free}' \ + /proc/meminfo +} + +increase_npages() { + local delta + delta=${1:-0} + npages=$[npages + delta] + if [ $npages -lt $KSM_NPAGES_MIN ]; then + npages=$KSM_NPAGES_MIN + elif [ $npages -gt $KSM_NPAGES_MAX ]; then + 
npages=$KSM_NPAGES_MAX + fi + echo $npages +} + + +adjust () { + local free committed + free=`free_memory` + committed=`committed_memory` + debug committed $committed free $free + if [ $[committed + thres] -lt $total -a $free -gt $thres ]; then + KSMCTL stop + debug "$[committed + thres] < $total and free > $thres, stop ksm" + return 1 + fi + debug "$[committed + thres] > $total, start ksm" + if [ $free -lt $thres ]; then + npages=`increase_npages $KSM_NPAGES_BOOST` + debug "$free < $thres, boost" + else + npages=`increase_npages $KSM_NPAGES_DECAY` + debug "$free > $thres, decay" + fi + KSMCTL start $npages $sleep + debug "KSMCTL start $npages $sleep" + return 0 +} + +function nothing () { + : +} + +loop () { + trap nothing SIGUSR1 + while true + do + sleep $KSM_MONITOR_INTERVAL & + wait $! + adjust + done +} + +PIDFILE=${PIDFILE-/var/run/ksmtune.pid} +if touch "$PIDFILE"; then + loop & + echo $! > "$PIDFILE" +fi diff --git a/ksmtuned.conf b/ksmtuned.conf new file mode 100644 index 0000000..fc4518c --- /dev/null +++ b/ksmtuned.conf @@ -0,0 +1,21 @@ +# Configuration file for ksmtuned. + +# How long ksmtuned should sleep between tuning adjustments +# KSM_MONITOR_INTERVAL=60 + +# Millisecond sleep between ksm scans for 16Gb server. +# Smaller servers sleep more, bigger sleep less. 
+# KSM_SLEEP_MSEC=10 + +# KSM_NPAGES_BOOST=300 +# KSM_NPAGES_DECAY=-50 +# KSM_NPAGES_MIN=64 +# KSM_NPAGES_MAX=1250 + +# KSM_THRES_COEF=20 +# KSM_THRES_CONST=2048 + +# uncomment the following if you want ksmtuned debug info + +# LOGFILE=/var/log/ksmtuned +# DEBUG=1 diff --git a/ksmtuned.service b/ksmtuned.service new file mode 100644 index 0000000..39febcc --- /dev/null +++ b/ksmtuned.service @@ -0,0 +1,12 @@ +[Unit] +Description=Kernel Samepage Merging (KSM) Tuning Daemon +After=ksm.service +Requires=ksm.service + +[Service] +ExecStart=/usr/sbin/ksmtuned +ExecReload=/bin/kill -USR1 $MAINPID +Type=forking + +[Install] +WantedBy=multi-user.target diff --git a/kvm-Enable-SGX-RH-Only.patch b/kvm-Enable-SGX-RH-Only.patch new file mode 100644 index 0000000..efc8cac --- /dev/null +++ b/kvm-Enable-SGX-RH-Only.patch @@ -0,0 +1,28 @@ +From db6e042fe4fdc1a1bbf562a46b15d4d8e33e2fa6 Mon Sep 17 00:00:00 2001 +From: Paul Lai +Date: Tue, 25 Jan 2022 15:16:22 -0500 +Subject: [PATCH 4/7] Enable SGX -- RH Only + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [4/5] cea874f29984897ef1232fb7749c13203c888034 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index ddf036f042..fdbbdf9742 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -102,3 +102,4 @@ CONFIG_TPM_CRB=y + CONFIG_TPM_TIS_ISA=y + CONFIG_TPM_EMULATOR=y + CONFIG_TPM_PASSTHROUGH=y ++CONFIG_SGX=y +-- +2.27.0 + diff --git a/kvm-Enable-virtio-iommu-pci-on-aarch64.patch b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch new file mode 100644 index 0000000..3aafd3c --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-aarch64.patch @@ -0,0 
+1,41 @@ +From 85781b8745fa1581a66f64011d61a4f0c4e103dc Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 17:03:11 +0200 +Subject: [PATCH 3/5] Enable virtio-iommu-pci on aarch64 + +RH-Author: Eric Auger +RH-MergeRequest: 83: Enable virtio-iommu-pci on aarch64 +RH-Commit: [1/1] 23e5c0832e52c66adf5fd6daccdc3edddc7ecb8b (eauger1/centos-qemu-kvm) +RH-Bugzilla: 1477099 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1477099 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45128798 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on aarch64 by +turning CONFIG_VIRTIO_IOMMU on. + +Signed-off-by: Eric Auger +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 187938573f..1618d31b89 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -23,6 +23,7 @@ CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_MEM=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.31.1 + diff --git a/kvm-Enable-virtio-iommu-pci-on-x86_64.patch b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch new file mode 100644 index 0000000..2eb24df --- /dev/null +++ b/kvm-Enable-virtio-iommu-pci-on-x86_64.patch @@ -0,0 +1,41 @@ +From c531a39171201f8a1d063e6af752e5d629c1b4bf Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Thu, 9 Jun 2022 11:35:18 +0200 +Subject: [PATCH 4/6] Enable virtio-iommu-pci on x86_64 + +RH-Author: Eric Auger +RH-MergeRequest: 100: Enable virtio-iommu-pci on x86_64 +RH-Commit: [1/1] a164af477efc7cb9d3d76a0e644f198f7c9fb2b5 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2094252 
+RH-Acked-by: Miroslav Rezanina +RH-Acked-by: MST +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094252 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871185 +Upstream Status: RHEL-only +Tested: With virtio-net-pci and virtio-block-pci + +let's enable the virtio-iommu-pci device on x86_64 by +turning CONFIG_VIRTIO_IOMMU on. + +Signed-off-by: Eric Auger +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index d0c9e66641..3850b9de72 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -90,6 +90,7 @@ CONFIG_VHOST_USER_BLK=y + CONFIG_VIRTIO_MEM=y + CONFIG_VIRTIO_PCI=y + CONFIG_VIRTIO_VGA=y ++CONFIG_VIRTIO_IOMMU=y + CONFIG_VMMOUSE=y + CONFIG_VMPORT=y + CONFIG_VTD=y +-- +2.31.1 + diff --git a/kvm-Introduce-event-loop-base-abstract-class.patch b/kvm-Introduce-event-loop-base-abstract-class.patch new file mode 100644 index 0000000..9f987ea --- /dev/null +++ b/kvm-Introduce-event-loop-base-abstract-class.patch @@ -0,0 +1,503 @@ +From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:21 +0200 +Subject: [PATCH 01/16] Introduce event-loop-base abstract class + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +Introduce the 'event-loop-base' abstract class, it'll hold the +properties common to all event loops and provide the necessary hooks for +their creation and maintenance. Then have iothread inherit from it. 
+ +EventLoopBaseClass is defined as user creatable and provides a hook for +its children to attach themselves to the user creatable class 'complete' +function. It also provides an update_params() callback to propagate +property changes onto its children. + +The new 'event-loop-base' class will live in the root directory. It is +built on its own using the 'link_whole' option (there are no direct +function dependencies between the class and its children, it all happens +through 'constructor' magic). It also imposes new compilation +dependencies: + + qom <- event-loop-base <- blockdev (iothread.c) + +And in subsequent patches: + + qom <- event-loop-base <- qemuutil (util/main-loop.c) + +All this forced some amount of reordering in meson.build: + + - Moved qom build definition before qemuutil. Doing it the other way + around (i.e. moving qemuutil after qom) isn't possible as a lot of + core libraries that live in between the two depend on it. + + - Process the 'hw' subdir earlier, as it introduces files into the + 'qom' source set. + +No functional changes intended.
+ +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-2-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d) +--- + event-loop-base.c | 104 +++++++++++++++++++++++++++++++ + include/sysemu/event-loop-base.h | 36 +++++++++++ + include/sysemu/iothread.h | 6 +- + iothread.c | 65 ++++++------------- + meson.build | 23 ++++--- + qapi/qom.json | 22 +++++-- + 6 files changed, 192 insertions(+), 64 deletions(-) + create mode 100644 event-loop-base.c + create mode 100644 include/sysemu/event-loop-base.h + +diff --git a/event-loop-base.c b/event-loop-base.c +new file mode 100644 +index 0000000000..a924c73a7c +--- /dev/null ++++ b/event-loop-base.c +@@ -0,0 +1,104 @@ ++/* ++ * QEMU event-loop base ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Stefan Hajnoczi ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include "qom/object_interfaces.h" ++#include "qapi/error.h" ++#include "sysemu/event-loop-base.h" ++ ++typedef struct { ++ const char *name; ++ ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ ++} EventLoopBaseParamInfo; ++ ++static EventLoopBaseParamInfo aio_max_batch_info = { ++ "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), ++}; ++ ++static void event_loop_base_get_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)event_loop_base + info->offset; ++ ++ visit_type_int64(v, name, field, errp); ++} ++ ++static void event_loop_base_set_param(Object *obj, Visitor *v, ++ const char *name, void *opaque, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj); ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ EventLoopBaseParamInfo *info = opaque; ++ int64_t *field = (void *)base + info->offset; ++ int64_t value; ++ ++ if (!visit_type_int64(v, name, &value, errp)) { ++ return; ++ } ++ ++ if (value < 0) { ++ error_setg(errp, "%s value must be in range [0, %" PRId64 "]", ++ info->name, INT64_MAX); ++ return; ++ } ++ ++ *field = value; ++ ++ if (bc->update_params) { ++ bc->update_params(base, errp); ++ } ++ ++ return; ++} ++ ++static void event_loop_base_complete(UserCreatable *uc, Error **errp) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *base = EVENT_LOOP_BASE(uc); ++ ++ if (bc->init) { ++ bc->init(base, errp); ++ } ++} ++ ++static void event_loop_base_class_init(ObjectClass *klass, void *class_data) ++{ ++ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); ++ ucc->complete = event_loop_base_complete; ++ ++ object_class_property_add(klass, "aio-max-batch", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &aio_max_batch_info); ++} ++ ++static const TypeInfo 
event_loop_base_info = { ++ .name = TYPE_EVENT_LOOP_BASE, ++ .parent = TYPE_OBJECT, ++ .instance_size = sizeof(EventLoopBase), ++ .class_size = sizeof(EventLoopBaseClass), ++ .class_init = event_loop_base_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static void register_types(void) ++{ ++ type_register_static(&event_loop_base_info); ++} ++type_init(register_types); +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +new file mode 100644 +index 0000000000..8e77d8b69f +--- /dev/null ++++ b/include/sysemu/event-loop-base.h +@@ -0,0 +1,36 @@ ++/* ++ * QEMU event-loop backend ++ * ++ * Copyright (C) 2022 Red Hat Inc ++ * ++ * Authors: ++ * Nicolas Saenz Julienne ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef QEMU_EVENT_LOOP_BASE_H ++#define QEMU_EVENT_LOOP_BASE_H ++ ++#include "qom/object.h" ++#include "block/aio.h" ++#include "qemu/typedefs.h" ++ ++#define TYPE_EVENT_LOOP_BASE "event-loop-base" ++OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass, ++ EVENT_LOOP_BASE) ++ ++struct EventLoopBaseClass { ++ ObjectClass parent_class; ++ ++ void (*init)(EventLoopBase *base, Error **errp); ++ void (*update_params)(EventLoopBase *base, Error **errp); ++}; ++ ++struct EventLoopBase { ++ Object parent; ++ ++ /* AioContext AIO engine parameters */ ++ int64_t aio_max_batch; ++}; ++#endif +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index 7f714bd136..8f8601d6ab 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -17,11 +17,12 @@ + #include "block/aio.h" + #include "qemu/thread.h" + #include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define TYPE_IOTHREAD "iothread" + + struct IOThread { +- Object parent_obj; ++ EventLoopBase parent_obj; + + QemuThread thread; + AioContext *ctx; +@@ -37,9 +38,6 @@ struct 
IOThread { + int64_t poll_max_ns; + int64_t poll_grow; + int64_t poll_shrink; +- +- /* AioContext AIO engine parameters */ +- int64_t aio_max_batch; + }; + typedef struct IOThread IOThread; + +diff --git a/iothread.c b/iothread.c +index 0f98af0f2a..8fa2f3bfb8 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -17,6 +17,7 @@ + #include "qemu/module.h" + #include "block/aio.h" + #include "block/block.h" ++#include "sysemu/event-loop-base.h" + #include "sysemu/iothread.h" + #include "qapi/error.h" + #include "qapi/qapi-commands-misc.h" +@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread) + iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE); + } + +-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) ++static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + { ++ IOThread *iothread = IOTHREAD(base); + ERRP_GUARD(); + ++ if (!iothread->ctx) { ++ return; ++ } ++ + aio_context_set_poll_params(iothread->ctx, + iothread->poll_max_ns, + iothread->poll_grow, +@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp) + } + + aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, ++ iothread->parent_obj.aio_max_batch, + errp); + } + +-static void iothread_complete(UserCreatable *obj, Error **errp) ++ ++static void iothread_init(EventLoopBase *base, Error **errp) + { + Error *local_error = NULL; +- IOThread *iothread = IOTHREAD(obj); ++ IOThread *iothread = IOTHREAD(base); + char *thread_name; + + iothread->stopping = false; +@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp) + */ + iothread_init_gcontext(iothread); + +- iothread_set_aio_context_params(iothread, &local_error); ++ iothread_set_aio_context_params(base, &local_error); + if (local_error) { + error_propagate(errp, local_error); + aio_context_unref(iothread->ctx); +@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp) + * to inherit. + */ + thread_name = g_strdup_printf("IO %s", +- object_get_canonical_path_component(OBJECT(obj))); ++ object_get_canonical_path_component(OBJECT(base))); + qemu_thread_create(&iothread->thread, thread_name, iothread_run, + iothread, QEMU_THREAD_JOINABLE); + g_free(thread_name); +@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = { + static IOThreadParamInfo poll_shrink_info = { + "poll-shrink", offsetof(IOThread, poll_shrink), + }; +-static IOThreadParamInfo aio_max_batch_info = { +- "aio-max-batch", offsetof(IOThread, aio_max_batch), +-}; + + static void iothread_get_param(Object *obj, Visitor *v, + const char *name, IOThreadParamInfo *info, Error **errp) +@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, + } + } + +-static void iothread_get_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThreadParamInfo *info = opaque; +- +- iothread_get_param(obj, v, name, info, errp); +-} +- +-static void iothread_set_aio_param(Object *obj, Visitor *v, +- const char *name, void *opaque, Error **errp) +-{ +- IOThread *iothread = IOTHREAD(obj); +- IOThreadParamInfo *info = opaque; +- +- if (!iothread_set_param(obj, v, name, info, errp)) { +- return; +- } +- +- if (iothread->ctx) { +- aio_context_set_aio_params(iothread->ctx, +- iothread->aio_max_batch, +- errp); +- } +-} +- + static void iothread_class_init(ObjectClass *klass, void *class_data) + { +- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); +- ucc->complete = iothread_complete; ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass); ++ ++ bc->init = iothread_init; ++ bc->update_params = iothread_set_aio_context_params; + + object_class_property_add(klass, "poll-max-ns", "int", + iothread_get_poll_param, +@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data) + iothread_get_poll_param, + iothread_set_poll_param, + NULL, &poll_shrink_info); +- 
object_class_property_add(klass, "aio-max-batch", "int", +- iothread_get_aio_param, +- iothread_set_aio_param, +- NULL, &aio_max_batch_info); + } + + static const TypeInfo iothread_info = { + .name = TYPE_IOTHREAD, +- .parent = TYPE_OBJECT, ++ .parent = TYPE_EVENT_LOOP_BASE, + .class_init = iothread_class_init, + .instance_size = sizeof(IOThread), + .instance_init = iothread_instance_init, + .instance_finalize = iothread_instance_finalize, +- .interfaces = (InterfaceInfo[]) { +- {TYPE_USER_CREATABLE}, +- {} +- }, + }; + + static void iothread_register_types(void) +@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque) + info->poll_max_ns = iothread->poll_max_ns; + info->poll_grow = iothread->poll_grow; + info->poll_shrink = iothread->poll_shrink; +- info->aio_max_batch = iothread->aio_max_batch; ++ info->aio_max_batch = iothread->parent_obj.aio_max_batch; + + QAPI_LIST_APPEND(*tail, info); + return 0; +diff --git a/meson.build b/meson.build +index 6f7e430f0f..b9c919a55e 100644 +--- a/meson.build ++++ b/meson.build +@@ -2804,6 +2804,7 @@ subdir('qom') + subdir('authz') + subdir('crypto') + subdir('ui') ++subdir('hw') + + + if enable_modules +@@ -2811,6 +2812,18 @@ if enable_modules + modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO') + endif + ++qom_ss = qom_ss.apply(config_host, strict: false) ++libqom = static_library('qom', qom_ss.sources() + genh, ++ dependencies: [qom_ss.dependencies()], ++ name_suffix: 'fa') ++qom = declare_dependency(link_whole: libqom) ++ ++event_loop_base = files('event-loop-base.c') ++event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh, ++ build_by_default: true) ++event_loop_base = declare_dependency(link_whole: event_loop_base, ++ dependencies: [qom]) ++ + stub_ss = stub_ss.apply(config_all, strict: false) + + util_ss.add_all(trace_ss) +@@ -2897,7 +2910,6 @@ subdir('monitor') + subdir('net') + subdir('replay') + subdir('semihosting') 
+-subdir('hw') + subdir('tcg') + subdir('fpu') + subdir('accel') +@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms', + capture: true, + command: [undefsym, nm, '@INPUT@']) + +-qom_ss = qom_ss.apply(config_host, strict: false) +-libqom = static_library('qom', qom_ss.sources() + genh, +- dependencies: [qom_ss.dependencies()], +- name_suffix: 'fa') +- +-qom = declare_dependency(link_whole: libqom) +- + authz_ss = authz_ss.apply(config_host, strict: false) + libauthz = static_library('authz', authz_ss.sources() + genh, + dependencies: [authz_ss.dependencies()], +@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh, + build_by_default: false) + + blockdev = declare_dependency(link_whole: [libblockdev], +- dependencies: [block]) ++ dependencies: [block, event_loop_base]) + + qmp_ss = qmp_ss.apply(config_host, strict: false) + libqmp = static_library('qmp', qmp_ss.sources() + genh, +diff --git a/qapi/qom.json b/qapi/qom.json +index eeb5395ff3..a2439533c5 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -499,6 +499,20 @@ + '*repeat': 'bool', + '*grab-toggle': 'GrabToggleKeys' } } + ++## ++# @EventLoopBaseProperties: ++# ++# Common properties for event loops ++# ++# @aio-max-batch: maximum number of requests in a batch for the AIO engine, ++# 0 means that the engine will use its default. ++# (default: 0) ++# ++# Since: 7.1 ++## ++{ 'struct': 'EventLoopBaseProperties', ++ 'data': { '*aio-max-batch': 'int' } } ++ + ## + # @IothreadProperties: + # +@@ -516,17 +530,15 @@ + # algorithm detects it is spending too long polling without + # encountering events. 0 selects a default behaviour (default: 0) + # +-# @aio-max-batch: maximum number of requests in a batch for the AIO engine, +-# 0 means that the engine will use its default +-# (default:0, since 6.1) ++# The @aio-max-batch option is available since 6.1. 
+ # + # Since: 2.0 + ## + { 'struct': 'IothreadProperties', ++ 'base': 'EventLoopBaseProperties', + 'data': { '*poll-max-ns': 'int', + '*poll-grow': 'int', +- '*poll-shrink': 'int', +- '*aio-max-batch': 'int' } } ++ '*poll-shrink': 'int' } } + + ## + # @MemoryBackendProperties: +-- +2.31.1 + diff --git a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch new file mode 100644 index 0000000..c7b8898 --- /dev/null +++ b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -0,0 +1,420 @@ +From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:31 -0300 +Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce + io_flush callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add flags to io_writev and introduce io_flush as optional callback to +QIOChannelClass, allowing the implementation of zero copy writes by +subclasses. + +How to use them: +- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY), +- Wait write completion with qio_channel_flush(). + +Notes: +As some zero copy write implementations work asynchronously, it's +recommended to keep the write buffer untouched until the return of +qio_channel_flush(), to avoid the risk of sending an updated buffer +instead of the buffer state during write. + +As io_flush callback is optional, if a subclass does not implement it, then: +- io_flush will return 0 without changing anything. + +Also, some functions like qio_channel_writev_full_all() were adapted to +receive a flag parameter. 
That allows shared code between zero copy and +non-zero copy writev, and also an easier implementation on new flags. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-3-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad) +Signed-off-by: Leonardo Bras +--- + chardev/char-io.c | 2 +- + hw/remote/mpqemu-link.c | 2 +- + include/io/channel.h | 38 +++++++++++++++++++++- + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-socket.c | 2 ++ + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 49 +++++++++++++++++++++++------ + migration/rdma.c | 1 + + scsi/pr-manager-helper.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + 13 files changed, 88 insertions(+), 14 deletions(-) + +diff --git a/chardev/char-io.c b/chardev/char-io.c +index 8ced184160..4451128cba 100644 +--- a/chardev/char-io.c ++++ b/chardev/char-io.c +@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc, + + ret = qio_channel_writev_full( + ioc, &iov, 1, +- fds, nfds, NULL); ++ fds, nfds, 0, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + if (offset) { + return offset; +diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c +index 7e841820e5..e8f556bd27 100644 +--- a/hw/remote/mpqemu-link.c ++++ b/hw/remote/mpqemu-link.c +@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) + } + + if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), +- fds, nfds, errp)) { ++ fds, nfds, 0, errp)) { + ret = true; + } else { + trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); +diff --git a/include/io/channel.h b/include/io/channel.h +index 88988979f8..c680ee7480 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_ERR_BLOCK 
-2 + ++#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_FD_PASS, + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, + }; + + +@@ -104,6 +107,7 @@ struct QIOChannelClass { + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + ssize_t (*io_readv)(QIOChannel *ioc, + const struct iovec *iov, +@@ -136,6 +140,8 @@ struct QIOChannelClass { + IOHandler *io_read, + IOHandler *io_write, + void *opaque); ++ int (*io_flush)(QIOChannel *ioc, ++ Error **errp); + }; + + /* General I/O handling functions */ +@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Write data to the IO channel, reading it from the +@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp); + + /** +@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: an array of file handles to send + * @nfds: number of file handles in @fds ++ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * +@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc, + * to be written, yielding from the current coroutine + * if required. + * ++ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags, ++ * instead of waiting for all requested data to be written, ++ * this function will wait until it's all queued for writing. ++ * In this case, if the buffer gets changed between queueing and ++ * sending, the updated buffer will be sent. 
If this is not a ++ * desired behavior, it's suggested to call qio_channel_flush() ++ * before reusing the buffer. ++ * + * Returns: 0 if all bytes were written, or -1 on error + */ + +@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp); ++ int flags, Error **errp); ++ ++/** ++ * qio_channel_flush: ++ * @ioc: the channel object ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Will block until every packet queued with ++ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY ++ * is sent, or return in case of any error. ++ * ++ * If not implemented, acts as a no-op, and returns 0. ++ * ++ * Returns -1 if any error is found, ++ * 1 if every send failed to use zero copy. ++ * 0 otherwise. ++ */ ++ ++int qio_channel_flush(QIOChannel *ioc, ++ Error **errp); + + #endif /* QIO_CHANNEL_H */ +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index baa4e2b089..bf52011be2 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 338da73ade..54560464ae 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index d7cf6d278f..ef6807a6be 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git 
a/io/channel-socket.c b/io/channel-socket.c +index 7a8d9f69c9..a1be2197ca 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 2ae1b92fc0..4ce890a538 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff --git a/io/channel-websock.c b/io/channel-websock.c +index 55145a6a8c..9619906ac3 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index e8b019dc36..0640941ac5 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); + +- if ((fds || nfds) && +- !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ if (fds || nfds) { ++ if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support file descriptor passing"); ++ return -1; ++ } ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ error_setg_errno(errp, EINVAL, ++ "Zero Copy does not support file descriptor passing"); 
++ return -1; ++ } ++ } ++ ++ if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { + error_setg_errno(errp, EINVAL, +- "Channel does not support file descriptor passing"); ++ "Requested Zero Copy feature is not available"); + return -1; + } + +- return klass->io_writev(ioc, iov, niov, fds, nfds, errp); ++ return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp); + } + + int qio_channel_writev_full_all(QIOChannel *ioc, + const struct iovec *iov, + size_t niov, + int *fds, size_t nfds, +- Error **errp) ++ int flags, Error **errp) + { + int ret = -1; + struct iovec *local_iov = g_new(struct iovec, niov); +@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc, + + while (nlocal_iov > 0) { + ssize_t len; +- len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds, +- errp); ++ ++ len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, ++ nfds, flags, errp); ++ + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_OUT); +@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp); + } + + +@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; +- return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp); ++ return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp); + } + + +@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc, + return klass->io_seek(ioc, offset, whence, errp); + } + ++int 
qio_channel_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); ++ ++ if (!klass->io_flush || ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ return 0; ++ } ++ ++ return klass->io_flush(ioc, errp); ++} ++ + + static void qio_channel_restart_read(void *opaque) + { +diff --git a/migration/rdma.c b/migration/rdma.c +index ef1e65ec36..672d1958a9 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, + size_t niov, + int *fds, + size_t nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c +index 451c7631b7..3be52a98d5 100644 +--- a/scsi/pr-manager-helper.c ++++ b/scsi/pr-manager-helper.c +@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr, + iov.iov_base = (void *)buf; + iov.iov_len = sz; + n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1, +- nfds ? &fd : NULL, nfds, errp); ++ nfds ? 
&fd : NULL, nfds, 0, errp); + + if (n_written <= 0) { + assert(n_written != QIO_CHANNEL_ERR_BLOCK); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index c49eec1f03..6713886d02 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void) + G_N_ELEMENTS(iosend), + fdsend, + G_N_ELEMENTS(fdsend), ++ 0, + &error_abort); + + qio_channel_readv_full(dst, +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch new file mode 100644 index 0000000..9d134e6 --- /dev/null +++ b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -0,0 +1,58 @@ +From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:43 -0300 +Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush + works +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Somewhere between v6 and v7 of the zero-copy-send patchset a crucial +part of the flushing mechanism went missing: incrementing zero_copy_queued. + +Without that, the flushing interface becomes a no-op, and there is no +guarantee the buffer is really sent. + +This can be as bad as causing RAM corruption during migration. + +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Reported-by: 徐闯 +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index 7490e5943d..8ae8b212cf 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + "Unable to write to socket"); + return -1; + } ++ ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sioc->zero_copy_queued++; ++ } ++ + return ret; + } + #else /* WIN32 */ +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch new file mode 100644 index 0000000..89aa806 --- /dev/null +++ b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -0,0 +1,249 @@ +From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:32 -0300 +Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & + io_flush for CONFIG_LINUX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +For CONFIG_LINUX, implement the new zero copy flag and the optional callback +io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY +feature is available in the host kernel, which is checked on +qio_channel_socket_connect_sync() + +qio_channel_socket_flush() was implemented by counting how many times +sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the +socket's error queue, in order to find how many of them finished sending. 
+Flush will loop until those counters are the same, or until some error occurs. + +Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY: +1: Buffer +- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying, +some caution is necessary to avoid overwriting any buffer before it's sent. +If something like this happens, a newer version of the buffer may be sent instead. +- If this is a problem, it's recommended to call qio_channel_flush() before freeing +or re-using the buffer. + +2: Locked memory +- When using MSG_ZEROCOPY, the buffer memory will be locked after being queued, and +unlocked after it's sent. +- Depending on the size of each buffer, and how often it's sent, it may require +a larger amount of locked memory than is usually available to a non-root user. +- If the required amount of locked memory is not available, writev_zero_copy +will return an error, which can abort an operation like migration. +- Because of this, when user code wants to add zero copy as a feature, it +requires a mechanism to disable it, so it can still be accessible to less +privileged users. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Message-Id: <20220513062836.965425-4-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d) +Signed-off-by: Leonardo Bras +--- + include/io/channel-socket.h | 2 + + io/channel-socket.c | 116 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 114 insertions(+), 4 deletions(-) + +diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h +index e747e63514..513c428fe4 100644 +--- a/include/io/channel-socket.h ++++ b/include/io/channel-socket.h +@@ -47,6 +47,8 @@ struct QIOChannelSocket { + socklen_t localAddrLen; + struct sockaddr_storage remoteAddr; + socklen_t remoteAddrLen; ++ ssize_t zero_copy_queued; ++ ssize_t zero_copy_sent; + }; + + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index a1be2197ca..fbd2214d20 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -26,6 +26,14 @@ + #include "io/channel-watch.h" + #include "trace.h" + #include "qapi/clone-visitor.h" ++#ifdef CONFIG_LINUX ++#include ++#include ++ ++#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY)) ++#define QEMU_MSG_ZEROCOPY ++#endif ++#endif + + #define SOCKET_MAX_FDS 16 + +@@ -55,6 +63,8 @@ qio_channel_socket_new(void) + + sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET)); + sioc->fd = -1; ++ sioc->zero_copy_queued = 0; ++ sioc->zero_copy_sent = 0; + + ioc = QIO_CHANNEL(sioc); + qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN); +@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + return -1; + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ int ret, v = 1; ++ ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v)); ++ if (ret == 0) { ++ /* Zero copy available on host */ ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY); ++ } ++#endif ++ + return 0; + } + +@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)]; + size_t fdsize = sizeof(int) * nfds; + struct cmsghdr *cmsg; ++ int sflags = 0; + + 
memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)); + +@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + ++#ifdef QEMU_MSG_ZEROCOPY ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++ sflags = MSG_ZEROCOPY; ++ } ++#endif ++ + retry: +- ret = sendmsg(sioc->fd, &msg, 0); ++ ret = sendmsg(sioc->fd, &msg, sflags); + if (ret <= 0) { +- if (errno == EAGAIN) { ++ switch (errno) { ++ case EAGAIN: + return QIO_CHANNEL_ERR_BLOCK; +- } +- if (errno == EINTR) { ++ case EINTR: + goto retry; ++#ifdef QEMU_MSG_ZEROCOPY ++ case ENOBUFS: ++ if (sflags & MSG_ZEROCOPY) { ++ error_setg_errno(errp, errno, ++ "Process can't lock enough memory for using MSG_ZEROCOPY"); ++ return -1; ++ } ++ break; ++#endif + } ++ + error_setg_errno(errp, errno, + "Unable to write to socket"); + return -1; +@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + } + #endif /* WIN32 */ + ++ ++#ifdef QEMU_MSG_ZEROCOPY ++static int qio_channel_socket_flush(QIOChannel *ioc, ++ Error **errp) ++{ ++ QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); ++ struct msghdr msg = {}; ++ struct sock_extended_err *serr; ++ struct cmsghdr *cm; ++ char control[CMSG_SPACE(sizeof(*serr))]; ++ int received; ++ int ret = 1; ++ ++ msg.msg_control = control; ++ msg.msg_controllen = sizeof(control); ++ memset(control, 0, sizeof(control)); ++ ++ while (sioc->zero_copy_sent < sioc->zero_copy_queued) { ++ received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); ++ if (received < 0) { ++ switch (errno) { ++ case EAGAIN: ++ /* Nothing on errqueue, wait until something is available */ ++ qio_channel_wait(ioc, G_IO_ERR); ++ continue; ++ case EINTR: ++ continue; ++ default: ++ error_setg_errno(errp, errno, ++ "Unable to read errqueue"); ++ return -1; ++ } ++ } ++ ++ cm = CMSG_FIRSTHDR(&msg); ++ if (cm->cmsg_level != SOL_IP && ++ cm->cmsg_type != IP_RECVERR) { ++ error_setg_errno(errp, EPROTOTYPE, ++ "Wrong cmsg in errqueue"); ++ 
return -1; ++ } ++ ++ serr = (void *) CMSG_DATA(cm); ++ if (serr->ee_errno != SO_EE_ORIGIN_NONE) { ++ error_setg_errno(errp, serr->ee_errno, ++ "Error on socket"); ++ return -1; ++ } ++ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) { ++ error_setg_errno(errp, serr->ee_origin, ++ "Error not from zero copy"); ++ return -1; ++ } ++ ++ /* No errors, count successfully finished sendmsg()*/ ++ sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1; ++ ++ /* If any sendmsg() succeeded using zero copy, return 0 at the end */ ++ if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) { ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++ ++#endif /* QEMU_MSG_ZEROCOPY */ ++ + static int + qio_channel_socket_set_blocking(QIOChannel *ioc, + bool enabled, +@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, + ioc_klass->io_set_delay = qio_channel_socket_set_delay; + ioc_klass->io_create_watch = qio_channel_socket_create_watch; + ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; ++#ifdef QEMU_MSG_ZEROCOPY ++ ioc_klass->io_flush = qio_channel_socket_flush; ++#endif + } + + static const TypeInfo qio_channel_socket_info = { +-- +2.35.3 + diff --git a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch new file mode 100644 index 0000000..6fc0c76 --- /dev/null +++ b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -0,0 +1,82 @@ +From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:42 -0300 +Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to + improve readability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +During implementation of the MSG_ZEROCOPY feature, a lot of #ifdefs were +introduced, particularly at qio_channel_socket_writev(). + +Rewrite some of those changes so it's easier to read. + +Also, introduce an assert to help detect incorrect zero-copy usage when +it's disabled at build time. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr. David Alan Gilbert + dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached +(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index fbd2214d20..7490e5943d 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } + +-#ifdef QEMU_MSG_ZEROCOPY + if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { ++#ifdef QEMU_MSG_ZEROCOPY + sflags = MSG_ZEROCOPY; +- } ++#else ++ /* ++ * We expect QIOChannel class entry point to have ++ * blocked this code path already ++ */ ++ g_assert_not_reached(); + #endif ++ } + + retry: + ret = sendmsg(sioc->fd, &msg, sflags); +@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, + return QIO_CHANNEL_ERR_BLOCK; + case EINTR: + goto retry; +-#ifdef QEMU_MSG_ZEROCOPY + case ENOBUFS: +- if (sflags & MSG_ZEROCOPY) { ++ if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) { + error_setg_errno(errp, errno, + "Process can't lock enough memory for using MSG_ZEROCOPY"); + return -1; + } + break; +-#endif + } + + error_setg_errno(errp, errno, +-- +2.35.3 + diff --git a/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch 
b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch new file mode 100644 index 0000000..27cc557 --- /dev/null +++ b/kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch @@ -0,0 +1,107 @@ +From e0e4f01c6f4fb5881960f72ae4e80951b711131e Mon Sep 17 00:00:00 2001 +From: Stefano Garzarella +Date: Thu, 24 Mar 2022 16:04:57 +0100 +Subject: [PATCH 1/5] RHEL: disable "seqpacket" for "vhost-vsock-device" in + rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefano Garzarella +RH-MergeRequest: 136: RHEL: disable "seqpacket" for "vhost-vsock-device" in rhel8.6.0 [rhel-8.7.0] +RH-Commit: [1/1] d82ea09e123679521503689f7d9af1c03dc71bfc +RH-Bugzilla: 2068202 +RH-Acked-by: Jason Wang +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Stefan Hajnoczi + +vhost-vsock device in RHEL 8 kernels doesn't support seqpacket. +To avoid problems when migrating a VM from RHEL 9 host, we need to +disable it in rhel8-* machine types. + +Signed-off-by: Stefano Garzarella +--- + hw/core/machine.c | 10 ++++++++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + hw/s390x/s390-virtio-ccw.c | 1 + + include/hw/boards.h | 3 +++ + 5 files changed, 18 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 024b025fc2..76fcabec7a 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -37,6 +37,16 @@ + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-pci.h" + ++GlobalProperty hw_compat_rhel_8_6[] = { ++ /* hw_compat_rhel_8_6 bz 2068202 */ ++ /* ++ * vhost-vsock device in RHEL 8 kernels doesn't support seqpacket, so ++ * we need do disable it downstream on the latest hw_compat_rhel_8. 
++ */ ++ { "vhost-vsock-device", "seqpacket", "off" }, ++}; ++const size_t hw_compat_rhel_8_6_len = G_N_ELEMENTS(hw_compat_rhel_8_6); ++ + /* + * Mostly the same as hw_compat_6_0 and hw_compat_6_1 + */ +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index f03a8f0db8..ab6d03e07a 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -998,6 +998,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + pcmc->kvmclock_create_always = false; + /* From pc_i440fx_5_1_machine_options() */ + pcmc->pci_root_uid = 1; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + compat_props_add(m->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 5559261d9e..882fe7a68d 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -658,6 +658,8 @@ static void pc_q35_machine_rhel860_options(MachineClass *m) + m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.6.0"; ++ compat_props_add(m->compat_props, hw_compat_rhel_8_6, ++ hw_compat_rhel_8_6_len); + } + + DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9795eb9406..bec270598b 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1109,6 +1109,7 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine) + + static void ccw_machine_rhel860_class_options(MachineClass *mc) + { ++ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len); + } + DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); + +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 04e8759815..4ddb798144 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -443,6 +443,9 @@ extern const size_t hw_compat_2_2_len; + extern GlobalProperty hw_compat_2_1[]; + 
extern const size_t hw_compat_2_1_len; + ++extern GlobalProperty hw_compat_rhel_8_6[]; ++extern const size_t hw_compat_rhel_8_6_len; ++ + extern GlobalProperty hw_compat_rhel_8_5[]; + extern const size_t hw_compat_rhel_8_5_len; + +-- +2.27.0 + diff --git a/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch new file mode 100644 index 0000000..8a0aeb0 --- /dev/null +++ b/kvm-RHEL-only-AArch64-Drop-unsupported-CPU-types.patch @@ -0,0 +1,237 @@ +From 055edf068196622a3e1868c9e4c991d410272a6d Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 03/18] RHEL-only: AArch64: Drop unsupported CPU types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [3/6] 21f54c86dc87e5e75a64459b5a385686bc09640c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We only need to support AArch64 cpu types and we only need three +types: + 1) A base type to use with TCG, i.e. a cpu type with only base + features. 'cortex-a57' serves this role and is currently used + by libguestfs. + 2) The 'max' type, which is for both KVM and TCG and is good for + tests that just specify 'max' but run under both. 'max' with + TCG also provides the VM with all the CPU features TCG + supports, which is good for VMs that need features not + provided by the basic cortex-a57. + 3) The host type which is used with KVM. 
+ +Signed-off-by: Andrew Jones +--- + hw/arm/virt.c | 4 ++++ + target/arm/cpu64.c | 6 ++++++ + target/arm/cpu_tcg.c | 12 ++---------- + tests/qtest/arm-cpu-features.c | 6 ++++++ + 4 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 95d012d6eb..74119976d3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -239,12 +239,16 @@ static const int a15irqmap[] = { + }; + + static const char *valid_cpus[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a7"), + ARM_CPU_TYPE_NAME("cortex-a15"), + ARM_CPU_TYPE_NAME("cortex-a53"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("cortex-a57"), ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + ARM_CPU_TYPE_NAME("cortex-a72"), + ARM_CPU_TYPE_NAME("a64fx"), ++#endif /* disabled for RHEL */ + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index eb44c05822..e80b831073 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -146,6 +146,7 @@ static void aarch64_a57_initfn(Object *obj) + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a53_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -249,6 +250,7 @@ static void aarch64_a72_initfn(Object *obj) + cpu->gic_vprebits = 5; + define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo); + } ++#endif /* disabled for RHEL */ + + void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) + { +@@ -923,6 +925,7 @@ static void aarch64_max_initfn(Object *obj) + qdev_property_add_static(DEVICE(obj), &arm_cpu_lpa2_property); + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void aarch64_a64fx_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -969,12 +972,15 @@ static void aarch64_a64fx_initfn(Object *obj) + + /* TODO: Add A64FX specific HPC extension registers */ + } ++#endif /* disabled for RHEL */ + + static const 
ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, + { .name = "a64fx", .initfn = aarch64_a64fx_initfn }, ++#endif /* disabled for RHEL */ + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c +index 3826fa5122..74727fc92c 100644 +--- a/target/arm/cpu_tcg.c ++++ b/target/arm/cpu_tcg.c +@@ -19,10 +19,10 @@ + #include "hw/boards.h" + #endif + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + /* CPU models. These are not needed for the AArch64 linux-user build. */ + #if !defined(CONFIG_USER_ONLY) || !defined(TARGET_AARCH64) + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG) + static bool arm_v7m_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + { +@@ -376,7 +376,6 @@ static void cortex_a9_initfn(Object *obj) + cpu->ccsidr[1] = 0x200fe019; /* 16k L1 icache. 
*/ + define_arm_cp_regs(cpu, cortexa9_cp_reginfo); + } +-#endif /* disabled for RHEL */ + + #ifndef CONFIG_USER_ONLY + static uint64_t a15_l2ctlr_read(CPUARMState *env, const ARMCPRegInfo *ri) +@@ -402,7 +401,6 @@ static const ARMCPRegInfo cortexa15_cp_reginfo[] = { + REGINFO_SENTINEL + }; + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_a7_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -448,7 +446,6 @@ static void cortex_a7_initfn(Object *obj) + cpu->ccsidr[2] = 0x711fe07a; /* 4096K L2 unified cache */ + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); /* Same as A15 */ + } +-#endif /* disabled for RHEL */ + + static void cortex_a15_initfn(Object *obj) + { +@@ -492,7 +489,6 @@ static void cortex_a15_initfn(Object *obj) + define_arm_cp_regs(cpu, cortexa15_cp_reginfo); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void cortex_m0_initfn(Object *obj) + { + ARMCPU *cpu = ARM_CPU(obj); +@@ -933,7 +929,6 @@ static void arm_v7m_class_init(ObjectClass *oc, void *data) + + cc->gdb_core_xml_file = "arm-m-profile.xml"; + } +-#endif /* disabled for RHEL */ + + #ifndef TARGET_AARCH64 + /* +@@ -1013,7 +1008,6 @@ static void arm_max_initfn(Object *obj) + #endif /* !TARGET_AARCH64 */ + + static const ARMCPUInfo arm_tcg_cpus[] = { +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "arm926", .initfn = arm926_initfn }, + { .name = "arm946", .initfn = arm946_initfn }, + { .name = "arm1026", .initfn = arm1026_initfn }, +@@ -1029,9 +1023,7 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "cortex-a7", .initfn = cortex_a7_initfn }, + { .name = "cortex-a8", .initfn = cortex_a8_initfn }, + { .name = "cortex-a9", .initfn = cortex_a9_initfn }, +-#endif /* disabled for RHEL */ + { .name = "cortex-a15", .initfn = cortex_a15_initfn }, +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-m0", .initfn = cortex_m0_initfn, + .class_init = arm_v7m_class_init }, + { .name = "cortex-m3", .initfn = 
cortex_m3_initfn, +@@ -1062,7 +1054,6 @@ static const ARMCPUInfo arm_tcg_cpus[] = { + { .name = "pxa270-b1", .initfn = pxa270b1_initfn }, + { .name = "pxa270-c0", .initfn = pxa270c0_initfn }, + { .name = "pxa270-c5", .initfn = pxa270c5_initfn }, +-#endif /* disabled for RHEL */ + #ifndef TARGET_AARCH64 + { .name = "max", .initfn = arm_max_initfn }, + #endif +@@ -1090,3 +1081,4 @@ static void arm_tcg_cpu_register_types(void) + type_init(arm_tcg_cpu_register_types) + + #endif /* !CONFIG_USER_ONLY || !TARGET_AARCH64 */ ++#endif /* disabled for RHEL */ +diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c +index f76652143a..fe2a0a070d 100644 +--- a/tests/qtest/arm-cpu-features.c ++++ b/tests/qtest/arm-cpu-features.c +@@ -440,8 +440,10 @@ static void test_query_cpu_model_expansion(const void *data) + assert_error(qts, "host", "The CPU type 'host' requires KVM", NULL); + + /* Test expected feature presence/absence for some cpu types */ ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "cortex-a15", "pmu"); + assert_has_not_feature(qts, "cortex-a15", "aarch64"); ++#endif /* disabled for RHEL */ + + /* Enabling and disabling pmu should always work. 
*/ + assert_has_feature_enabled(qts, "max", "pmu"); +@@ -458,6 +460,7 @@ static void test_query_cpu_model_expansion(const void *data) + assert_has_feature_enabled(qts, "cortex-a57", "pmu"); + assert_has_feature_enabled(qts, "cortex-a57", "aarch64"); + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_has_feature_enabled(qts, "a64fx", "pmu"); + assert_has_feature_enabled(qts, "a64fx", "aarch64"); + /* +@@ -470,6 +473,7 @@ static void test_query_cpu_model_expansion(const void *data) + "{ 'sve384': true }"); + assert_error(qts, "a64fx", "cannot enable sve640", + "{ 'sve640': true }"); ++#endif /* disabled for RHEL */ + + sve_tests_default(qts, "max"); + pauth_tests_default(qts, "max"); +@@ -505,9 +509,11 @@ static void test_query_cpu_model_expansion_kvm(const void *data) + QDict *resp; + char *error; + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + assert_error(qts, "cortex-a15", + "We cannot guarantee the CPU type 'cortex-a15' works " + "with KVM on this host", NULL); ++#endif /* disabled for RHEL */ + + assert_has_feature_enabled(qts, "host", "aarch64"); + +-- +2.35.3 + diff --git a/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch new file mode 100644 index 0000000..a1cc4c7 --- /dev/null +++ b/kvm-RHEL-only-tests-avocado-Switch-aarch64-tests-from-a5.patch @@ -0,0 +1,95 @@ +From d710394f68eb0b6116dd8ac76f619c192e0d5972 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 02/18] RHEL-only: tests/avocado: Switch aarch64 tests from a53 + to a57 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. 
Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [2/6] e85ef69b42c411a6997e4da10ba05176368769b3 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 +Upstream Status: RHEL only + +We plan to remove the cortex-a53 from the supported cpu types. Switch +all avocado tests that use it to the cortex-a57, which will work the +same and we intend to keep. We don't want to try and upstream this +change since the better upstream change would be to switch from the +a53 to 'max', but the upstream tests also need to use later guest +kernels to use 'max' (see qemu upstream commit 0942820408dc +("hw/arm/virt: Disable LPA2 for -machine virt-6.2") + +Signed-off-by: Andrew Jones +--- + tests/avocado/replay_kernel.py | 2 +- + tests/avocado/reverse_debugging.py | 2 +- + tests/avocado/tcg_plugins.py | 6 +++--- + 3 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/tests/avocado/replay_kernel.py b/tests/avocado/replay_kernel.py +index 0b2b0dc692..3a7b5f0748 100644 +--- a/tests/avocado/replay_kernel.py ++++ b/tests/avocado/replay_kernel.py +@@ -147,7 +147,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/reverse_debugging.py b/tests/avocado/reverse_debugging.py +index d2921e70c3..66d185ed42 100644 +--- a/tests/avocado/reverse_debugging.py ++++ b/tests/avocado/reverse_debugging.py +@@ -198,7 +198,7 @@ def test_aarch64_virt(self): + """ + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_url = 
('https://archives.fedoraproject.org/pub/archive/fedora' + '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +diff --git a/tests/avocado/tcg_plugins.py b/tests/avocado/tcg_plugins.py +index 642d2e49e3..93b3afd823 100644 +--- a/tests/avocado/tcg_plugins.py ++++ b/tests/avocado/tcg_plugins.py +@@ -68,7 +68,7 @@ def test_aarch64_virt_insn(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -94,7 +94,7 @@ def test_aarch64_virt_insn_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +@@ -120,7 +120,7 @@ def test_aarch64_virt_mem_icount(self): + :avocado: tags=accel:tcg + :avocado: tags=arch:aarch64 + :avocado: tags=machine:virt +- :avocado: tags=cpu:cortex-a53 ++ :avocado: tags=cpu:cortex-a57 + """ + kernel_path = self._grab_aarch64_kernel() + kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +-- +2.35.3 + diff --git a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch new file mode 100644 index 0000000..7740d0b --- /dev/null +++ b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch @@ -0,0 +1,58 @@ +From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 9 Jun 2022 11:55:15 +0200 +Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs" + +RH-Author: Vitaly Kuznetsov +RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs" +RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s) +RH-Bugzilla: 2094270 +RH-Acked-by: Andrew Jones 
+RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149 +Upstream Status: RHEL-only +Tested: with upstream kernel + +Downstream QEMU carries a patch that sets the hard limit of possible vCPUs +to the value that the KVM code of the kernel recommends as soft limit. +Upstream KVM code has been changed recently to not use an arbitrary soft +limit anymore, but to cap the value on the amount of available physical +CPUs of the host. This defeats the purpose of the downstream change in +QEMU completely. Drop the downstream-only patch to allow CPU overcommit. + +This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2. + +Signed-off-by: Vitaly Kuznetsov +--- + accel/kvm/kvm-all.c | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index fdf0e4d429..5f1377ca04 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms) + soft_vcpus_limit = kvm_recommended_vcpus(s); + hard_vcpus_limit = kvm_max_vcpus(s); + +-#ifdef HOST_PPC64 +- /* +- * On POWER, the kernel advertises a soft limit based on the +- * number of CPU threads on the host. We want to allow exceeding +- * this for testing purposes, so we don't want to set hard limit +- * to soft limit as on x86. 
+- */ +-#else +- /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */ +- hard_vcpus_limit = soft_vcpus_limit; +-#endif +- + while (nc->name) { + if (nc->num > soft_vcpus_limit) { + warn_report("Number of %s cpus requested (%d) exceeds " +-- +2.31.1 + diff --git a/kvm-Revert-migration-Simplify-unqueue_page.patch b/kvm-Revert-migration-Simplify-unqueue_page.patch new file mode 100644 index 0000000..f5c97f6 --- /dev/null +++ b/kvm-Revert-migration-Simplify-unqueue_page.patch @@ -0,0 +1,134 @@ +From 5ea59b17866add54e5ae8c76d3cb472c67e1fa91 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 08:19:49 +0200 +Subject: [PATCH 32/32] Revert "migration: Simplify unqueue_page()" + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [2/2] 3913c9ed3f27f4b66245913da29d0c46db0c6567 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +This reverts commit cfd66f30fb0f735df06ff4220e5000290a43dad3. + +The simplification of unqueue_page() introduced a bug that sometimes +breaks migration on s390x hosts. + +The problem is not fully understood yet, but since we are already in +the freeze for QEMU 7.1 and we need something working there, let's +revert this patch for the upcoming release. The optimization can be +redone later again in a proper way if necessary. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 +Signed-off-by: Thomas Huth +Message-Id: <20220802061949.331576-1-thuth@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 777f53c75983dd10756f5dbfc8af50fe11da81c1) +Conflicts: + migration/trace-events + (trivial contextual conflict) +Signed-off-by: Thomas Huth +--- + migration/ram.c | 37 ++++++++++++++++++++++++++----------- + migration/trace-events | 3 ++- + 2 files changed, 28 insertions(+), 12 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index fb6db54642..ee40e4a718 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1548,7 +1548,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + { + struct RAMSrcPageRequest *entry; + RAMBlock *block = NULL; +- size_t page_size; + + if (!postcopy_has_request(rs)) { + return NULL; +@@ -1565,13 +1564,10 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + entry = QSIMPLEQ_FIRST(&rs->src_page_requests); + block = entry->rb; + *offset = entry->offset; +- page_size = qemu_ram_pagesize(block); +- /* Each page request should only be multiple page size of the ramblock */ +- assert((entry->len % page_size) == 0); + +- if (entry->len > page_size) { +- entry->len -= page_size; +- entry->offset += page_size; ++ if (entry->len > TARGET_PAGE_SIZE) { ++ entry->len -= TARGET_PAGE_SIZE; ++ entry->offset += TARGET_PAGE_SIZE; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); +@@ -1579,9 +1575,6 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) + migration_consume_urgent_request(); + } + +- trace_unqueue_page(block->idstr, *offset, +- test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); +- + return block; + } + +@@ -1956,8 +1949,30 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) + { + RAMBlock *block; + ram_addr_t offset; ++ bool dirty; ++ ++ do { ++ block = unqueue_page(rs, &offset); ++ /* ++ * We're sending this page, and since it's postcopy nothing else ++ * will dirty it, and we must make sure it doesn't get sent again ++ * even if this queue request was received 
after the background ++ * search already sent it. ++ */ ++ if (block) { ++ unsigned long page; ++ ++ page = offset >> TARGET_PAGE_BITS; ++ dirty = test_bit(page, block->bmap); ++ if (!dirty) { ++ trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, ++ page); ++ } else { ++ trace_get_queued_page(block->idstr, (uint64_t)offset, page); ++ } ++ } + +- block = unqueue_page(rs, &offset); ++ } while (block && !dirty); + + if (!block) { + /* +diff --git a/migration/trace-events b/migration/trace-events +index 1aec580e92..09d61ed1f4 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -85,6 +85,8 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" + qemu_file_fclose(void) "" + + # ram.c ++get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" ++get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" + migration_bitmap_sync_start(void) "" + migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 + migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" +@@ -110,7 +112,6 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI + ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 + ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" +-unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" + + # multifd.c + multifd_new_send_channel_async(uint8_t id) "channel %u" +-- +2.31.1 + diff --git a/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch 
b/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch new file mode 100644 index 0000000..56af50f --- /dev/null +++ b/kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch @@ -0,0 +1,93 @@ +From e626dc16d130c724c400b99a93daad0a9abeae59 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 01/18] Revert "redhat: Add hw_compat_4_2_extra and apply to + upstream machines" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [1/3] 47b7d9e5062f5e215d5ed1a3ecdc1a87ac3fa630 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92 +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:31:53 2022 +0000 + + Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" + + This reverts commit 66882f9a3230246409f3918424aca26add5c034a. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit dc2e9ec1e014950c7918e23a3e9b0096b34a4a92) +Signed-off-by: Jon Maloy +--- + hw/i386/pc.c | 12 ------------ + hw/i386/pc_piix.c | 6 ------ + include/hw/i386/pc.h | 3 --- + 3 files changed, 21 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4c08a1971c..357257349b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -670,18 +670,6 @@ GlobalProperty pc_rhel_7_0_compat[] = { + }; + const size_t pc_rhel_7_0_compat_len = G_N_ELEMENTS(pc_rhel_7_0_compat); + +-/* +- * RHEL: These properties only apply to the RHEL exported machine types +- * pc-4.2/2.11 for the purpose to have a limited upstream machines support +- * which can be migrated to RHEL. 
Let's avoid touching hw_compat_4_2 directly +- * so that we can have some isolation against the upstream code. +- */ +-GlobalProperty hw_compat_4_2_extra[] = { +- /* By default enlarge the default virtio-net-pci ROM to 512KB. */ +- { "virtio-net-pci", "romsize", "0x80000" }, +-}; +-const size_t hw_compat_4_2_extra_len = G_N_ELEMENTS(hw_compat_4_2_extra); +- + GSIState *pc_gsi_create(qemu_irq **irqs, bool pci_enabled) + { + GSIState *s; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index c30057c443..7b7076cbc7 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -531,12 +531,6 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + * supported by RHEL, even if exported. + */ + m->deprecation_reason = "Not supported by RHEL"; +- /* +- * RHEL: Specific compat properties to have limited support for upstream +- * machines exported. +- */ +- compat_props_add(m->compat_props, hw_compat_4_2_extra, +- hw_compat_4_2_extra_len); + } + + /* RHEL: Export pc-4.2 */ +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 9e8bfb69f8..4a593acb50 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -325,9 +325,6 @@ extern const size_t pc_rhel_7_1_compat_len; + extern GlobalProperty pc_rhel_7_0_compat[]; + extern const size_t pc_rhel_7_0_compat_len; + +-extern GlobalProperty hw_compat_4_2_extra[]; +-extern const size_t hw_compat_4_2_extra_len; +- + /* Helper for setting model-id for CPU models that changed model-id + * depending on QEMU versions up to QEMU 2.4. 
+ */ +-- +2.27.0 + diff --git a/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch b/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch new file mode 100644 index 0000000..1b2051a --- /dev/null +++ b/kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch @@ -0,0 +1,53 @@ +From 5bf8f1d69fea1225e927fbb3efe549a2a9d47d92 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 02/18] Revert "redhat: Enable FDC device for upstream machines + too" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [2/3] 4e3c945e3de9bb9d9a6d24115f0719168c9669fe (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c +Author: Dr. David Alan Gilbert +Date: Wed Mar 9 10:32:39 2022 +0000 + + Revert "redhat: Enable FDC device for upstream machines too" + + This reverts commit c4d1aa8bf21fe98da94a9cff30b7c25bed12c17f. + We no longer need these compat machines it was added for. + + Signed-off-by: Dr. 
David Alan Gilbert + +(cherry picked from commit 597cb6ca1da4a3eea77c1e4928f55203a1d5c70c) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index 63042ef030..97fa6de423 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -2341,10 +2341,7 @@ void fdctrl_realize_common(DeviceState *dev, FDCtrl *fdctrl, Error **errp) + + /* Restricted for Red Hat Enterprise Linux: */ + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (!strstr(mc->name, "-rhel7.") && +- /* Exported two upstream machine types allows FDC too */ +- strcmp(mc->name, "pc-i440fx-4.2") && +- strcmp(mc->name, "pc-i440fx-2.11")) { ++ if (!strstr(mc->name, "-rhel7.")) { + error_setg(errp, "Device %s is not supported with machine type %s", + object_get_typename(OBJECT(dev)), mc->name); + return; +-- +2.27.0 + diff --git a/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch b/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch new file mode 100644 index 0000000..27e3dc9 --- /dev/null +++ b/kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch @@ -0,0 +1,191 @@ +From ee3cae3bb349469edcf725a1c5161521e95dcb9f Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 03/18] Revert "redhat: Expose upstream machines pc-4.2 and + pc-2.11" + +RH-Author: Jon Maloy +RH-MergeRequest: 131: Revert "redhat: Add hw_compat_4_2_extra and apply to upstream machines" +RH-Commit: [3/3] 35cee68034580f81b3aa916921eecd2fdfa7dd15 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062613 +RH-Acked-by: Peter Xu +RH-Acked-by: Dr. David Alan Gilbert + +BZ: https://bugzilla.redhat.com/2062613 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038000 + +commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9 +Author: Dr. 
David Alan Gilbert +Date: Wed Mar 9 10:34:58 2022 +0000 + + Revert "redhat: Expose upstream machines pc-4.2 and pc-2.11" + This reverts commit 618e2424edba499d52cd26cf8363bc2dd85ef149. + We no longer need these compat machines. + + Signed-off-by: Dr. David Alan Gilbert + +(cherry picked from commit f3b50d6d4ae0be9e64aafe6a15f5423bab4899e9) +Signed-off-by: Jon Maloy +--- + hw/i386/pc_piix.c | 37 ------------------------------------- + 1 file changed, 37 deletions(-) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 7b7076cbc7..f03a8f0db8 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -315,14 +315,6 @@ static void pc_init1(MachineState *machine, + * hw_compat_*, pc_compat_*, or * pc_*_machine_options(). + */ + +-/* +- * NOTE! Not all the upstream machine types are disabled for RHEL. For +- * providing a very limited support for upstream machine types, pc machines +- * 2.11 and 4.2 are exposed explicitly. This will make the below "#if" macros +- * a bit messed up, but please read this comment first so that we can have a +- * rough understanding of what we're going to do. 
+- */ +- + #if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_compat_2_3_fn(MachineState *machine) + { +@@ -399,8 +391,6 @@ static void pc_xen_hvm_init(MachineState *machine) + } + #endif + +-#endif /* Disabled for Red Hat Enterprise Linux */ +- + #define DEFINE_I440FX_MACHINE(suffix, name, compatfn, optionfn) \ + static void pc_init_##suffix(MachineState *machine) \ + { \ +@@ -465,10 +455,8 @@ static void pc_i440fx_6_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_6_0, pc_compat_6_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v6_0, "pc-i440fx-6.0", NULL, + pc_i440fx_6_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_2_machine_options(MachineClass *m) + { +@@ -479,10 +467,8 @@ static void pc_i440fx_5_2_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_5_2, pc_compat_5_2_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_2, "pc-i440fx-5.2", NULL, + pc_i440fx_5_2_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_1_machine_options(MachineClass *m) + { +@@ -497,10 +483,8 @@ static void pc_i440fx_5_1_machine_options(MachineClass *m) + pcmc->pci_root_uid = 1; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_1, "pc-i440fx-5.1", NULL, + pc_i440fx_5_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_5_0_machine_options(MachineClass *m) + { +@@ -513,10 +497,8 @@ static void pc_i440fx_5_0_machine_options(MachineClass *m) + m->auto_enable_numa_with_memdev = false; + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v5_0, "pc-i440fx-5.0", NULL, + pc_i440fx_5_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_2_machine_options(MachineClass *m) + { +@@ -525,15 
+507,8 @@ static void pc_i440fx_4_2_machine_options(MachineClass *m) + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_4_2, hw_compat_4_2_len); + compat_props_add(m->compat_props, pc_compat_4_2, pc_compat_4_2_len); +- +- /* +- * RHEL: Mark all upstream machines as deprecated because they're not +- * supported by RHEL, even if exported. +- */ +- m->deprecation_reason = "Not supported by RHEL"; + } + +-/* RHEL: Export pc-4.2 */ + DEFINE_I440FX_MACHINE(v4_2, "pc-i440fx-4.2", NULL, + pc_i440fx_4_2_machine_options); + +@@ -546,10 +521,8 @@ static void pc_i440fx_4_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_1, pc_compat_4_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_1, "pc-i440fx-4.1", NULL, + pc_i440fx_4_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_4_0_machine_options(MachineClass *m) + { +@@ -562,10 +535,8 @@ static void pc_i440fx_4_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_4_0, pc_compat_4_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v4_0, "pc-i440fx-4.0", NULL, + pc_i440fx_4_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_1_machine_options(MachineClass *m) + { +@@ -581,10 +552,8 @@ static void pc_i440fx_3_1_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_1, pc_compat_3_1_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v3_1, "pc-i440fx-3.1", NULL, + pc_i440fx_3_1_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_3_0_machine_options(MachineClass *m) + { +@@ -593,10 +562,8 @@ static void pc_i440fx_3_0_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_3_0, pc_compat_3_0_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux 
*/ + DEFINE_I440FX_MACHINE(v3_0, "pc-i440fx-3.0", NULL, + pc_i440fx_3_0_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_12_machine_options(MachineClass *m) + { +@@ -605,10 +572,8 @@ static void pc_i440fx_2_12_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_12, pc_compat_2_12_len); + } + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + DEFINE_I440FX_MACHINE(v2_12, "pc-i440fx-2.12", NULL, + pc_i440fx_2_12_machine_options); +-#endif /* Disabled for Red Hat Enterprise Linux */ + + static void pc_i440fx_2_11_machine_options(MachineClass *m) + { +@@ -617,11 +582,9 @@ static void pc_i440fx_2_11_machine_options(MachineClass *m) + compat_props_add(m->compat_props, pc_compat_2_11, pc_compat_2_11_len); + } + +-/* RHEL: Export pc-2.11 */ + DEFINE_I440FX_MACHINE(v2_11, "pc-i440fx-2.11", NULL, + pc_i440fx_2_11_machine_options); + +-#if 0 /* Disabled for Red Hat Enterprise Linux */ + static void pc_i440fx_2_10_machine_options(MachineClass *m) + { + pc_i440fx_2_11_machine_options(m); +-- +2.27.0 + diff --git a/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch new file mode 100644 index 0000000..e8eb35d --- /dev/null +++ b/kvm-Revert-virtio-scsi-Reject-scsi-cd-if-data-plane-enab.patch @@ -0,0 +1,51 @@ +From 733acef2caea0758edd74fb634b095ce09bf5914 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 9 May 2022 03:46:23 -0400 +Subject: [PATCH 15/16] Revert "virtio-scsi: Reject scsi-cd if data plane + enabled [RHEL only]" + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 91: Revert "virtio-scsi: Reject scsi-cd if data plane enabled [RHEL only]" +RH-Commit: [1/1] 1af55d792bc9166e5c86272afe8093c76ab41bb4 (eesposit/qemu-kvm) +RH-Bugzilla: 1995710 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi + +This reverts commit 4e17b1126e. 
+ +Over time AioContext usage and coverage has increased, and now block +backend is capable of handling AioContext change upon eject and insert. +Therefore the above downstream-only commit is not necessary anymore, +and can be safely reverted. + +X-downstream-only: true + +Signed-off-by: Emanuele Giuseppe Esposito +--- + hw/scsi/virtio-scsi.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 2450c9438c..db54d104be 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -937,15 +937,6 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev, + AioContext *old_context; + int ret; + +- /* XXX: Remove this check once block backend is capable of handling +- * AioContext change upon eject/insert. +- * s->ctx is NULL if ioeventfd is off, s->ctx is qemu_get_aio_context() if +- * data plane is not used, both cases are safe for scsi-cd. */ +- if (s->ctx && s->ctx != qemu_get_aio_context() && +- object_dynamic_cast(OBJECT(dev), "scsi-cd")) { +- error_setg(errp, "scsi-cd is not supported by data plane"); +- return; +- } + if (s->ctx && !s->dataplane_fenced) { + if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) { + return; +-- +2.31.1 + diff --git a/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch b/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch new file mode 100644 index 0000000..9d2594f --- /dev/null +++ b/kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch @@ -0,0 +1,78 @@ +From af082f3499de265d123157d097b5c84981e0aa63 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 15/18] acpi: fix OEM ID/OEM Table ID padding + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [7/10] 51ea859cbe12b5a902d529ab589d18757d98f71d (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + 
+commit 748c030f360a940fe0c9382c8ca1649096c3a80d +Author: Igor Mammedov +Date: Wed Jan 12 08:03:31 2022 -0500 + + acpi: fix OEM ID/OEM Table ID padding + + Commit [2] broke original '\0' padding of OEM ID and OEM Table ID + fields in headers of ACPI tables. While it doesn't have impact on + default values since QEMU uses 6 and 8 characters long values + respectively, it broke usecase where IDs are provided on QEMU CLI. + It shouldn't affect guest (but may cause licensing verification + issues in guest OS). + One of the broken usecases is user supplied SLIC table with IDs + shorter than max possible length, where [2] mangles IDs with extra + spaces in RSDT and FADT tables whereas guest OS expects those to + mirror the respective values of the used SLIC table. + + Fix it by replacing whitespace padding with '\0' padding in + accordance with [1] and expectations of guest OS + + 1) ACPI spec, v2.0b + 17.2 AML Grammar Definition + ... + //OEM ID of up to 6 characters. If the OEM ID is + //shorter than 6 characters, it can be terminated + //with a NULL character. + + 2) + Fixes: 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/707 + Reported-by: Dmitry V. Orekhov + Signed-off-by: Igor Mammedov + Cc: qemu-stable@nongnu.org + Message-Id: <20220112130332.1648664-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + Reviewed-by: Ani Sinha + Tested-by: Dmitry V. 
Orekhov dima.orekhov@gmail.com + +(cherry picked from commit 748c030f360a940fe0c9382c8ca1649096c3a80d) +Signed-off-by: Jon Maloy +--- + hw/acpi/aml-build.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index b3b3310df3..65148d5b9d 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1724,9 +1724,9 @@ void acpi_table_begin(AcpiTable *desc, GArray *array) + build_append_int_noprefix(array, 0, 4); /* Length */ + build_append_int_noprefix(array, desc->rev, 1); /* Revision */ + build_append_int_noprefix(array, 0, 1); /* Checksum */ +- build_append_padded_str(array, desc->oem_id, 6, ' '); /* OEMID */ ++ build_append_padded_str(array, desc->oem_id, 6, '\0'); /* OEMID */ + /* OEM Table ID */ +- build_append_padded_str(array, desc->oem_table_id, 8, ' '); ++ build_append_padded_str(array, desc->oem_table_id, 8, '\0'); + build_append_int_noprefix(array, 1, 4); /* OEM Revision */ + g_array_append_vals(array, ACPI_BUILD_APPNAME8, 4); /* Creator ID */ + build_append_int_noprefix(array, 1, 4); /* Creator Revision */ +-- +2.27.0 + diff --git a/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch b/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch new file mode 100644 index 0000000..a6b1151 --- /dev/null +++ b/kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch @@ -0,0 +1,108 @@ +From 4e8fb957a349558648d5cddb80a89460bc97439e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 09/18] acpi: fix QEMU crash when started with SLIC table +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [1/10] 0c34e80346c33da4f220d9c486b120c35005144e (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + 
+commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d +Author: Igor Mammedov +Date: Mon Dec 27 14:31:17 2021 -0500 + + acpi: fix QEMU crash when started with SLIC table + + if QEMU is started with user provided SLIC table blob, + + -acpitable sig=SLIC,oem_id='CRASH ',oem_table_id="ME",oem_rev=00002210,asl_compiler_id="",asl_compiler_rev=00000000,data=/dev/null + it will assert with: + + hw/acpi/aml-build.c:61:build_append_padded_str: assertion failed: (len <= maxlen) + + and following backtrace: + + ... + build_append_padded_str (array=0x555556afe320, str=0x555556afdb2e "CRASH ME", maxlen=0x6, pad=0x20) at hw/acpi/aml-build.c:61 + acpi_table_begin (desc=0x7fffffffd1b0, array=0x555556afe320) at hw/acpi/aml-build.c:1727 + build_fadt (tbl=0x555556afe320, linker=0x555557ca3830, f=0x7fffffffd318, oem_id=0x555556afdb2e "CRASH ME", oem_table_id=0x555556afdb34 "ME") at hw/acpi/aml-build.c:2064 + ... + + which happens due to acpi_table_begin() expecting NULL terminated + oem_id and oem_table_id strings, which is normally the case, but + in case of user provided SLIC table, oem_id points to table's blob + directly and as result oem_id became longer than expected. + + Fix issue by handling oem_id consistently and make acpi_get_slic_oem() + return NULL terminated strings. + + PS: + After [1] refactoring, oem_id semantics became inconsistent, where + NULL terminated string was coming from machine and old way pointer + into byte array coming from -acpitable option. That used to work + since build_header() wasn't expecting NULL terminated string and + blindly copied the 1st 6 bytes only. + + However commit [2] broke that by replacing build_header() with + acpi_table_begin(), which was expecting NULL terminated string + and was checking oem_id size. 
+ + 1) 602b45820 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + 2) + Fixes: 4b56e1e4eb08 ("acpi: build_fadt: use acpi_table_begin()/acpi_table_end() instead of build_header()") + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/786 + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-2-imammedo@redhat.com> + Reviewed-by: Philippe Mathieu-Daudé + Tested-by: Denis Lisov + Tested-by: Alexander Tsoy + Cc: qemu-stable@nongnu.org + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 8cdb99af45365727ac17f45239a9b8c1d5155c6d) +Signed-off-by: Jon Maloy +--- + hw/acpi/core.c | 4 ++-- + hw/i386/acpi-build.c | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index 1e004d0078..3e811bf03c 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -345,8 +345,8 @@ int acpi_get_slic_oem(AcpiSlicOem *oem) + struct acpi_table_header *hdr = (void *)(u - sizeof(hdr->_length)); + + if (memcmp(hdr->sig, "SLIC", 4) == 0) { +- oem->id = hdr->oem_id; +- oem->table_id = hdr->oem_table_id; ++ oem->id = g_strndup(hdr->oem_id, 6); ++ oem->table_id = g_strndup(hdr->oem_table_id, 8); + return 0; + } + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index a4478e77b7..acc4869db0 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2726,6 +2726,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) + + /* Cleanup memory that's no longer used. 
*/ + g_array_free(table_offsets, true); ++ g_free(slic_oem.id); ++ g_free(slic_oem.table_id); + } + + static void acpi_ram_update(MemoryRegion *mr, GArray *data) +-- +2.27.0 + diff --git a/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch b/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch new file mode 100644 index 0000000..2be41b6 --- /dev/null +++ b/kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch @@ -0,0 +1,140 @@ +From c9ceb175667cdeead59384a97a812367ae19c570 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 06/18] acpi: pcihp: pcie: set power on cap on parent slot + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [2/2] d883872647a6e90ec573140b2c171f3f53b600ab (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 6b0969f1ec825984cd74619f0730be421b0c46fb +Author: Igor Mammedov +Date: Tue Mar 1 10:11:59 2022 -0500 + + acpi: pcihp: pcie: set power on cap on parent slot + + on creation a PCIDevice has power turned on at the end of pci_qdev_realize() + however later on if PCIe slot isn't populated with any children + it's power is turned off. It's fine if native hotplug is used + as plug callback will power slot on among other things. + However when ACPI hotplug is enabled it replaces native PCIe plug + callbacks with ACPI specific ones (acpi_pcihp_device_*plug_cb) and + as result slot stays powered off. It works fine as ACPI hotplug + on guest side takes care of enumerating/initializing hotplugged + device. 
But when later guest is migrated, call chain introduced by + commit d5daff7d312 (pcie: implement slot power control for pcie root ports) + + pcie_cap_slot_post_load() + -> pcie_cap_update_power() + -> pcie_set_power_device() + -> pci_set_power() + -> pci_update_mappings() + + will disable earlier initialized BARs for the hotplugged device + in powered off slot due to commit 23786d13441 (pci: implement power state) + which disables BARs if power is off. + + Fix it by setting PCI_EXP_SLTCTL_PCC to PCI_EXP_SLTCTL_PWR_ON + on slot (root port/downstream port) at the time a device + is hotplugged into it. As result PCI_EXP_SLTCTL_PWR_ON is migrated + to target and above call chain keeps device plugged into it + powered on. + + Fixes: d5daff7d312 ("pcie: implement slot power control for pcie root ports") + Fixes: 23786d13441 ("pci: implement power state") + Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2053584 + Suggested-by: "Michael S. Tsirkin" + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 6b0969f1ec825984cd74619f0730be421b0c46fb) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 12 +++++++++++- + hw/pci/pcie.c | 11 +++++++++++ + include/hw/pci/pcie.h | 1 + + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index a5e182dd3a..be0e846b34 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -32,6 +32,7 @@ + #include "hw/pci/pci_bridge.h" + #include "hw/pci/pci_host.h" + #include "hw/pci/pcie_port.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + #include "hw/i386/acpi-build.h" + #include "hw/acpi/acpi.h" + #include "hw/pci/pci_bus.h" +@@ -341,6 +342,8 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + { + PCIDevice *pdev = PCI_DEVICE(dev); + int slot = PCI_SLOT(pdev->devfn); ++ PCIDevice *bridge; ++ PCIBus *bus; + int bsel; + + /* Don't send event when device is enabled during qemu machine creation: +@@ -370,7 +373,14 @@ void acpi_pcihp_device_plug_cb(HotplugHandler *hotplug_dev, AcpiPciHpState *s, + return; + } + +- bsel = acpi_pcihp_get_bsel(pci_get_bus(pdev)); ++ bus = pci_get_bus(pdev); ++ bridge = pci_bridge_get_device(bus); ++ if (object_dynamic_cast(OBJECT(bridge), TYPE_PCIE_ROOT_PORT) || ++ object_dynamic_cast(OBJECT(bridge), TYPE_XIO3130_DOWNSTREAM)) { ++ pcie_cap_slot_enable_power(bridge); ++ } ++ ++ bsel = acpi_pcihp_get_bsel(bus); + g_assert(bsel >= 0); + s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); + acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index d7d73a31e4..996f0e24fe 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -366,6 +366,17 @@ static void hotplug_event_clear(PCIDevice *dev) + } + } + ++void pcie_cap_slot_enable_power(PCIDevice *dev) ++{ ++ uint8_t *exp_cap = dev->config + dev->exp.exp_cap; ++ uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); ++ ++ if (sltcap & PCI_EXP_SLTCAP_PCP) { ++ pci_set_word_by_mask(exp_cap + 
PCI_EXP_SLTCTL, ++ PCI_EXP_SLTCTL_PCC, PCI_EXP_SLTCTL_PWR_ON); ++ } ++} ++ + static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) + { + bool *power = opaque; +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 6063bee0ec..c27368d077 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -112,6 +112,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, + uint32_t addr, uint32_t val, int len); + int pcie_cap_slot_post_load(void *opaque, int version_id); + void pcie_cap_slot_push_attention_button(PCIDevice *dev); ++void pcie_cap_slot_enable_power(PCIDevice *dev); + + void pcie_cap_root_init(PCIDevice *dev); + void pcie_cap_root_reset(PCIDevice *dev); +-- +2.27.0 + diff --git a/kvm-acpi-validate-hotplug-selector-on-access.patch b/kvm-acpi-validate-hotplug-selector-on-access.patch new file mode 100644 index 0000000..d18989a --- /dev/null +++ b/kvm-acpi-validate-hotplug-selector-on-access.patch @@ -0,0 +1,51 @@ +From 529a5d908f5d16714b8ae0a51eaaaa84994dfae8 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Tue, 21 Dec 2021 09:45:44 -0500 +Subject: [PATCH 1/2] acpi: validate hotplug selector on access +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 97: acpi: validate hotplug selector on access +RH-Commit: [1/1] 79bcfb0df0091e2b716d2e1c545f047b3409c26c (jmaloy/qemu-kvm) +RH-Bugzilla: 2036580 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Igor Mammedov + +When bus is looked up on a pci write, we didn't +validate that the lookup succeeded. +Fuzzers thus can trigger QEMU crash by dereferencing the NULL +bus pointer. + +Fixes: b32bd763a1 ("pci: introduce acpi-index property for PCI device") +Fixes: CVE-2021-4158 +Cc: "Igor Mammedov" +Fixes: https://gitlab.com/qemu-project/qemu/-/issues/770 +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Ani Sinha +(cherry picked from commit 9bd6565ccee68f72d5012e24646e12a1c662827e) +Signed-off-by: Jon Maloy +--- + hw/acpi/pcihp.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c +index 30405b5113..a5e182dd3a 100644 +--- a/hw/acpi/pcihp.c ++++ b/hw/acpi/pcihp.c +@@ -491,6 +491,9 @@ static void pci_write(void *opaque, hwaddr addr, uint64_t data, + } + + bus = acpi_pcihp_find_hotplug_bus(s, s->hotplug_select); ++ if (!bus) { ++ break; ++ } + QTAILQ_FOREACH_SAFE(kid, &bus->qbus.children, sibling, next) { + Object *o = OBJECT(kid->child); + PCIDevice *dev = PCI_DEVICE(o); +-- +2.27.0 + diff --git a/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch new file mode 100644 index 0000000..eb0f3cf --- /dev/null +++ b/kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch @@ -0,0 +1,63 @@ +From b21fa5ecd9acf2b91839a2915fb4bb39dac4c803 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:33 +0100 +Subject: [PATCH 2/5] block: Lock AioContext for drain_end in blockdev-reopen + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [1/2] 98de3b5987f88ea6b4b503f623d6c4475574e037 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +bdrv_subtree_drained_end() requires the caller to hold the AioContext +lock for the drained node. Not doing this for nodes outside of the main +AioContext leads to crashes when AIO_WAIT_WHILE() needs to wait and +tries to temporarily release the lock. 
+ +Fixes: 3908b7a8994fa5ef7a89aa58cd5a02fc58141592 +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2046659 +Reported-by: Qing Wang +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-2-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit aba8205be0707b9d108e32254e186ba88107a869) +Signed-off-by: Kevin Wolf +--- + blockdev.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index b35072644e..565f6a81fd 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3562,6 +3562,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; + GSList *drained = NULL; ++ GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3611,7 +3612,15 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- g_slist_free_full(drained, (GDestroyNotify) bdrv_subtree_drained_end); ++ for (p = drained; p; p = p->next) { ++ BlockDriverState *bs = p->data; ++ AioContext *ctx = bdrv_get_aio_context(bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_subtree_drained_end(bs); ++ aio_context_release(ctx); ++ } ++ g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.27.0 + diff --git a/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch new file mode 100644 index 0000000..52d37d8 --- /dev/null +++ b/kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch @@ -0,0 +1,129 @@ +From bf4c15a3debbe68b6eb25c52174843470a9c014f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:12 +0000 +Subject: [PATCH 3/6] block-backend: prevent dangling BDS pointers across + aio_poll() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: 
prevent dangling BDS pointers across aio_poll() +RH-Commit: [1/2] da5a59eddff0dc10be7de8e291fa675143d11d73 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +The BlockBackend root child can change when aio_poll() is invoked. This +happens when a temporary filter node is removed upon blockjob +completion, for example. + +Functions in block/block-backend.c must be aware of this when using a +blk_bs() pointer across aio_poll() because the BlockDriverState refcnt +may reach 0, resulting in a stale pointer. + +One example is scsi_device_purge_requests(), which calls blk_drain() to +wait for in-flight requests to cancel. If the backup blockjob is active, +then the BlockBackend root child is a temporary filter BDS owned by the +blockjob. The blockjob can complete during bdrv_drained_begin() and the +last reference to the BDS is released when the temporary filter node is +removed. This results in a use-after-free when blk_drain() calls +bdrv_drained_end(bs) on the dangling pointer. + +Explicitly hold a reference to bs across block APIs that invoke +aio_poll(). 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2021778 +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-2-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 1e3552dbd28359d35967b7c28dc86cde1bc29205) +Signed-off-by: Stefan Hajnoczi +--- + block/block-backend.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 12ef80ea17..23e727199b 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -822,16 +822,22 @@ BlockBackend *blk_by_public(BlockBackendPublic *public) + void blk_remove_bs(BlockBackend *blk) + { + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; +- BlockDriverState *bs; + BdrvChild *root; + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + if (tgm->throttle_state) { +- bs = blk_bs(blk); ++ BlockDriverState *bs = blk_bs(blk); ++ ++ /* ++ * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for ++ * example, if a temporary filter node is removed by a blockjob. 
++ */ ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + throttle_group_detach_aio_context(tgm); + throttle_group_attach_aio_context(tgm, qemu_get_aio_context()); + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + + blk_update_root_state(blk); +@@ -1705,6 +1711,7 @@ void blk_drain(BlockBackend *blk) + BlockDriverState *bs = blk_bs(blk); + + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + +@@ -1714,6 +1721,7 @@ void blk_drain(BlockBackend *blk) + + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +@@ -2044,10 +2052,13 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + int ret; + + if (bs) { ++ bdrv_ref(bs); ++ + if (update_root_node) { + ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root, + errp); + if (ret < 0) { ++ bdrv_unref(bs); + return ret; + } + } +@@ -2057,6 +2068,8 @@ static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context, + throttle_group_attach_aio_context(tgm, new_context); + bdrv_drained_end(bs); + } ++ ++ bdrv_unref(bs); + } + + blk->ctx = new_context; +@@ -2326,11 +2339,13 @@ void blk_io_limits_disable(BlockBackend *blk) + ThrottleGroupMember *tgm = &blk->public.throttle_group_member; + assert(tgm->throttle_state); + if (bs) { ++ bdrv_ref(bs); + bdrv_drained_begin(bs); + } + throttle_group_unregister_tgm(tgm); + if (bs) { + bdrv_drained_end(bs); ++ bdrv_unref(bs); + } + } + +-- +2.27.0 + diff --git a/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch new file mode 100644 index 0000000..c1ee128 --- /dev/null +++ b/kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch @@ -0,0 +1,56 @@ +From 4c6eff78f4b31ec4bd7b42440396760d19fde63e Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 17:59:59 +0100 +Subject: [PATCH 6/7] block/io: Update BSC only if want_zero is true + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [1/2] 
a202de1f52110d1e871c3b5b58f2d9e9b5d17570 +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +We update the block-status cache whenever we get new information from a +bdrv_co_block_status() call to the block driver. However, if we have +passed want_zero=false to that call, it may flag areas containing zeroes +as data, and so we would update the block-status cache with wrong +information. + +Therefore, we should not update the cache with want_zero=false. + +Reported-by: Nir Soffer +Fixes: 0bc329fbb00 ("block: block-status cache for data regions") +Reviewed-by: Nir Soffer +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-2-hreitz@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 113b727ce788335cf76f65355d670c9bc130fd75) +Signed-off-by: Hanna Reitz +--- + block/io.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/block/io.c b/block/io.c +index bb0a254def..4e4cb556c5 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -2497,8 +2497,12 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs, + * non-protocol nodes, and then it is never used. However, filling + * the cache requires an RCU update, so double check here to avoid + * such an update if possible. ++ * ++ * Check want_zero, because we only want to update the cache when we ++ * have accurate information about what is zero and what is data. 
+ */ +- if (ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && ++ if (want_zero && ++ ret == (BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID) && + QLIST_EMPTY(&bs->children)) + { + /* +-- +2.27.0 + diff --git a/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..324021b --- /dev/null +++ b/kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,52 @@ +From d5a85fcf996948d1154e88e9ee3b4e8c64ec2694 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:08 +0100 +Subject: [PATCH 2/6] block/nbd: Assert there are no timers when closed + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [2/6] 995795ae9844a7d2b28cb1e57fd7fe81482d0205 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8a39c381e5e407d2fe5500324323f90a8540fa90) + +Conflict: +- block/nbd.c: open_timer was introduced after the 6.2 release (for + nbd's @open-timeout parameter), and has not been backported, so drop + the assertion that it is NULL + +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index b8e5a9b4cc..aab20125d8 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -108,6 +108,9 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..7d1c000 --- /dev/null +++ b/kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,54 @@ +From 8e23c0f208c6bd5bb64c4f6e4863b93fa6f4e9de Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:06 +0100 +Subject: [PATCH 1/6] block/nbd: Delete reconnect delay timer when done + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [1/6] 70814602a8a43a7c14857d76266d82b1aa5174a9 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. 
+ +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sorts of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 3ce1fc16bad9c3f8b7b10b451a224d6d76e5c551) +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..b8e5a9b4cc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -353,6 +353,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). 
++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..4cd3cce --- /dev/null +++ b/kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,107 @@ +From c7f63e7bbc5119d92775e20d1ebbf8280c78b732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:11 +0100 +Subject: [PATCH 5/6] block/nbd: Move s->ioc on AioContext change + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [5/6] 107757b9fbadfb832c75521317108525daa4174e +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +s->ioc must always be attached to the NBD node's AioContext. If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit e15f3a66c830e3fce99c9d56c493c2f7078a1225) + +Conflict: +- block/nbd.c: open_timer was added after the 6.2 release, so we need + not (and cannot) assert it is NULL here. 
+ +Signed-off-by: Hanna Reitz +--- + block/nbd.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index aab20125d8..a3896c7f5f 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2003,6 +2003,38 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. ++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2026,6 +2058,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2051,6 +2086,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = 
nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2076,6 +2114,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch new file mode 100644 index 0000000..2d8f3b4 --- /dev/null +++ b/kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch @@ -0,0 +1,59 @@ +From f4b7133d7aeb1d0b9115d01b5cff4df7f6b24e78 Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:25 +0100 +Subject: [PATCH 5/6] block/rbd: fix handling of holes in .bdrv_co_block_status + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [1/2] 352656a5c77cc7855b476c3559a10c6aa64a4f58 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +the assumption that we can't hit a hole if we do not diff against a snapshot was wrong. + +We can see a hole in an image if we diff against base if there exists an older snapshot +of the image and we have discarded blocks in the image where the snapshot has data. + +Fix this by simply handling a hole like an unallocated area. There are no callbacks +for unallocated areas so just bail out if we hit a hole. 
+ +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Suggested-by: Ilya Dryomov +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-2-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit 9e302f64bb407a9bb097b626da97228c2654cfee) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index def96292e0..20bb896c4a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1279,11 +1279,11 @@ static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len, + RBDDiffIterateReq *req = opaque; + + assert(req->offs + req->bytes <= offs); +- /* +- * we do not diff against a snapshot so we should never receive a callback +- * for a hole. +- */ +- assert(exists); ++ ++ /* treat a hole like an unallocated area and bail out */ ++ if (!exists) { ++ return 0; ++ } + + if (!req->exists && offs > req->offs) { + /* +-- +2.27.0 + diff --git a/kvm-block-rbd-workaround-for-ceph-issue-53784.patch b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch new file mode 100644 index 0000000..7e052f2 --- /dev/null +++ b/kvm-block-rbd-workaround-for-ceph-issue-53784.patch @@ -0,0 +1,103 @@ +From 8c50eedf03d8e62acd387b9aa9369dadcea9324c Mon Sep 17 00:00:00 2001 +From: Peter Lieven +Date: Thu, 13 Jan 2022 15:44:26 +0100 +Subject: [PATCH 6/6] block/rbd: workaround for ceph issue #53784 + +RH-Author: Stefano Garzarella +RH-MergeRequest: 110: block/rbd: fix handling of holes in .bdrv_co_block_status +RH-Commit: [2/2] 1384557462e89bb539d0d25a1a471ad738fb9e89 +RH-Bugzilla: 2037135 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz + +librbd had a bug until early 2022 that affected all versions of ceph that +supported fast-diff. This bug results in reporting of incorrect offsets +if the offset parameter to rbd_diff_iterate2 is not object aligned. 
+ +This patch works around this bug for pre Quincy versions of librbd. + +Fixes: 0347a8fd4c3faaedf119be04c197804be40a384b +Cc: qemu-stable@nongnu.org +Signed-off-by: Peter Lieven +Message-Id: <20220113144426.4036493-3-pl@kamp.de> +Reviewed-by: Ilya Dryomov +Reviewed-by: Stefano Garzarella +Tested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +(cherry picked from commit fc176116cdea816ceb8dd969080b2b95f58edbc0) +Signed-off-by: Stefano Garzarella +--- + block/rbd.c | 42 ++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 20bb896c4a..8f183eba2a 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -1320,6 +1320,7 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + int status, r; + RBDDiffIterateReq req = { .offs = offset }; + uint64_t features, flags; ++ uint64_t head = 0; + + assert(offset + bytes <= s->image_size); + +@@ -1347,7 +1348,43 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + return status; + } + +- r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true, ++#if LIBRBD_VERSION_CODE < LIBRBD_VERSION(1, 17, 0) ++ /* ++ * librbd had a bug until early 2022 that affected all versions of ceph that ++ * supported fast-diff. This bug results in reporting of incorrect offsets ++ * if the offset parameter to rbd_diff_iterate2 is not object aligned. ++ * Work around this bug by rounding down the offset to object boundaries. ++ * This is OK because we call rbd_diff_iterate2 with whole_object = true. ++ * However, this workaround only works for non cloned images with default ++ * striping. 
++ * ++ * See: https://tracker.ceph.com/issues/53784 ++ */ ++ ++ /* check if RBD image has non-default striping enabled */ ++ if (features & RBD_FEATURE_STRIPINGV2) { ++ return status; ++ } ++ ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wdeprecated-declarations" ++ /* ++ * check if RBD image is a clone (= has a parent). ++ * ++ * rbd_get_parent_info is deprecated from Nautilus onwards, but the ++ * replacement rbd_get_parent is not present in Luminous and Mimic. ++ */ ++ if (rbd_get_parent_info(s->image, NULL, 0, NULL, 0, NULL, 0) != -ENOENT) { ++ return status; ++ } ++#pragma GCC diagnostic pop ++ ++ head = req.offs & (s->object_size - 1); ++ req.offs -= head; ++ bytes += head; ++#endif ++ ++ r = rbd_diff_iterate2(s->image, NULL, req.offs, bytes, true, true, + qemu_rbd_diff_iterate_cb, &req); + if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) { + return status; +@@ -1366,7 +1403,8 @@ static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs, + status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID; + } + +- *pnum = req.bytes; ++ assert(req.bytes > head); ++ *pnum = req.bytes - head; + return status; + } + +-- +2.27.0 + diff --git a/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch new file mode 100644 index 0000000..a948e57 --- /dev/null +++ b/kvm-configs-devices-aarch64-softmmu-Enable-CONFIG_VIRTIO.patch @@ -0,0 +1,41 @@ +From 3a0e9bb88e82cc76ca5efc0595ce94b5dc34749e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Mon, 25 Apr 2022 13:42:46 +0800 +Subject: [PATCH 1/2] configs/devices/aarch64-softmmu: Enable CONFIG_VIRTIO_MEM + +RH-Author: Gavin Shan +RH-MergeRequest: 80: Enable virtio-mem for aarch64 +RH-Commit: [1/1] 1afbd08da6d7c860da8d617a0a932d3660514878 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2044162 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Eric Auger +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2044162 + +This 
enables virtio-mem device on aarch64 since all needed commits +are ready. + + b1b87327a9 hw/arm/virt: Support for virtio-mem-pci + 1263615efe virtio-mem: Correct default THP size for ARM64 + +Signed-off-by: Gavin Shan +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index 5f6ee1de5b..187938573f 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -22,6 +22,7 @@ CONFIG_VFIO=y + CONFIG_VFIO_PCI=y + CONFIG_VIRTIO_MMIO=y + CONFIG_VIRTIO_PCI=y ++CONFIG_VIRTIO_MEM=y + CONFIG_XIO3130=y + CONFIG_NVDIMM=y + CONFIG_ACPI_APEI=y +-- +2.35.1 + diff --git a/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch new file mode 100644 index 0000000..c1f3683 --- /dev/null +++ b/kvm-coroutine-Rename-qemu_coroutine_inc-dec_pool_size.patch @@ -0,0 +1,101 @@ +From e3cb8849862a9f0dd20f2913d540336a037d43c7 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:19 +0200 +Subject: [PATCH 07/16] coroutine: Rename qemu_coroutine_inc/dec_pool_size() + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [1/2] 6389b11f70225f221784c270d9b90c1ea43ca8fb (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +It's true that these functions currently affect the batch size in which +coroutines are reused (i.e. moved from the global release pool to the +allocation pool of a specific thread), but this is a bug and will be +fixed in a separate patch. + +In fact, the comment in the header file already just promises that it +influences the pool size, so reflect this in the name of the functions. 
+As a nice side effect, the shorter function name makes some line +wrapping unnecessary. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 98e3ab35054b946f7c2aba5408822532b0920b53) +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 6 ++---- + include/qemu/coroutine.h | 6 +++--- + util/qemu-coroutine.c | 4 ++-- + 3 files changed, 7 insertions(+), 9 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 540c38f829..6a1cc41877 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) + for (i = 0; i < conf->num_queues; i++) { + virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output); + } +- qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2); + virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err); + if (err != NULL) { + error_propagate(errp, err); +@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev) + for (i = 0; i < conf->num_queues; i++) { + virtio_del_queue(vdev, i); + } +- qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size +- / 2); ++ qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2); + qemu_del_vm_change_state_handler(s->change); + blockdev_mark_auto_del(s->blk); + virtio_cleanup(vdev); +diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h +index c828a95ee0..5b621d1295 100644 +--- a/include/qemu/coroutine.h ++++ b/include/qemu/coroutine.h +@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd); + /** + * Increase coroutine pool size + */ +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); + 
+ /** +- * Devcrease coroutine pool size ++ * Decrease coroutine pool size + */ +-void qemu_coroutine_decrease_pool_batch_size(unsigned int additional_pool_size); ++void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); + + #include "qemu/lockable.h" + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index c03b2422ff..faca0ca97c 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -205,12 +205,12 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + return co->ctx; + } + +-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size) ++void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { + qatomic_add(&pool_batch_size, additional_pool_size); + } + +-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size) ++void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { + qatomic_sub(&pool_batch_size, removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-coroutine-Revert-to-constant-batch-size.patch b/kvm-coroutine-Revert-to-constant-batch-size.patch new file mode 100644 index 0000000..2973510 --- /dev/null +++ b/kvm-coroutine-Revert-to-constant-batch-size.patch @@ -0,0 +1,138 @@ +From 345107bfd5537b51f34aaeb97d6161858bb6feee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 10 May 2022 17:10:20 +0200 +Subject: [PATCH 08/16] coroutine: Revert to constant batch size + +RH-Author: Kevin Wolf +RH-MergeRequest: 87: coroutine: Fix crashes due to too large pool batch size +RH-Commit: [2/2] 8a8a39af873854cdc8333d1a70f3479a97c3ec7a (kmwolf/centos-qemu-kvm) +RH-Bugzilla: 2079938 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Commit 4c41c69e changed the way the coroutine pool is sized because for +virtio-blk devices with a large queue size and heavy I/O, it was just +too small and caused coroutines to be deleted and reallocated soon +afterwards. 
The change made the size dynamic based on the number of +queues and the queue size of virtio-blk devices. + +There are two important numbers here: Slightly simplified, when a +coroutine terminates, it is generally stored in the global release pool +up to a certain pool size, and if the pool is full, it is freed. +Conversely, when allocating a new coroutine, the coroutines in the +release pool are reused if the pool already has reached a certain +minimum size (the batch size), otherwise we allocate new coroutines. + +The problem after commit 4c41c69e is that it not only increases the +maximum pool size (which is the intended effect), but also the batch +size for reusing coroutines (which is a bug). It means that in cases +with many devices and/or a large queue size (which defaults to the +number of vcpus for virtio-blk-pci), many thousand coroutines could be +sitting in the release pool without being reused. + +This is not only a waste of memory and allocations, but it actually +makes the QEMU process likely to hit the vm.max_map_count limit on Linux +because each coroutine requires two mappings (its stack and the guard +page for the stack), causing it to abort() in qemu_alloc_stack() because +when the limit is hit, mprotect() starts to fail with ENOMEM. + +In order to fix the problem, change the batch size back to 64 to avoid +uselessly accumulating coroutines in the release pool, but keep the +dynamic maximum pool size so that coroutines aren't freed too early +in heavy I/O scenarios. + +Note that this fix doesn't strictly make it impossible to hit the limit, +but this would only happen if most of the coroutines are actually in use +at the same time, not just sitting in a pool. This is the same behaviour +as we already had before commit 4c41c69e. Fully preventing this would +require allowing qemu_coroutine_create() to return an error, but it +doesn't seem to be a scenario that people hit in practice. 
+ +Cc: qemu-stable@nongnu.org +Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938 +Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b +Signed-off-by: Kevin Wolf +Message-Id: <20220510151020.105528-3-kwolf@redhat.com> +Tested-by: Hiroki Narukawa +Signed-off-by: Kevin Wolf +(cherry picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52) + +Conflicts: + util/qemu-coroutine.c + +Trivial merge conflict because we don't have commit ac387a08 downstream. + +Signed-off-by: Kevin Wolf +--- + util/qemu-coroutine.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index faca0ca97c..804f672e0a 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -20,14 +20,20 @@ + #include "qemu/coroutine_int.h" + #include "block/aio.h" + +-/** Initial batch size is 64, and is increased on demand */ ++/** ++ * The minimal batch size is always 64, coroutines from the release_pool are ++ * reused as soon as there are 64 coroutines in it. The maximum pool size starts ++ * with 64 and is increased on demand so that coroutines are not deleted even if ++ * they are not immediately reused. 
++ */ + enum { +- POOL_INITIAL_BATCH_SIZE = 64, ++ POOL_MIN_BATCH_SIZE = 64, ++ POOL_INITIAL_MAX_SIZE = 64, + }; + + /** Free list to speed up creation */ + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE; ++static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; + static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); + static __thread unsigned int alloc_pool_size; +@@ -51,7 +57,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + if (CONFIG_COROUTINE_POOL) { + co = QSLIST_FIRST(&alloc_pool); + if (!co) { +- if (release_pool_size > qatomic_read(&pool_batch_size)) { ++ if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +@@ -88,12 +94,12 @@ static void coroutine_delete(Coroutine *co) + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { +- if (release_pool_size < qatomic_read(&pool_batch_size) * 2) { ++ if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_batch_size)) { ++ if (alloc_pool_size < qatomic_read(&pool_max_size)) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; +@@ -207,10 +213,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co) + + void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size) + { +- qatomic_add(&pool_batch_size, additional_pool_size); ++ qatomic_add(&pool_max_size, additional_pool_size); + } + + void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size) + { +- qatomic_sub(&pool_batch_size, removing_pool_size); ++ qatomic_sub(&pool_max_size, 
removing_pool_size); + } +-- +2.31.1 + diff --git a/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..963cf04 --- /dev/null +++ b/kvm-coroutine-ucontext-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,132 @@ +From ffbd90e5f4eba620c7cd631b04f0ed31beb22ffa Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:07:56 +0100 +Subject: [PATCH 1/6] coroutine-ucontext: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [1/3] a9782fe8e919c4bd317b7e8744c7ff57d898add3 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. 
+ +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-2-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit 34145a307d849d0b6734d0222a7aa0bb9eef7407) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-ucontext.c | 38 ++++++++++++++++++++++++-------------- + 1 file changed, 24 insertions(+), 14 deletions(-) + +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 904b375192..127d5a13c8 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + #ifdef CONFIG_VALGRIND_H + #include +@@ -66,8 +67,8 @@ typedef struct { + /** + * Per-thread coroutine bookkeeping + */ +-static __thread CoroutineUContext leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineUContext, leader); + + /* + * va_args to makecontext() must be type 'int', so passing +@@ -97,14 +98,15 @@ static inline __attribute__((always_inline)) + void finish_switch_fiber(void *fake_stack_save) + { + #ifdef CONFIG_ASAN ++ CoroutineUContext *leaderp = get_ptr_leader(); + const void *bottom_old; + size_t size_old; + + __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old); + +- if (!leader.stack) { +- leader.stack = (void *)bottom_old; +- leader.stack_size = size_old; ++ if (!leaderp->stack) { ++ leaderp->stack = (void *)bottom_old; ++ leaderp->stack_size = size_old; + } + #endif + #ifdef CONFIG_TSAN +@@ -161,8 +163,10 @@ static void coroutine_trampoline(int i0, int i1) + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { +- start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, leader.stack, +- leader.stack_size); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ start_switch_fiber_asan(COROUTINE_YIELD, &fake_stack_save, 
++ leaderp->stack, leaderp->stack_size); + start_switch_fiber_tsan(&fake_stack_save, self, true); /* true=caller */ + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } +@@ -297,7 +301,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + int ret; + void *fake_stack_save = NULL; + +- current = to_; ++ set_current(to_); + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { +@@ -315,18 +319,24 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + + Coroutine *qemu_coroutine_self(void) + { +- if (!current) { +- current = &leader.base; ++ Coroutine *self = get_current(); ++ CoroutineUContext *leaderp = get_ptr_leader(); ++ ++ if (!self) { ++ self = &leaderp->base; ++ set_current(self); + } + #ifdef CONFIG_TSAN +- if (!leader.tsan_co_fiber) { +- leader.tsan_co_fiber = __tsan_get_current_fiber(); ++ if (!leaderp->tsan_co_fiber) { ++ leaderp->tsan_co_fiber = __tsan_get_current_fiber(); + } + #endif +- return current; ++ return self; + } + + bool qemu_in_coroutine(void) + { +- return current && current->caller; ++ Coroutine *self = get_current(); ++ ++ return self && self->caller; + } +-- +2.31.1 + diff --git a/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..9d0f811 --- /dev/null +++ b/kvm-coroutine-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,139 @@ +From 9c2e55d25fec6ffb21e344513b7dbeed7e21f641 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:04 +0100 +Subject: [PATCH 2/6] coroutine: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [2/3] 68a8847e406e2eace6ddc31b0c5676a60600d606 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage 
variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. +The alloc_pool QSLIST needs a typedef so the return value of +get_ptr_alloc_pool() can be stored in a local variable. + +One example of why this code is necessary: a coroutine that yields +before calling qemu_coroutine_create() to create another coroutine is +affected by the TLS issue. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-3-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit ac387a08a9c9f6b36757da912f0339c25f421f90) + +Conflicts: +- Context conflicts due to commit 5411171c3ef4 ("coroutine: Revert to + constant batch size"). + +Signed-off-by: Stefan Hajnoczi +--- + util/qemu-coroutine.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 804f672e0a..4a8bd63ef0 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -18,6 +18,7 @@ + #include "qemu/atomic.h" + #include "qemu/coroutine.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + #include "block/aio.h" + + /** +@@ -35,17 +36,20 @@ enum { + static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); + static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE; + static unsigned int release_pool_size; +-static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +-static __thread unsigned int alloc_pool_size; +-static __thread Notifier coroutine_pool_cleanup_notifier; ++ ++typedef QSLIST_HEAD(, Coroutine) CoroutineQSList; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineQSList, alloc_pool); 
++QEMU_DEFINE_STATIC_CO_TLS(unsigned int, alloc_pool_size); ++QEMU_DEFINE_STATIC_CO_TLS(Notifier, coroutine_pool_cleanup_notifier); + + static void coroutine_pool_cleanup(Notifier *n, void *value) + { + Coroutine *co; + Coroutine *tmp; ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); + +- QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); ++ QSLIST_FOREACH_SAFE(co, alloc_pool, pool_next, tmp) { ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); + qemu_coroutine_delete(co); + } + } +@@ -55,27 +59,30 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { +- co = QSLIST_FIRST(&alloc_pool); ++ CoroutineQSList *alloc_pool = get_ptr_alloc_pool(); ++ ++ co = QSLIST_FIRST(alloc_pool); + if (!co) { + if (release_pool_size > POOL_MIN_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ +- if (!coroutine_pool_cleanup_notifier.notify) { +- coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; +- qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); ++ Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier(); ++ if (!notifier->notify) { ++ notifier->notify = coroutine_pool_cleanup; ++ qemu_thread_atexit_add(notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. 
+ */ +- alloc_pool_size = qatomic_xchg(&release_pool_size, 0); +- QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); +- co = QSLIST_FIRST(&alloc_pool); ++ set_alloc_pool_size(qatomic_xchg(&release_pool_size, 0)); ++ QSLIST_MOVE_ATOMIC(alloc_pool, &release_pool); ++ co = QSLIST_FIRST(alloc_pool); + } + } + if (co) { +- QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); +- alloc_pool_size--; ++ QSLIST_REMOVE_HEAD(alloc_pool, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() - 1); + } + } + +@@ -99,9 +106,9 @@ static void coroutine_delete(Coroutine *co) + qatomic_inc(&release_pool_size); + return; + } +- if (alloc_pool_size < qatomic_read(&pool_max_size)) { +- QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); +- alloc_pool_size++; ++ if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) { ++ QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next); ++ set_alloc_pool_size(get_alloc_pool_size() + 1); + return; + } + } +-- +2.31.1 + diff --git a/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch new file mode 100644 index 0000000..1665319 --- /dev/null +++ b/kvm-coroutine-win32-use-QEMU_DEFINE_STATIC_CO_TLS.patch @@ -0,0 +1,99 @@ +From 336581e6e9ace3f1ddd24ad0a258db9785f9b0ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 12:08:12 +0100 +Subject: [PATCH 3/6] coroutine-win32: use QEMU_DEFINE_STATIC_CO_TLS() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 89: coroutine: use coroutine TLS macros to protect thread-local variables +RH-Commit: [3/3] 55b35dfdae1bc7d6f614ac9f81a92f5c6431f713 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 1952483 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Eric Blake +RH-Acked-by: Kevin Wolf + +Thread-Local Storage variables cannot be used directly from coroutine +code because the compiler may optimize TLS variable accesses across +qemu_coroutine_yield() calls. 
When the coroutine is re-entered from +another thread the TLS variables from the old thread must no longer be +used. + +Use QEMU_DEFINE_STATIC_CO_TLS() for the current and leader variables. + +I think coroutine-win32.c could get away with __thread because the +variables are only used in situations where either the stale value is +correct (current) or outside coroutine context (loading leader when +current is NULL). Due to the difficulty of being sure that this is +really safe in all scenarios it seems worth converting it anyway. + +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220307153853.602859-4-stefanha@redhat.com> +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Kevin Wolf +(cherry picked from commit c1fe694357a328c807ae3cc6961c19e923448fcc) +Signed-off-by: Stefan Hajnoczi +--- + util/coroutine-win32.c | 18 +++++++++++++----- + 1 file changed, 13 insertions(+), 5 deletions(-) + +diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c +index de6bd4fd3e..c02a62c896 100644 +--- a/util/coroutine-win32.c ++++ b/util/coroutine-win32.c +@@ -25,6 +25,7 @@ + #include "qemu/osdep.h" + #include "qemu-common.h" + #include "qemu/coroutine_int.h" ++#include "qemu/coroutine-tls.h" + + typedef struct + { +@@ -34,8 +35,8 @@ typedef struct + CoroutineAction action; + } CoroutineWin32; + +-static __thread CoroutineWin32 leader; +-static __thread Coroutine *current; ++QEMU_DEFINE_STATIC_CO_TLS(CoroutineWin32, leader); ++QEMU_DEFINE_STATIC_CO_TLS(Coroutine *, current); + + /* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). 
If we allow it to do that then it +@@ -52,7 +53,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + +- current = to_; ++ set_current(to_); + + to->action = action; + SwitchToFiber(to->fiber); +@@ -89,14 +90,21 @@ void qemu_coroutine_delete(Coroutine *co_) + + Coroutine *qemu_coroutine_self(void) + { ++ Coroutine *current = get_current(); ++ + if (!current) { +- current = &leader.base; +- leader.fiber = ConvertThreadToFiber(NULL); ++ CoroutineWin32 *leader = get_ptr_leader(); ++ ++ current = &leader->base; ++ set_current(current); ++ leader->fiber = ConvertThreadToFiber(NULL); + } + return current; + } + + bool qemu_in_coroutine(void) + { ++ Coroutine *current = get_current(); ++ + return current && current->caller; + } +-- +2.31.1 + diff --git a/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch b/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch new file mode 100644 index 0000000..040cfe1 --- /dev/null +++ b/kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch @@ -0,0 +1,58 @@ +From abd84f26e0fe0bc9952d91fbd35fb3a7253cfecf Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 20:54:45 -0400 +Subject: [PATCH 1/2] display/qxl-render: fix race condition in qxl_cursor + (CVE-2021-4207) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 152: display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) +RH-Commit: [1/1] f05b9a956f2e0ca522b5be127beff813d04b5588 (jmaloy/qemu-kvm) +RH-Bugzilla: 2040738 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Mauro Matteo Cascella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2040738 +Upstream: Merged +CVE: CVE-2021-4207 + +commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895 +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:11:06 2022 +0200 + + 
display/qxl-render: fix race condition in qxl_cursor (CVE-2021-4207) + + Avoid fetching 'width' and 'height' a second time to prevent possible + race condition. Refer to security advisory + https://starlabs.sg/advisories/22-4207/ for more information. + + Fixes: CVE-2021-4207 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081106.343235-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 9569f5cb5b4bffa9d3ebc8ba7da1e03830a9a895) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index d28849b121..237ed293ba 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -266,7 +266,7 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + } + break; + case SPICE_CURSOR_TYPE_ALPHA: +- size = sizeof(uint32_t) * cursor->header.width * cursor->header.height; ++ size = sizeof(uint32_t) * c->width * c->height; + qxl_unpack_chunks(c->data, size, qxl, &cursor->chunk, group_id); + if (qxl->debug > 2) { + cursor_print_ascii_art(c, "qxl/alpha"); +-- +2.27.0 + diff --git a/kvm-doc-Add-the-SGX-numa-description.patch b/kvm-doc-Add-the-SGX-numa-description.patch new file mode 100644 index 0000000..0bed8a6 --- /dev/null +++ b/kvm-doc-Add-the-SGX-numa-description.patch @@ -0,0 +1,77 @@ +From e8377e3f4d540e2594a50985523e87d1f3cabbc7 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:08 -0400 +Subject: [PATCH 3/7] doc: Add the SGX numa description + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [3/5] 41c74688c9662b966c243566a837135ff52341c4 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGX numa reference command and how to check if +SGX numa is supported or not with multiple EPC sections. 
+ +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d1889b36098c79e2e6ac90faf3d0dc5ec0057677) +Signed-off-by: Paul Lai +--- + docs/system/i386/sgx.rst | 31 +++++++++++++++++++++++++++---- + 1 file changed, 27 insertions(+), 4 deletions(-) + +diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst +index f8fade5ac2..0f0a73f758 100644 +--- a/docs/system/i386/sgx.rst ++++ b/docs/system/i386/sgx.rst +@@ -141,8 +141,7 @@ To launch a SGX guest: + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ +- -object memory-backend-epc,id=mem2,size=28M \\ +- -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 ++ -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 + + Utilizing SGX in the guest requires a kernel/OS with SGX support. + The support can be determined in guest by:: +@@ -152,8 +151,32 @@ The support can be determined in guest by:: + and SGX epc info by:: + + $ dmesg | grep sgx +- [ 1.242142] sgx: EPC section 0x180000000-0x181bfffff +- [ 1.242319] sgx: EPC section 0x181c00000-0x1837fffff ++ [ 0.182807] sgx: EPC section 0x140000000-0x143ffffff ++ [ 0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. ++ ++To launch a SGX numa guest: ++ ++.. 
parsed-literal:: ++ ++ |qemu_system_x86| \\ ++ -cpu host,+sgx-provisionkey \\ ++ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ ++ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ ++ -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ ++ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ ++ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ ++ -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ ++ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 ++ ++and SGX epc numa info by:: ++ ++ $ dmesg | grep sgx ++ [ 0.369937] sgx: EPC section 0x180000000-0x183ffffff ++ [ 0.370259] sgx: EPC section 0x184000000-0x185bfffff ++ ++ $ dmesg | grep SRAT ++ [ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] ++ [ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] + + References + ---------- +-- +2.27.0 + diff --git a/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch new file mode 100644 index 0000000..2795dcd --- /dev/null +++ b/kvm-hw-acpi-aml-build-Use-existing-CPU-topology-to-build.patch @@ -0,0 +1,179 @@ +From 8a12049e97149056f61f7748d9869606d282d16e Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 06/16] hw/acpi/aml-build: Use existing CPU topology to build + PPTT table + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [6/6] 53fa376531c204cf706cc1a7a0499019756106cb (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When the PPTT table is built, the CPU topology is re-calculated, but +it's unnecessary because the CPU topology has been populated in +virt_possible_cpu_arch_ids() on arm/virt machine. 
+ +This reworks build_pptt() to avoid by reusing the existing IDs in +ms->possible_cpus. Currently, the only user of build_pptt() is +arm/virt machine. + +Signed-off-by: Gavin Shan +Tested-by: Yanan Wang +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Acked-by: Michael S. Tsirkin +Message-id: 20220503140304.855514-7-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ae9141d4a3265553503bf07d3574b40f84615a34) +Signed-off-by: Gavin Shan +--- + hw/acpi/aml-build.c | 111 +++++++++++++++++++------------------------- + 1 file changed, 48 insertions(+), 63 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 4086879ebf..e6bfac95c7 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2002,86 +2002,71 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +- GQueue *list = g_queue_new(); +- guint pptt_start = table_data->len; +- guint parent_offset; +- guint length, i; +- int uid = 0; +- int socket; ++ CPUArchIdList *cpus = ms->possible_cpus; ++ int64_t socket_id = -1, cluster_id = -1, core_id = -1; ++ uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0; ++ uint32_t pptt_start = table_data->len; ++ int n; + AcpiTable table = { .sig = "PPTT", .rev = 2, + .oem_id = oem_id, .oem_table_id = oem_table_id }; + + acpi_table_begin(&table, table_data); + +- for (socket = 0; socket < ms->smp.sockets; socket++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- /* +- * Physical package - represents the boundary +- * of a physical package +- */ +- (1 << 0), +- 0, socket, NULL, 0); +- } +- +- if (mc->smp_props.clusters_supported) { +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int cluster; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (cluster = 0; cluster < 
ms->smp.clusters; cluster++) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, cluster, NULL, 0); +- } ++ /* ++ * This works with the assumption that cpus[n].props.*_id has been ++ * sorted from top to down levels in mc->possible_cpu_arch_ids(). ++ * Otherwise, the unexpected and duplicated containers will be ++ * created. ++ */ ++ for (n = 0; n < cpus->len; n++) { ++ if (cpus->cpus[n].props.socket_id != socket_id) { ++ assert(cpus->cpus[n].props.socket_id > socket_id); ++ socket_id = cpus->cpus[n].props.socket_id; ++ cluster_id = -1; ++ core_id = -1; ++ socket_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (1 << 0), /* Physical package */ ++ 0, socket_id, NULL, 0); + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < length; i++) { +- int core; +- +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (core = 0; core < ms->smp.cores; core++) { +- if (ms->smp.threads > 1) { +- g_queue_push_tail(list, +- GUINT_TO_POINTER(table_data->len - pptt_start)); +- build_processor_hierarchy_node( +- table_data, +- (0 << 0), /* not a physical package */ +- parent_offset, core, NULL, 0); +- } else { +- build_processor_hierarchy_node( +- table_data, +- (1 << 1) | /* ACPI Processor ID valid */ +- (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ if (mc->smp_props.clusters_supported) { ++ if (cpus->cpus[n].props.cluster_id != cluster_id) { ++ assert(cpus->cpus[n].props.cluster_id > cluster_id); ++ cluster_id = cpus->cpus[n].props.cluster_id; ++ core_id = -1; ++ cluster_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ socket_offset, cluster_id, NULL, 0); + } ++ } else { ++ cluster_offset = socket_offset; + } +- } + +- length = g_queue_get_length(list); +- for (i = 0; i < 
length; i++) { +- int thread; ++ if (ms->smp.threads == 1) { ++ build_processor_hierarchy_node(table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ cluster_offset, n, NULL, 0); ++ } else { ++ if (cpus->cpus[n].props.core_id != core_id) { ++ assert(cpus->cpus[n].props.core_id > core_id); ++ core_id = cpus->cpus[n].props.core_id; ++ core_offset = table_data->len - pptt_start; ++ build_processor_hierarchy_node(table_data, ++ (0 << 0), /* Not a physical package */ ++ cluster_offset, core_id, NULL, 0); ++ } + +- parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); +- for (thread = 0; thread < ms->smp.threads; thread++) { +- build_processor_hierarchy_node( +- table_data, ++ build_processor_hierarchy_node(table_data, + (1 << 1) | /* ACPI Processor ID valid */ + (1 << 2) | /* Processor is a Thread */ + (1 << 3), /* Node is a Leaf */ +- parent_offset, uid++, NULL, 0); ++ core_offset, n, NULL, 0); + } + } + +- g_queue_free(list); + acpi_table_end(linker, &table); + } + +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Add-8.6-machine-type.patch b/kvm-hw-arm-virt-Add-8.6-machine-type.patch new file mode 100644 index 0000000..f3c5492 --- /dev/null +++ b/kvm-hw-arm-virt-Add-8.6-machine-type.patch @@ -0,0 +1,57 @@ +From a154eb35d738aecf552d57d99499facce1c834ba Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:24:24 +0100 +Subject: [PATCH 4/6] hw/arm/virt: Add 8.6 machine type + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [4/5] d0df3e796d3e9a6ca2af1e3b33fc6021bcac5d09 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Add 8.6 machine type. 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 6a4173b6c3..c9c17b9d45 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3228,17 +3228,23 @@ static void rhel_machine_init(void) + } + type_init(rhel_machine_init); + ++static void rhel860_virt_options(MachineClass *mc) ++{ ++ compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++} ++DEFINE_RHEL_MACHINE_AS_LATEST(8, 6, 0) ++ + static void rhel850_virt_options(MachineClass *mc) + { + VirtMachineClass *vmc = VIRT_MACHINE_CLASS(OBJECT_CLASS(mc)); + +- compat_props_add(mc->compat_props, arm_rhel_compat, arm_rhel_compat_len); ++ rhel860_virt_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + vmc->no_cpu_topology = true; + vmc->no_tcg_its = true; + } +-DEFINE_RHEL_MACHINE_AS_LATEST(8, 5, 0) ++DEFINE_RHEL_MACHINE(8, 5, 0) + + static void rhel840_virt_options(MachineClass *mc) + { +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch new file mode 100644 index 0000000..679f436 --- /dev/null +++ b/kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch @@ -0,0 +1,86 @@ +From 1b4a8daf695a81f18ba70bea91b199da215da4e1 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jan 2022 16:17:10 +0100 +Subject: [PATCH 5/6] hw/arm/virt: Check no_tcg_its and minor style changes + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [5/5] 57e77446ff5a1a7efe152b2c907c0a0ca5487ab7 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Truly allow TCG ITS instantiation according to the no_tcg_its +class flag. Otherwise it is always set to false. 
+ +We also take benefit of this patch to do some minor non +functional style changes to be closer to the upstream code. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c9c17b9d45..dbf0a6d62f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3157,6 +3157,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "in ACPI table header." + "The string may be up to 6 bytes in size"); + ++ + object_class_property_add_str(oc, "x-oem-table-id", + virt_get_oem_table_id, + virt_set_oem_table_id); +@@ -3164,6 +3165,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." + "The string may be up to 8 bytes in size"); ++ + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3188,24 +3190,32 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; ++ ++ if (vmc->no_tcg_its) { ++ vms->tcg_its = false; ++ } else { ++ vms->tcg_its = true; ++ } + } + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; + ++ /* The default root bus is attached to iommu by default */ ++ vms->default_bus_bypass_iommu = false; ++ + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; + + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- vms->default_bus_bypass_iommu = false; + vms->irqmap = a15irqmap; + + virt_flash_create(vms); ++ + vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +- + } + + static const TypeInfo rhel_machine_info = { +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch new file mode 100644 index 0000000..240aead --- 
/dev/null +++ b/kvm-hw-arm-virt-Consider-SMP-configuration-in-CPU-topolo.patch @@ -0,0 +1,74 @@ +From 3b05d3464945295112b5d02d142422f524a52054 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 03/16] hw/arm/virt: Consider SMP configuration in CPU topology + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [3/6] 7125b41f038c2b1cb33377d0ef1222f1ea42b648 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +Currently, the SMP configuration isn't considered when the CPU +topology is populated. In this case, it's impossible to provide +the default CPU-to-NUMA mapping or association based on the socket +ID of the given CPU. + +This takes account of SMP configuration when the CPU topology +is populated. The die ID for the given CPU isn't assigned since +it's not supported on arm/virt machine. 
Besides, the used SMP +configuration in qtest/numa-test/aarch64_numa_cpu() is corrected +to avoid testing failure + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-4-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit c9ec4cb5e4936f980889e717524e73896b0200ed) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8be12e121d..a87c8d396a 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2553,6 +2553,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + int n; + unsigned int max_cpus = ms->smp.max_cpus; + VirtMachineState *vms = VIRT_MACHINE(ms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + + if (ms->possible_cpus) { + assert(ms->possible_cpus->len == max_cpus); +@@ -2566,8 +2567,20 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->cpus[n].type = ms->cpu_type; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); ++ ++ assert(!mc->smp_props.dies_supported); ++ ms->possible_cpus->cpus[n].props.has_socket_id = true; ++ ms->possible_cpus->cpus[n].props.socket_id = ++ n / (ms->smp.clusters * ms->smp.cores * ms->smp.threads); ++ ms->possible_cpus->cpus[n].props.has_cluster_id = true; ++ ms->possible_cpus->cpus[n].props.cluster_id = ++ (n / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters; ++ ms->possible_cpus->cpus[n].props.has_core_id = true; ++ ms->possible_cpus->cpus[n].props.core_id = ++ (n / ms->smp.threads) % ms->smp.cores; + ms->possible_cpus->cpus[n].props.has_thread_id = true; +- ms->possible_cpus->cpus[n].props.thread_id = n; ++ ms->possible_cpus->cpus[n].props.thread_id = ++ n % ms->smp.threads; + } + return ms->possible_cpus; + } +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch 
new file mode 100644 index 0000000..6b60b70 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-CPU-s-default-NUMA-node-ID.patch @@ -0,0 +1,88 @@ +From 14e49ad3b98f01c1ad6fe456469d40a96a43dc3c Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 05/16] hw/arm/virt: Fix CPU's default NUMA node ID + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [5/6] 5336f62bc0c53c0417db1d71ef89544907bc28c0 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +When CPU-to-NUMA association isn't explicitly provided by users, +the default one is given by mc->get_default_cpu_node_id(). However, +the CPU topology isn't fully considered in the default association +and this causes CPU topology broken warnings on booting Linux guest. + +For example, the following warning messages are observed when the +Linux guest is booted with the following command lines. 
+/home/gavin/sandbox/qemu.main/build/qemu-system-aarch64 \ +-accel kvm -machine virt,gic-version=host \ +-cpu host \ +-smp 6,sockets=2,cores=3,threads=1 \ +-m 1024M,slots=16,maxmem=64G \ +-object memory-backend-ram,id=mem0,size=128M \ +-object memory-backend-ram,id=mem1,size=128M \ +-object memory-backend-ram,id=mem2,size=128M \ +-object memory-backend-ram,id=mem3,size=128M \ +-object memory-backend-ram,id=mem4,size=128M \ +-object memory-backend-ram,id=mem5,size=384M \ +-numa node,nodeid=0,memdev=mem0 \ +-numa node,nodeid=1,memdev=mem1 \ +-numa node,nodeid=2,memdev=mem2 \ +-numa node,nodeid=3,memdev=mem3 \ +-numa node,nodeid=4,memdev=mem4 \ +-numa node,nodeid=5,memdev=mem5 +: +alternatives: patching kernel code +BUG: arch topology borken +the CLS domain not a subset of the MC domain + +BUG: arch topology borken +the DIE domain not a subset of the NODE domain + +With current implementation of mc->get_default_cpu_node_id(), +CPU#0 to CPU#5 are associated with NODE#0 to NODE#5 separately. +That's incorrect because CPU#0/1/2 should be associated with same +NUMA node because they're seated in same socket. + +This fixes the issue by considering the socket ID when the default +CPU-to-NUMA association is provided in virt_possible_cpu_arch_ids(). +With this applied, no more CPU topology broken warnings are seen +from the Linux guest. The 6 CPUs are associated with NODE#0/1, but +there are no CPUs associated with NODE#2/3/4/5. 
+ +Signed-off-by: Gavin Shan +Reviewed-by: Igor Mammedov +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-6-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 4c18bc192386dfbca530e7f550e0992df657818a) +Signed-off-by: Gavin Shan +--- + hw/arm/virt.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index a87c8d396a..95d012d6eb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2545,7 +2545,9 @@ virt_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + + static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + { +- return idx % ms->numa_state->num_nodes; ++ int64_t socket_id = ms->possible_cpus->cpus[idx].props.socket_id; ++ ++ return socket_id % ms->numa_state->num_nodes; + } + + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch new file mode 100644 index 0000000..78b9ee0 --- /dev/null +++ b/kvm-hw-arm-virt-Fix-missing-initialization-in-instance-c.patch @@ -0,0 +1,56 @@ +From e25c40735d2f022c07481b548d20476222006657 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 4 May 2022 11:11:54 +0200 +Subject: [PATCH 2/5] hw/arm/virt: Fix missing initialization in + instance/class_init() + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [2/2] 22cbbfc30cf57a09b8acfb25d8a4dff2754c630c (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 +Upstream Status: RHEL-only +Tested: Boot RHEL guest and check migration from 8.6 to 9.1 + (with custom additions) + +During the 7.0 rebase, the initialization of 
highmem_mmio and +highmem_redists was forgotten in rhel_virt_instance_init(). +Fix it to match virt_instance_init() code. + +Also mc->smp_props.clusters_supported was missing in +rhel_machine_class_init(). + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bde4f77994..8be12e121d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3286,6 +3286,7 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + hc->unplug_request = virt_machine_device_unplug_request_cb; + hc->unplug = virt_machine_device_unplug_cb; + mc->nvdimm_supported = true; ++ mc->smp_props.clusters_supported = true; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->default_ram_id = "mach-virt.ram"; +@@ -3366,6 +3367,8 @@ static void rhel_virt_instance_init(Object *obj) + vms->gic_version = VIRT_GIC_VERSION_NOSEL; + + vms->highmem_ecam = !vmc->no_highmem_ecam; ++ vms->highmem_mmio = true; ++ vms->highmem_redists = true; + + if (vmc->no_its) { + vms->its = false; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch new file mode 100644 index 0000000..734756d --- /dev/null +++ b/kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch @@ -0,0 +1,78 @@ +From 8d5b57798d079307a98f6be5e1f6d28d1937a2fe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:50:44 +0100 +Subject: [PATCH 1/6] hw/arm/virt: Register "iommu" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [1/5] 74b01bb90213493db700d5bdf81dd99892571972 +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register the "iommu" option as a class property. 
This mirrors what +was done in upstream commit b91def7b ("arm/virt: Register +most properties as class properties"). + +While we are at it we also move the "x-oem-id" and "x-oem-table-id" +registrations at the very end of the rhel_machine_class_init() +function. This makes our life easier when comparing with upstream. + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 20 ++++++++++++-------- + 1 file changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e8941afd01..684ffce52e 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3131,6 +3131,18 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set GIC version. " + "Valid values are 2, 3, host and max"); + ++ object_class_property_add_str(oc, "iommu", virt_get_iommu, virt_set_iommu); ++ object_class_property_set_description(oc, "iommu", ++ "Set the IOMMU type. " ++ "Valid values are none and smmuv3"); ++ ++ object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ virt_get_default_bus_bypass_iommu, ++ virt_set_default_bus_bypass_iommu); ++ object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ "Set on/off to enable/disable " ++ "bypass_iommu for default root bus"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3146,10 +3158,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", +- virt_get_default_bus_bypass_iommu, +- virt_set_default_bus_bypass_iommu); +- + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3183,10 +3191,6 @@ static void rhel_virt_instance_init(Object *obj) + + /* Default disallows iommu instantiation */ + vms->iommu = VIRT_IOMMU_NONE; +- object_property_add_str(obj, "iommu", virt_get_iommu, virt_set_iommu); +- object_property_set_description(obj, "iommu", +- "Set the IOMMU type. " +- "Valid values are none and smmuv3"); + + /* Default disallows RAS instantiation and is non-configurable for RHEL */ + vms->ras = false; +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Register-its-as-a-class-property.patch b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch new file mode 100644 index 0000000..91b353a --- /dev/null +++ b/kvm-hw-arm-virt-Register-its-as-a-class-property.patch @@ -0,0 +1,57 @@ +From 07e2094cd86c1be349c0bdda69acd1857afacb66 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 16:04:59 +0100 +Subject: [PATCH 2/6] hw/arm/virt: Register "its" as a class property + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [2/5] 4ddfa57495578127770f93689c4d9f111a12b91c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Register "its" as a class property. This mirrors what was done +in commit 27edeeaafe43 ("virt: Register "its" as class property"). 
+ +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 684ffce52e..d679391eb0 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3143,6 +3143,12 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + ++ object_class_property_add_bool(oc, "its", virt_get_its, ++ virt_set_its); ++ object_class_property_set_description(oc, "its", ++ "Set on/off to enable/disable " ++ "ITS instantiation"); ++ + object_class_property_add_str(oc, "x-oem-id", + virt_get_oem_id, + virt_set_oem_id); +@@ -3182,11 +3188,6 @@ static void rhel_virt_instance_init(Object *obj) + } else { + /* Default allows ITS instantiation */ + vms->its = true; +- object_property_add_bool(obj, "its", virt_get_its, +- virt_set_its); +- object_property_set_description(obj, "its", +- "Set on/off to enable/disable " +- "ITS instantiation"); + } + + /* Default disallows iommu instantiation */ +-- +2.27.0 + diff --git a/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch new file mode 100644 index 0000000..10af6c0 --- /dev/null +++ b/kvm-hw-arm-virt-Remove-the-dtb-kaslr-seed-machine-option.patch @@ -0,0 +1,76 @@ +From 69f771c3dc641431f3e98497cbd3832edb69284f Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 3 May 2022 08:56:52 +0200 +Subject: [PATCH 1/5] hw/arm/virt: Remove the dtb-kaslr-seed machine option + +RH-Author: Eric Auger +RH-MergeRequest: 82: hw/arm/virt: Remove the dtb-kaslr-seed machine option +RH-Commit: [1/2] a89dcd7f22e04ae39de99795d3f34cdd0b831bc0 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2046029 +RH-Acked-by: Gavin Shan +RH-Acked-by: Andrew Jones +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2046029 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45133161 
+Upstream Status: RHEL-only +Tested: Boot RHEL guest and check the option is not available + +In RHEL we do not want to expose the dtb-kaslr-seed virt machine +option. Indeed the default 'on' value matches our need as +random data in the DTB does not cause any boot failure and we +want to support KASLR for the guest. + +Signed-off-by: Eric Auger + +--- +--- + hw/arm/virt.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e06862d22a..bde4f77994 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2350,6 +2350,7 @@ static void virt_set_its(Object *obj, bool value, Error **errp) + vms->its = value; + } + ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + static bool virt_get_dtb_kaslr_seed(Object *obj, Error **errp) + { + VirtMachineState *vms = VIRT_MACHINE(obj); +@@ -2363,6 +2364,7 @@ static void virt_set_dtb_kaslr_seed(Object *obj, bool value, Error **errp) + + vms->dtb_kaslr_seed = value; + } ++#endif /* disabled for RHEL */ + + static char *virt_get_oem_id(Object *obj, Error **errp) + { +@@ -3346,13 +3348,6 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Override the default value of field OEM Table ID " + "in ACPI table header." 
+ "The string may be up to 8 bytes in size"); +- +- object_class_property_add_bool(oc, "dtb-kaslr-seed", +- virt_get_dtb_kaslr_seed, +- virt_set_dtb_kaslr_seed); +- object_class_property_set_description(oc, "dtb-kaslr-seed", +- "Set off to disable passing of kaslr-seed " +- "dtb node to guest"); + } + + static void rhel_virt_instance_init(Object *obj) +@@ -3397,7 +3392,7 @@ static void rhel_virt_instance_init(Object *obj) + /* MTE is disabled by default and non-configurable for RHEL */ + vms->mte = false; + +- /* Supply a kaslr-seed by default */ ++ /* Supply a kaslr-seed by default and non-configurable for RHEL */ + vms->dtb_kaslr_seed = true; + + vms->irqmap = a15irqmap; +-- +2.31.1 + diff --git a/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch new file mode 100644 index 0000000..25e20ea --- /dev/null +++ b/kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch @@ -0,0 +1,46 @@ +From e896ba2bfbb613576ec3fbe5b948a326ac06193d Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Mon, 20 Dec 2021 15:58:38 +0100 +Subject: [PATCH 3/6] hw/arm/virt: Rename default_bus_bypass_iommu + +RH-Author: Eric Auger +RH-MergeRequest: 95: hw/arm/virt: Add virt-rhel8.6.0 machine type +RH-Commit: [3/5] 3ed0425391dab7cf14c6e66fc1b2430be1152d6c +RH-Bugzilla: 2031039 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Andrew Jones +RH-Acked-by: Gavin Shan + +branch: rhel-8.6.0 +Brew: 42212069 +Upstream: no + +Rename "default_bus_bypass_iommu" into "default-bus-bypass-iommu". +This mirrors what was done in upstream commit: +9dad363a223 ("hw/arm/virt: Rename default_bus_bypass_iommu") + +Signed-off-by: Eric Auger +--- + hw/arm/virt.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index d679391eb0..6a4173b6c3 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3136,10 +3136,10 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data) + "Set the IOMMU type. 
" + "Valid values are none and smmuv3"); + +- object_class_property_add_bool(oc, "default_bus_bypass_iommu", ++ object_class_property_add_bool(oc, "default-bus-bypass-iommu", + virt_get_default_bus_bypass_iommu, + virt_set_default_bus_bypass_iommu); +- object_class_property_set_description(oc, "default_bus_bypass_iommu", ++ object_class_property_set_description(oc, "default-bus-bypass-iommu", + "Set on/off to enable/disable " + "bypass_iommu for default root bus"); + +-- +2.27.0 + diff --git a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch new file mode 100644 index 0000000..1bdad27 --- /dev/null +++ b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch @@ -0,0 +1,96 @@ +From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:32 +0100 +Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun + (CVE-2021-3507) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Per the 82078 datasheet, if the end-of-track (EOT byte in +the FIFO) is more than the number of sectors per side, the +command is terminated unsuccessfully: + +* 5.2.5 DATA TRANSFER TERMINATION + + The 82078 supports terminal count explicitly through + the TC pin and implicitly through the underrun/over- + run and end-of-track (EOT) functions. For full sector + transfers, the EOT parameter can define the last + sector to be transferred in a single or multisector + transfer. 
If the last sector to be transferred is a par- + tial sector, the host can stop transferring the data in + mid-sector, and the 82078 will continue to complete + the sector as if a hardware TC was received. The + only difference between these implicit functions and + TC is that they return "abnormal termination" result + status. Such status indications can be ignored if they + were expected. + +* 6.1.3 READ TRACK + + This command terminates when the EOT specified + number of sectors have been read. If the 82078 + does not find an ID Address Mark on the diskette + after the second occurrence of a pulse on the + INDX# pin, then it sets the IC code in Status Regis- + ter 0 to "01" (Abnormal termination), sets the MA bit + in Status Register 1 to "1", and terminates the com- + mand. + +* 6.1.6 VERIFY + + Refer to Table 6-6 and Table 6-7 for information + concerning the values of MT and EC versus SC and + EOT value. + +* Table 6-6. Result Phase Table + +* Table 6-7. Verify Command Result Phase Table + +Fix by aborting the transfer when EOT > # Sectors Per Side. + +Cc: qemu-stable@nongnu.org +Cc: Hervé Poussineau +Fixes: baca51faff0 ("floppy driver: disk geometry auto detect") +Reported-by: Alexander Bulekov +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339 +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20211118115733.4038610-2-philmd@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367) +Signed-off-by: Jon Maloy +--- + hw/block/fdc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/block/fdc.c b/hw/block/fdc.c +index ca1776121f..6481ec0cfb 100644 +--- a/hw/block/fdc.c ++++ b/hw/block/fdc.c +@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) + int tmp; + fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ?
7 : fdctrl->fifo[5]); + tmp = (fdctrl->fifo[6] - ks + 1); ++ if (tmp < 0) { ++ FLOPPY_DPRINTF("invalid EOT: %d\n", tmp); ++ fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00); ++ fdctrl->fifo[3] = kt; ++ fdctrl->fifo[4] = kh; ++ fdctrl->fifo[5] = ks; ++ return; ++ } + if (fdctrl->fifo[0] & 0x80) + tmp += fdctrl->fifo[6]; + fdctrl->data_len *= tmp; +-- +2.31.1 + diff --git a/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch b/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch new file mode 100644 index 0000000..eea6fa2 --- /dev/null +++ b/kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch @@ -0,0 +1,75 @@ +From 2db3d0de1be018f14cb91fdd4a368996b09d8bec Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 1/3] hw/intc/arm_gicv3: Check for !MEMTX_OK instead of + MEMTX_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [1/3] 561c9c2b1249f07d33013040b1c495ed1fbf825b (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit b9d383ab797f54ae5fa8746117770709921dc529 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:19 2021 +0100 + + hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR + + Quoting Peter Maydell: + + "These MEMTX_* aren't from the memory transaction + API functions; they're just being used by gicd_readl() and + friends as a way to indicate a success/failure so that the + actual MemoryRegionOps read/write fns like gicv3_dist_read() + can log a guest error." + + We are going to introduce more MemTxResult bits, so it is + safer to check for !MEMTX_OK rather than MEMTX_ERROR. 
+ + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Peter Maydell + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Peter Maydell + +(cherry picked from commit b9d383ab797f54ae5fa8746117770709921dc529) +Signed-off-by: Jon Maloy +--- + hw/intc/arm_gicv3_redist.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/intc/arm_gicv3_redist.c b/hw/intc/arm_gicv3_redist.c +index c8ff3eca08..99b11ca5ee 100644 +--- a/hw/intc/arm_gicv3_redist.c ++++ b/hw/intc/arm_gicv3_redist.c +@@ -462,7 +462,7 @@ MemTxResult gicv3_redist_read(void *opaque, hwaddr offset, uint64_t *data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest read at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +@@ -521,7 +521,7 @@ MemTxResult gicv3_redist_write(void *opaque, hwaddr offset, uint64_t data, + break; + } + +- if (r == MEMTX_ERROR) { ++ if (r != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: invalid guest write at offset " TARGET_FMT_plx + " size %u\n", __func__, offset, size); +-- +2.27.0 + diff --git a/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch new file mode 100644 index 0000000..44897ac --- /dev/null +++ b/kvm-hw-virtio-Replace-g_memdup-by-g_memdup2.patch @@ -0,0 +1,95 @@ +From 4dad0e9abbc843fba4e5fee6e7aa1b0db13f5898 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:27:35 +0200 +Subject: [PATCH 03/32] hw/virtio: Replace g_memdup() by g_memdup2() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [3/27] ae196903eb1a7aebbf999100e997cf82e5024cb6 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + 
+Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d792199de55ca5cb5334016884039c740290b5c7 +Author: Philippe Mathieu-Daudé +Date: Thu May 12 19:57:46 2022 +0200 + + hw/virtio: Replace g_memdup() by g_memdup2() + + Per https://discourse.gnome.org/t/port-your-module-from-g-memdup-to-g-memdup2-now/5538 + + The old API took the size of the memory to duplicate as a guint, + whereas most memory functions take memory sizes as a gsize. This + made it easy to accidentally pass a gsize to g_memdup(). For large + values, that would lead to a silent truncation of the size from 64 + to 32 bits, and result in a heap area being returned which is + significantly smaller than what the caller expects. This can likely + be exploited in various modules to cause a heap buffer overflow. + + Replace g_memdup() by the safer g_memdup2() wrapper. + + Acked-by: Jason Wang + Acked-by: Eugenio Pérez + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20220512175747.142058-6-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 3 ++- + hw/virtio/virtio-crypto.c | 6 +++--- + 2 files changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 099e65036d..633de61513 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1458,7 +1458,8 @@ static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + iov_cnt = elem->out_num; +- iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num); ++ iov2 = iov = g_memdup2(elem->out_sg, ++ sizeof(struct iovec) * elem->out_num); + s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); + iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); + if (s != sizeof(ctrl)) { +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index dcd80b904d..0e31e3cc04 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -242,7 +242,7 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +@@ -605,11 +605,11 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) + } + + out_num = elem->out_num; +- out_iov_copy = g_memdup(elem->out_sg, sizeof(out_iov[0]) * out_num); ++ out_iov_copy = g_memdup2(elem->out_sg, sizeof(out_iov[0]) * out_num); + out_iov = out_iov_copy; + + in_num = elem->in_num; +- in_iov_copy = g_memdup(elem->in_sg, sizeof(in_iov[0]) * in_num); ++ in_iov_copy = g_memdup2(elem->in_sg, sizeof(in_iov[0]) * in_num); + in_iov = in_iov_copy; + + if (unlikely(iov_to_buf(out_iov, out_num, 0, &req, sizeof(req)) +-- +2.31.1 + diff --git a/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch new file mode 100644 index 0000000..bb42634 --- /dev/null +++ 
b/kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch @@ -0,0 +1,66 @@ +From f0115d856f46e65e3b62896f84fe1902a958bf79 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Tue, 22 Mar 2022 19:23:36 -0400 +Subject: [PATCH 04/18] hw/virtio: vdpa: Fix leak of host-notifier + memory-region + +RH-Author: Jon Maloy +RH-MergeRequest: 132: hw/virtio: vdpa: Fix leak of host-notifier memory-region +RH-Commit: [1/1] b3cec35d185e3b9844a458f5c51c5d5ef7e3d8f1 (jmaloy/qemu-kvm) +RH-Bugzilla: 2060843 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Laurent Vivier +RH-Acked-by: Igor Mammedov + +BZ: https://bugzilla.redhat.com/2060843 +UPSTREAM: no +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83 +Author: Laurent Vivier +Date: Fri Feb 11 18:02:59 2022 +0100 + + hw/virtio: vdpa: Fix leak of host-notifier memory-region + + If the call to virtio_queue_set_host_notifier_mr() fails, the + host-notifier memory-region should be freed. + + This problem can trigger a coredump with some vDPA drivers (mlx5, + but not with the vdpasim), if we unplug the virtio-net card from + the guest after a stop/start. + + The same fix has been done for vhost-user: + 1f89d3b91e3e ("hw/virtio: Fix leak of host-notifier memory-region") + + Fixes: d0416d487bd5 ("vhost-vdpa: map virtqueue notification area if possible") + Cc: jasowang@redhat.com + Resolves: https://bugzilla.redhat.com/2027208 + Signed-off-by: Laurent Vivier + Message-Id: <20220211170259.1388734-1-lvivier@redhat.com> + Cc: qemu-stable@nongnu.org + Acked-by: Jason Wang + Reviewed-by: Stefano Garzarella + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S.
Tsirkin + +(cherry picked from commit 98f7607ecda00dea3cbb2ed7b4427c96846efb83) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vdpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bcaf00e09f..78da48a333 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -415,6 +415,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) + g_free(name); + + if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { ++ object_unparent(OBJECT(&n->mr)); + munmap(addr, page_size); + goto err; + } +-- +2.27.0 + diff --git a/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch b/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch new file mode 100644 index 0000000..540f721 --- /dev/null +++ b/kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch @@ -0,0 +1,59 @@ +From ccaa1135bd1aa90c94f0e8b5417bd2a420134e6c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 08/18] i386: Add Icelake-Server-v6 CPU model with 5-level EPT + support + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [2/2] e913746b2df9cbd0308014ab5cc72577458857fa (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit: 12cab535db6440af41ed8dfefe908a594321b6ce +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:15 2022 +0100 + + i386: Add Icelake-Server-v6 CPU model with 5-level EPT support + + Windows 11 with WSL2 enabled (Hyper-V) fails to boot with Icelake-Server + {-v5} CPU model but boots well with '-cpu host'. Apparently, it expects + 5-level paging and 5-level EPT support to come as a pair but QEMU's + Icelake-Server CPU model lacks the latter. Introduce 'Icelake-Server-v6' + CPU model with 'vmx-page-walk-5' enabled by default.
+ + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-1-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit 12cab535db6440af41ed8dfefe908a594321b6ce) +Signed-off-by: Jon Maloy +--- + target/i386/cpu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index aa9e636800..6e25d13339 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3505,6 +3505,14 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 6, ++ .note = "5-level EPT", ++ .props = (PropValue[]) { ++ { "vmx-page-walk-5", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch new file mode 100644 index 0000000..a37ea6f --- /dev/null +++ b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -0,0 +1,52 @@ +From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 21 Apr 2022 16:24:35 +0200 +Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +FUSE exports' allow-other option defaults to "auto", which means that it +will try passing allow_other as a mount option, and fall back to not +using it when an error occurs. We make no effort to hide fusermount's +error message (because it would be difficult, and because users might +want to know about the fallback occurring), and so when allow_other does +not work (primarily when /etc/fuse.conf does not contain +user_allow_other), this error message will appear and break the +reference output. 
+ +We do not need allow_other here, though, so we can just pass +allow-other=off to fix that. + +Reported-by: Markus Armbruster +Signed-off-by: Hanna Reitz +Message-Id: <20220421142435.569600-1-hreitz@redhat.com> +Tested-by: Markus Armbruster +Tested-by: Eric Blake +(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index a3090e2875..4681c7c769 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -326,7 +326,7 @@ else + + $QSD \ + --blockdev file,node-name=export-node,filename="$TEST_IMG" \ +- --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \ + --pidfile "$TEST_DIR/qsd.pid" \ + & + +-- +2.31.1 + diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch new file mode 100644 index 0000000..7a968f6 --- /dev/null +++ b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -0,0 +1,445 @@ +From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:51 +0200 +Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +One clear problem with how qcow2's refcount structure rebuild algorithm +used to be before "qcow2: Improve refcount structure rebuilding" was +that it is prone to failure for qcow2 images on block devices: There is +generally unused space after the 
actual image, and if that exceeds what +one refblock covers, the old algorithm would invariably write the +reftable past the block device's end, which cannot work. The new +algorithm does not have this problem. + +Test it with three tests: +(1) Create an image with more empty space at the end than what one + refblock covers, see whether rebuilding the refcount structures + results in a change in the image file length. (It should not.) + +(2) Leave precisely enough space somewhere at the beginning of the image + for the new reftable (and the refblock for that place), see whether + the new algorithm puts the reftable there. (It should.) + +(3) Test the original problem: Create (something like) a block device + with a fixed size, then create a qcow2 image in there, write some + data, and then have qemu-img check rebuild the refcount structures. + Before HEAD^, the reftable would have been written past the image + file end, i.e. outside of what the block device provides, which + cannot work. HEAD^ should have fixed that. + ("Something like a block device" means a loop device if we can use + one ("sudo -n losetup" works), or a FUSE block export with + growable=false otherwise.) + +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-3-hreitz@redhat.com> +(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162) + +Conflicts: +- 108: The downstream qemu-storage-daemon does not support --daemonize, + so this switch has been replaced by a loop waiting for the PID file to + appear + +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/108 | 263 ++++++++++++++++++++++++++++++++++++- + tests/qemu-iotests/108.out | 81 ++++++++++++ + 2 files changed, 343 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 56339ab2c5..a3090e2875 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -30,13 +30,20 @@ status=1 # failure is the default! 
+ + _cleanup() + { +- _cleanup_test_img ++ _cleanup_test_img ++ if [ -f "$TEST_DIR/qsd.pid" ]; then ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -KILL "$qsd_pid" ++ fusermount -u "$TEST_DIR/fuse-export" &>/dev/null ++ fi ++ rm -f "$TEST_DIR/fuse-export" + } + trap "_cleanup; exit \$status" 0 1 2 3 15 + + # get standard environment, filters and checks + . ./common.rc + . ./common.filter ++. ./common.qemu + + # This tests qcow2-specific low-level functionality + _supported_fmt qcow2 +@@ -47,6 +54,22 @@ _supported_os Linux + # files + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + ++# This test either needs sudo -n losetup or FUSE exports to work ++if sudo -n losetup &>/dev/null; then ++ loopdev=true ++else ++ loopdev=false ++ ++ # QSD --export fuse will either yield "Parameter 'id' is missing" ++ # or "Invalid parameter 'fuse'", depending on whether there is ++ # FUSE support or not. ++ error=$($QSD --export fuse 2>&1) ++ if [[ $error = *"'fuse'"* ]]; then ++ _notrun 'Passwordless sudo for losetup or FUSE support required, but' \ ++ 'neither is available' ++ fi ++fi ++ + echo + echo '=== Repairing an image without any refcount table ===' + echo +@@ -138,6 +161,244 @@ _make_test_img 64M + poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00" + _check_test_img -r all + ++echo ++echo '=== Check rebuilt reftable location ===' ++ ++# In an earlier version of the refcount rebuild algorithm, the ++# reftable was generally placed at the image end (unless something was ++# allocated in the area covered by the refblock right before the image ++# file end, then we would try to place the reftable in that refblock). ++# This was later changed so the reftable would be placed in the ++# earliest possible location. Test this. ++ ++echo ++echo '--- Does the image size increase? 
---' ++echo ++ ++# First test: Just create some image, write some data to it, and ++# resize it so there is free space at the end of the image (enough ++# that it spans at least one full refblock, which for cluster_size=512 ++# images, spans 128k). With the old algorithm, the reftable would ++# have then been placed at the end of the image file, but with the new ++# one, it will be put in that free space. ++# We want to check whether the size of the image file increases due to ++# rebuilding the refcount structures (it should not). ++ ++_make_test_img -o 'cluster_size=512' 1M ++# Write something ++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io ++ ++# Add free space ++file_len=$(stat -c '%s' "$TEST_IMG") ++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG" ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++# Check whether rebuilding the refcount structures increases the image ++# file size ++file_len=$(stat -c '%s' "$TEST_IMG") ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++post_repair_file_len=$(stat -c '%s' "$TEST_IMG") ++ ++if [[ $file_len -eq $post_repair_file_len ]]; then ++ echo 'OK: Image size did not change' ++else ++ echo 'ERROR: Image size differs' \ ++ "($file_len before, $post_repair_file_len after)" ++fi ++ ++echo ++echo '--- Will the reftable occupy a hole specifically left for it? ---' ++echo ++ ++# Note: With cluster_size=512, every refblock covers 128k. ++# The reftable covers 8M per reftable cluster. ++ ++# Create an image that requires two reftable clusters (just because ++# this is more interesting than a single-clustered reftable). 
++_make_test_img -o 'cluster_size=512' 9M ++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io ++ ++# Writing 8M will have resized the reftable. Unfortunately, doing so ++# will leave holes in the file, so we need to fill them up so we can ++# be sure the whole file is allocated. Do that by writing ++# consecutively smaller chunks starting from 8 MB, until the file ++# length increases even with a chunk size of 512. Then we must have ++# filled all holes. ++ofs=$((8 * 1024 * 1024)) ++block_len=$((16 * 1024)) ++while [[ $block_len -ge 512 ]]; do ++ file_len=$(stat -c '%s' "$TEST_IMG") ++ while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do ++ # Do not include this in the reference output, it does not ++ # really matter which qemu-io calls we do here exactly ++ $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null ++ ofs=$((ofs + block_len)) ++ done ++ block_len=$((block_len / 2)) ++done ++ ++# Fill up to 9M (do not include this in the reference output either, ++# $ofs is random for all we know) ++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null ++ ++# Make space as follows: ++# - For the first refblock: Right at the beginning of the image (this ++# refblock is placed in the first place possible), ++# - For the reftable somewhere soon afterwards, still near the ++# beginning of the image (i.e. covered by the first refblock); the ++# reftable too is placed in the first place possible, but only after ++# all refblocks have been placed) ++# No space is needed for the other refblocks, because no refblock is ++# put before the space it covers. In this test case, we do not mind ++# if they are placed at the image file's end. ++ ++# Before we make that space, we have to find out the host offset of ++# the area that belonged to the two data clusters at guest offset 4k, ++# because we expect the reftable to be placed there, and we will have ++# to verify that it is. 
++ ++l1_offset=$(peek_file_be "$TEST_IMG" 40 8) ++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8) ++l2_offset=$((l2_offset & 0x00fffffffffffe00)) ++data_4k_offset=$(peek_file_be "$TEST_IMG" \ ++ $((l2_offset + 4096 / 512 * 8)) 8) ++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00)) ++ ++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8) ++poke_file "$TEST_IMG" $rb_offset "\x00\x00" ++ ++echo ++# The only leaks there can be are the old refcount structures that are ++# leaked during rebuilding, no need to clutter the output with them ++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0' ++echo ++ ++# Check whether the reftable was put where we expected ++rt_offset=$(peek_file_be "$TEST_IMG" 48 8) ++if [[ $rt_offset -eq $data_4k_offset ]]; then ++ echo 'OK: Reftable is where we expect it' ++else ++ echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset" ++fi ++ ++echo ++echo '--- Rebuilding refcount structures on block devices ---' ++echo ++ ++# A block device cannot really grow, at least not during qemu-img ++# check. As mentioned in the above cases, rebuilding the refcount ++# structure may lead to new refcount structures being written after ++# the end of the image, and in the past that happened even if there ++# was more than sufficient space in the image. Such post-EOF writes ++# will not work on block devices, so test that the new algorithm ++# avoids it. ++ ++# If we have passwordless sudo and losetup, we can use those to create ++# a block device. Otherwise, we can resort to qemu's FUSE export to ++# create a file that isn't growable, which effectively tests the same ++# thing. 
++ ++_cleanup_test_img ++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG" ++ ++if $loopdev; then ++ export_mp=$(sudo -n losetup --show -f "$TEST_IMG") ++ export_mp_driver=host_device ++ sudo -n chmod go+rw "$export_mp" ++else ++ # Create non-growable FUSE export that is a bit like an empty ++ # block device ++ export_mp="$TEST_DIR/fuse-export" ++ export_mp_driver=file ++ touch "$export_mp" ++ ++ $QSD \ ++ --blockdev file,node-name=export-node,filename="$TEST_IMG" \ ++ --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \ ++ --pidfile "$TEST_DIR/qsd.pid" \ ++ & ++ ++ while [ ! -f "$TEST_DIR/qsd.pid" ]; do ++ sleep 0.1 ++ done ++fi ++ ++# Now create a qcow2 image on the device -- unfortunately, qemu-img ++# create force-creates the file, so we have to resort to the ++# blockdev-create job. ++_launch_qemu \ ++ --blockdev $export_mp_driver,node-name=file,filename="$export_mp" ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "qmp_capabilities" }' \ ++ 'return' ++ ++# Small cluster size again, so the image needs multiple refblocks ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "qcow2", ++ "file": "file", ++ "size": '$((64 * 1024 * 1024))', ++ "cluster-size": 512 ++ } } }' \ ++ '"concluded"' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \ ++ 'return' ++ ++_send_qemu_cmd \ ++ $QEMU_HANDLE \ ++ '{ "execute": "quit" }' \ ++ 'return' ++ ++wait=y _cleanup_qemu ++echo ++ ++# Write some data ++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io ++ ++# Corrupt the image by saying the image header was not allocated ++rt_offset=$(peek_file_be "$export_mp" 48 8) ++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8) ++poke_file "$export_mp" $rb_offset "\x00\x00" ++ ++# Repairing such a simple case should just work ++# (We used to put the reftable at the end of the image 
file, which can ++# never work for non-growable devices.) ++echo ++TEST_IMG="$export_mp" _check_test_img -r all \ ++ | grep -v '^Repairing cluster.*refcount=1 reference=0' ++ ++if $loopdev; then ++ sudo -n losetup -d "$export_mp" ++else ++ qsd_pid=$(cat "$TEST_DIR/qsd.pid") ++ kill -TERM "$qsd_pid" ++ # Wait for process to exit (cannot `wait` because the QSD is daemonized) ++ while [ -f "$TEST_DIR/qsd.pid" ]; do ++ true ++ done ++fi ++ + # success, all done + echo '*** done' + rm -f $seq.full +diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out +index 75bab8dc84..b5401d788d 100644 +--- a/tests/qemu-iotests/108.out ++++ b/tests/qemu-iotests/108.out +@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired: + 0 leaked clusters + 1 corruptions + ++Double checking the fixed image now... ++No errors were found on the image. ++ ++=== Check rebuilt reftable location === ++ ++--- Does the image size increase? --- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Image size did not change ++ ++--- Will the reftable occupy a hole specifically left for it? 
--- ++ ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184 ++wrote 8388608/8388608 bytes at offset 0 ++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++discard 1024/1024 bytes at offset 4096 ++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ ++Double checking the fixed image now... ++No errors were found on the image. ++ ++OK: Reftable is where we expect it ++ ++--- Rebuilding refcount structures on block devices --- ++ ++{ "execute": "qmp_capabilities" } ++{"return": {}} ++{ "execute": "blockdev-create", ++ "arguments": { ++ "job-id": "create", ++ "options": { ++ "driver": "IMGFMT", ++ "file": "file", ++ "size": 67108864, ++ "cluster-size": 512 ++ } } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}} ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}} ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}} ++{ "execute": "job-dismiss", "arguments": { "id": "create" } } ++{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}} ++{"return": {}} ++{ "execute": "quit" } ++{"return": {}} ++{"timestamp": {"seconds": TIMESTAMP, 
"microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}} ++ ++wrote 65536/65536 bytes at offset 0 ++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++ERROR cluster 0 refcount=0 reference=1 ++Rebuilding refcount structure ++The following inconsistencies were found and repaired: ++ ++ 0 leaked clusters ++ 1 corruptions ++ + Double checking the fixed image now... + No errors were found on the image. + *** done +-- +2.31.1 + diff --git a/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch new file mode 100644 index 0000000..b703c23 --- /dev/null +++ b/kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch @@ -0,0 +1,108 @@ +From 2ed48247fd39ade97164dee3c65162b96a116f14 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:12 +0100 +Subject: [PATCH 6/6] iotests/281: Let NBD connection yield in iothread + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [6/6] a23706f34022d301eb7ffc84fc0d0a77d72b9844 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Put an NBD block device into an I/O thread, and then read data from it, +hoping that the NBD connection will yield during that read. When it +does, the coroutine must be reentered in the block device's I/O thread, +which will only happen if the NBD block driver attaches the connection's +QIOChannel to the new AioContext. It did not do that after 4ddb5d2fde +("block/nbd: drop connection_co") and prior to "block/nbd: Move s->ioc +on AioContext change", which would cause an assertion failure. + +To improve our chances of yielding, the NBD server is throttled to +reading 64 kB/s, and the NBD client reads 128 kB, so it should yield at +some point. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 8cfbe929e8c26050f0a4580a1606a370a947d4ce) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 28 +++++++++++++++++++++++++--- + tests/qemu-iotests/281.out | 4 ++-- + 2 files changed, 27 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 13c588be75..b2ead7f388 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -253,8 +253,9 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + self.create_nbd_export() + + # Simple VM with an NBD block device connected to the NBD export +- # provided by the QSD ++ # provided by the QSD, and an (initially unused) iothread + self.vm = iotests.VM() ++ self.vm.add_object('iothread,id=iothr') + self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + + f'server.path={self.sock},export=exp,' + + 'reconnect-delay=1') +@@ -293,19 +294,40 @@ class TestYieldingAndTimers(iotests.QMPTestCase): + # thus not see the error, and so the test will pass.) + time.sleep(2) + ++ def test_yield_in_iothread(self): ++ # Move the NBD node to the I/O thread; the NBD block driver should ++ # attach the connection's QIOChannel to that thread's AioContext, too ++ result = self.vm.qmp('x-blockdev-set-iothread', ++ node_name='nbd', iothread='iothr') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Do some I/O that will be throttled by the QSD, so that the network ++ # connection hopefully will yield here. When it is resumed, it must ++ # then be resumed in the I/O thread's AioContext. ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "read 0 128K"') ++ self.assert_qmp(result, 'return', '') ++ + def create_nbd_export(self): + assert self.qsd is None + +- # Simple NBD export of a null-co BDS ++ # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s), ++ # writes are not. 
+ self.qsd = QemuStorageDaemon( ++ '--object', ++ 'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536', ++ + '--blockdev', + 'null-co,node-name=null,read-zeroes=true', + ++ '--blockdev', ++ 'throttle,node-name=thr,file=null,throttle-group=thrgr', ++ + '--nbd-server', + f'addr.type=unix,addr.path={self.sock}', + + '--export', +- 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ 'nbd,id=exp,node-name=thr,name=exp,writable=true' + ) + + def stop_nbd_export(self): +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 914e3737bd..3f8a935a08 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-..... ++...... + ---------------------------------------------------------------------- +-Ran 5 tests ++Ran 6 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-281-Test-lingering-timers.patch b/kvm-iotests-281-Test-lingering-timers.patch new file mode 100644 index 0000000..c31b413 --- /dev/null +++ b/kvm-iotests-281-Test-lingering-timers.patch @@ -0,0 +1,174 @@ +From b56684f6c1bef4fb5bf87ac5a1106d3830c05ad0 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:10 +0100 +Subject: [PATCH 4/6] iotests/281: Test lingering timers + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [4/6] aaad466941637a34224dc037bbea37d128b5676b +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Prior to "block/nbd: Delete reconnect delay timer when done" and +"block/nbd: Delete open timer when done", both of those timers would +remain scheduled even after successfully (re-)connecting to the server, +and they would not even be deleted when the BDS is deleted. + +This test constructs exactly this situation: +(1) Configure an @open-timeout, so the open timer is armed, and +(2) Configure a @reconnect-delay and trigger a reconnect situation + (which succeeds immediately), so the reconnect delay timer is armed. 
+Then we immediately delete the BDS, and sleep for longer than the +@open-timeout and @reconnect-delay. Prior to said patches, this caused +one (or both) of the timer CBs to access already-freed data. + +Accessing freed data may or may not crash, so this test can produce +false successes, but I do not know how to show the problem in a better +or more reliable way. If you run this test on "block/nbd: Assert there +are no timers when closed" and without the fix patches mentioned above, +you should reliably see an assertion failure. +(But all other tests that use the reconnect delay timer (264 and 277) +will fail in that configuration, too; as will nbd-reconnect-on-open, +which uses the open timer.) + +Remove this test from the quick group because of the two second sleep +this patch introduces. + +(I decided to put this test case into 281, because the main bug this +series addresses is in the interaction of the NBD block driver and I/O +threads, which is precisely the scope of 281. The test case for that +other bug will also be put into the test class added here. + +Also, excuse the test class's name, I couldn't come up with anything +better. The "yield" part will make sense two patches from now.) + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0) + +Conflict: +- @open-timeout was introduced after the 6.2 release, and has not been + backported. Consequently, there is no open_timer, and we can (and + must) drop the respective parts of the test here. 
+ +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/281 | 73 ++++++++++++++++++++++++++++++++++++-- + tests/qemu-iotests/281.out | 4 +-- + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281 +index 956698083f..13c588be75 100755 +--- a/tests/qemu-iotests/281 ++++ b/tests/qemu-iotests/281 +@@ -1,5 +1,5 @@ + #!/usr/bin/env python3 +-# group: rw quick ++# group: rw + # + # Test cases for blockdev + IOThread interactions + # +@@ -20,8 +20,9 @@ + # + + import os ++import time + import iotests +-from iotests import qemu_img ++from iotests import qemu_img, QemuStorageDaemon + + image_len = 64 * 1024 * 1024 + +@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase): + # Hangs on failure, we expect this error. + self.assert_qmp(result, 'error/class', 'GenericError') + ++# Test for RHBZ#2033626 ++class TestYieldingAndTimers(iotests.QMPTestCase): ++ sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ qsd = None ++ ++ def setUp(self): ++ self.create_nbd_export() ++ ++ # Simple VM with an NBD block device connected to the NBD export ++ # provided by the QSD ++ self.vm = iotests.VM() ++ self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' + ++ f'server.path={self.sock},export=exp,' + ++ 'reconnect-delay=1') ++ ++ self.vm.launch() ++ ++ def tearDown(self): ++ self.stop_nbd_export() ++ self.vm.shutdown() ++ ++ def test_timers_with_blockdev_del(self): ++ # Stop and restart the NBD server, and do some I/O on the client to ++ # trigger a reconnect and start the reconnect delay timer ++ self.stop_nbd_export() ++ self.create_nbd_export() ++ ++ result = self.vm.qmp('human-monitor-command', ++ command_line='qemu-io nbd "write 0 512"') ++ self.assert_qmp(result, 'return', '') ++ ++ # Reconnect is done, so the reconnect delay timer should be gone. ++ # (But there used to be a bug where it remained active, for which this ++ # is a regression test.) ++ ++ # Delete the BDS to see whether the timer is gone. 
If it is not, ++ # it will remain active, fire later, and then access freed data. ++ # (Or, with "block/nbd: Assert there are no timers when closed" ++ # applied, the assertion added in that patch will fail.) ++ result = self.vm.qmp('blockdev-del', node_name='nbd') ++ self.assert_qmp(result, 'return', {}) ++ ++ # Give the timer some time to fire (it has a timeout of 1 s). ++ # (Sleeping in an iotest may ring some alarm bells, but note that if ++ # the timing is off here, the test will just always pass. If we kill ++ # the VM too early, then we just kill the timer before it can fire, ++ # thus not see the error, and so the test will pass.) ++ time.sleep(2) ++ ++ def create_nbd_export(self): ++ assert self.qsd is None ++ ++ # Simple NBD export of a null-co BDS ++ self.qsd = QemuStorageDaemon( ++ '--blockdev', ++ 'null-co,node-name=null,read-zeroes=true', ++ ++ '--nbd-server', ++ f'addr.type=unix,addr.path={self.sock}', ++ ++ '--export', ++ 'nbd,id=exp,node-name=null,name=exp,writable=true' ++ ) ++ ++ def stop_nbd_export(self): ++ self.qsd.stop() ++ self.qsd = None ++ + if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2'], + supported_protocols=['file']) +diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out +index 89968f35d7..914e3737bd 100644 +--- a/tests/qemu-iotests/281.out ++++ b/tests/qemu-iotests/281.out +@@ -1,5 +1,5 @@ +-.... ++..... 
+ ---------------------------------------------------------------------- +-Ran 4 tests ++Ran 5 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch new file mode 100644 index 0000000..1caf73c --- /dev/null +++ b/kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch @@ -0,0 +1,106 @@ +From ea4d8424fb2053b1cbb9538190b2b06351054125 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 3 Feb 2022 15:05:34 +0100 +Subject: [PATCH 3/5] iotests: Test blockdev-reopen with iothreads and + throttling + +RH-Author: Kevin Wolf +RH-MergeRequest: 142: block: Lock AioContext for drain_end in blockdev-reopen +RH-Commit: [2/2] 91d365864c391ca7db7db13260913fb61987b833 +RH-Bugzilla: 2067118 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Hanna Reitz + +The 'throttle' block driver implements .bdrv_co_drain_end, so +blockdev-reopen will have to wait for it to complete in the polling +loop at the end of qmp_blockdev_reopen(). This makes AIO_WAIT_WHILE() +release the AioContext lock, which causes a crash if the lock hasn't +correctly been taken. 
+ +Signed-off-by: Kevin Wolf +Message-Id: <20220203140534.36522-3-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit ee810602376125ca0e0afd6b7c715e13740978ea) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/245 | 36 +++++++++++++++++++++++++++++++++--- + tests/qemu-iotests/245.out | 4 ++-- + 2 files changed, 35 insertions(+), 5 deletions(-) + +diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245 +index 24ac43f70e..8cbed7821b 100755 +--- a/tests/qemu-iotests/245 ++++ b/tests/qemu-iotests/245 +@@ -1138,12 +1138,13 @@ class TestBlockdevReopen(iotests.QMPTestCase): + self.assertEqual(self.get_node('hd1'), None) + self.assert_qmp(self.get_node('hd2'), 'ro', True) + +- def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None): +- opts = hd_opts(0) ++ def run_test_iothreads(self, iothread_a, iothread_b, errmsg = None, ++ opts_a = None, opts_b = None): ++ opts = opts_a or hd_opts(0) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts) + self.assert_qmp(result, 'return', {}) + +- opts2 = hd_opts(2) ++ opts2 = opts_b or hd_opts(2) + result = self.vm.qmp('blockdev-add', conv_keys = False, **opts2) + self.assert_qmp(result, 'return', {}) + +@@ -1194,6 +1195,35 @@ class TestBlockdevReopen(iotests.QMPTestCase): + def test_iothreads_switch_overlay(self): + self.run_test_iothreads('', 'iothread0') + ++ def test_iothreads_with_throttling(self): ++ # Create a throttle-group object ++ opts = { 'qom-type': 'throttle-group', 'id': 'group0', ++ 'limits': { 'iops-total': 1000 } } ++ result = self.vm.qmp('object-add', conv_keys = False, **opts) ++ self.assert_qmp(result, 'return', {}) ++ ++ # Options with a throttle filter between format and protocol ++ opts = [ ++ { ++ 'driver': iotests.imgfmt, ++ 'node-name': f'hd{idx}', ++ 'file' : { ++ 'node-name': f'hd{idx}-throttle', ++ 'driver': 'throttle', ++ 'throttle-group': 'group0', ++ 'file': { ++ 'driver': 'file', ++ 'node-name': f'hd{idx}-file', ++ 
'filename': hd_path[idx], ++ }, ++ }, ++ } ++ for idx in (0, 2) ++ ] ++ ++ self.run_test_iothreads('iothread0', 'iothread0', None, ++ opts[0], opts[1]) ++ + if __name__ == '__main__': + iotests.activate_logging() + iotests.main(supported_fmts=["qcow2"], +diff --git a/tests/qemu-iotests/245.out b/tests/qemu-iotests/245.out +index 4eced19294..a4e04a3266 100644 +--- a/tests/qemu-iotests/245.out ++++ b/tests/qemu-iotests/245.out +@@ -17,8 +17,8 @@ read 1/1 bytes at offset 262152 + read 1/1 bytes at offset 262160 + 1 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +-............... ++................ + ---------------------------------------------------------------------- +-Ran 25 tests ++Ran 26 tests + + OK +-- +2.27.0 + diff --git a/kvm-iotests-block-status-cache-New-test.patch b/kvm-iotests-block-status-cache-New-test.patch new file mode 100644 index 0000000..25f057c --- /dev/null +++ b/kvm-iotests-block-status-cache-New-test.patch @@ -0,0 +1,197 @@ +From 0ba4c0836f702bb3abbd173c7ee486a8247331ae Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 18 Jan 2022 18:00:00 +0100 +Subject: [PATCH 7/7] iotests/block-status-cache: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 112: block/io: Update BSC only if want_zero is true +RH-Commit: [2/2] ba86b4db32c33e17a85f476d445ef0523cf8f60e +RH-Bugzilla: 2041480 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a new test to verify that want_zero=false block-status calls do not +pollute the block-status cache for want_zero=true calls. + +We check want_zero=true calls and their results using `qemu-img map` +(over NBD), and want_zero=false calls also using `qemu-img map` over +NBD, but using the qemu:allocation-depth context. + +(This test case cannot be integrated into nbd-qemu-allocation, because +that is a qcow2 test, and this is a raw test.) 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220118170000.49423-3-hreitz@redhat.com> +Reviewed-by: Nir Soffer +Reviewed-by: Eric Blake +Tested-by: Eric Blake +Signed-off-by: Eric Blake +(cherry picked from commit 6384dd534d742123d26c008d9794b20bc41359d5) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/tests/block-status-cache | 139 ++++++++++++++++++ + .../qemu-iotests/tests/block-status-cache.out | 5 + + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/block-status-cache + create mode 100644 tests/qemu-iotests/tests/block-status-cache.out + +diff --git a/tests/qemu-iotests/tests/block-status-cache b/tests/qemu-iotests/tests/block-status-cache +new file mode 100755 +index 0000000000..6fa10bb8f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache +@@ -0,0 +1,139 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test cases for the block-status cache. ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++import signal ++import iotests ++from iotests import qemu_img_create, qemu_img_pipe, qemu_nbd ++ ++ ++image_size = 1 * 1024 * 1024 ++test_img = os.path.join(iotests.test_dir, 'test.img') ++ ++nbd_pidfile = os.path.join(iotests.test_dir, 'nbd.pid') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++class TestBscWithNbd(iotests.QMPTestCase): ++ def setUp(self) -> None: ++ """Just create an empty image with a read-only NBD server on it""" ++ assert qemu_img_create('-f', iotests.imgfmt, test_img, ++ str(image_size)) == 0 ++ ++ # Pass --allocation-depth to enable the qemu:allocation-depth context, ++ # which we are going to query to provoke a block-status inquiry with ++ # want_zero=false. ++ assert qemu_nbd(f'--socket={nbd_sock}', ++ f'--format={iotests.imgfmt}', ++ '--persistent', ++ '--allocation-depth', ++ '--read-only', ++ f'--pid-file={nbd_pidfile}', ++ test_img) \ ++ == 0 ++ ++ def tearDown(self) -> None: ++ with open(nbd_pidfile, encoding='utf-8') as f: ++ pid = int(f.read()) ++ os.kill(pid, signal.SIGTERM) ++ os.remove(nbd_pidfile) ++ os.remove(test_img) ++ ++ def test_with_zero_bug(self) -> None: ++ """ ++ Verify that the block-status cache is not corrupted by a ++ want_zero=false call. ++ We can provoke a want_zero=false call with `qemu-img map` over NBD with ++ x-dirty-bitmap=qemu:allocation-depth, so we first run a normal `map` ++ (which results in want_zero=true), then using said ++ qemu:allocation-depth context, and finally another normal `map` to ++ verify that the cache has not been corrupted. ++ """ ++ ++ nbd_img_opts = f'driver=nbd,server.type=unix,server.path={nbd_sock}' ++ nbd_img_opts_alloc_depth = nbd_img_opts + \ ++ ',x-dirty-bitmap=qemu:allocation-depth' ++ ++ # Normal map, results in want_zero=true. ++ # This will probably detect an allocated data sector first (qemu likes ++ # to allocate the first sector to facilitate alignment probing), and ++ # then the rest to be zero. 
The BSC will thus contain (if anything) ++ # one range covering the first sector. ++ map_pre = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ # qemu:allocation-depth maps for want_zero=false. ++ # want_zero=false should (with the file driver, which the server is ++ # using) report everything as data. While this is sufficient for ++ # want_zero=false, this is nothing that should end up in the ++ # block-status cache. ++ # Due to a bug, this information did end up in the cache, though, and ++ # this would lead to wrong information being returned on subsequent ++ # want_zero=true calls. ++ # ++ # We need to run this map twice: On the first call, we probably still ++ # have the first sector in the cache, and so this will be served from ++ # the cache; and only the subsequent range will be queried from the ++ # block driver. This subsequent range will then be entered into the ++ # cache. ++ # If we did a want_zero=true call at this point, we would thus get ++ # correct information: The first sector is not covered by the cache, so ++ # we would get fresh block-status information from the driver, which ++ # would return a data range, and this would then go into the cache, ++ # evicting the wrong range from the want_zero=false call before. ++ # ++ # Therefore, we need a second want_zero=false map to reproduce: ++ # Since the first sector is not in the cache, the query for its status ++ # will go to the driver, which will return a result that reports the ++ # whole image to be a single data area. This result will then go into ++ # the cache, and so the cache will then report the whole image to ++ # contain data. ++ # ++ # Note that once the cache reports the whole image to contain data, any ++ # subsequent map operation will be served from the cache, and so we can ++ # never loop too many times here. 
++ for _ in range(2): ++ # (Ignore the result, this is just to contaminate the cache) ++ qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts_alloc_depth) ++ ++ # Now let's see whether the cache reports everything as data, or ++ # whether we get correct information (i.e. the same as we got on our ++ # first attempt). ++ map_post = qemu_img_pipe('map', '--output=json', '--image-opts', ++ nbd_img_opts) ++ ++ if map_pre != map_post: ++ print('ERROR: Map information differs before and after querying ' + ++ 'qemu:allocation-depth') ++ print('Before:') ++ print(map_pre) ++ print('After:') ++ print(map_post) ++ ++ self.fail("Map information differs") ++ ++ ++if __name__ == '__main__': ++ # The block-status cache only works on the protocol layer, so to test it, ++ # we can only use the raw format ++ iotests.main(supported_fmts=['raw'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/block-status-cache.out b/tests/qemu-iotests/tests/block-status-cache.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/block-status-cache.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-iotests-stream-error-on-reset-New-test.patch b/kvm-iotests-stream-error-on-reset-New-test.patch new file mode 100644 index 0000000..0214854 --- /dev/null +++ b/kvm-iotests-stream-error-on-reset-New-test.patch @@ -0,0 +1,198 @@ +From ffdec41922a34b6fe4e7e11f259553d65b41563e Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 11 Jan 2022 15:36:13 +0000 +Subject: [PATCH 4/6] iotests/stream-error-on-reset: New test + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 109: block-backend: prevent dangling BDS pointers across aio_poll() +RH-Commit: [2/2] 0ecb7010d9c121398e7ee22ee47dd85d89bcd941 +RH-Bugzilla: 2021778 2036178 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Kevin Wolf + +Author: Hanna Reitz + +Test the following scenario: +- Simple stream block in two-layer backing chain (base and top) +- The job is drained via blk_drain(), then an error occurs while the job + settles the ongoing request +- And so the job completes while in blk_drain() + +This was reported as a segfault, but is fixed by "block-backend: prevent +dangling BDS pointers across aio_poll()". 
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2036178 +Signed-off-by: Hanna Reitz +Signed-off-by: Stefan Hajnoczi +Message-Id: <20220111153613.25453-3-stefanha@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 2ca1d5d6b91f8a52a5c651f660b2f58c94bf97ba) +Signed-off-by: Stefan Hajnoczi +--- + .../qemu-iotests/tests/stream-error-on-reset | 140 ++++++++++++++++++ + .../tests/stream-error-on-reset.out | 5 + + 2 files changed, 145 insertions(+) + create mode 100755 tests/qemu-iotests/tests/stream-error-on-reset + create mode 100644 tests/qemu-iotests/tests/stream-error-on-reset.out + +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset b/tests/qemu-iotests/tests/stream-error-on-reset +new file mode 100755 +index 0000000000..7eaedb24d7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Test what happens when a stream job completes in a blk_drain(). ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++import os ++import iotests ++from iotests import imgfmt, qemu_img_create, qemu_io_silent, QMPTestCase ++ ++ ++image_size = 1 * 1024 * 1024 ++data_size = 64 * 1024 ++base = os.path.join(iotests.test_dir, 'base.img') ++top = os.path.join(iotests.test_dir, 'top.img') ++ ++ ++# We want to test completing a stream job in a blk_drain(). 
++# ++# The blk_drain() we are going to use is a virtio-scsi device resetting, ++# which we can trigger by resetting the system. ++# ++# In order to have the block job complete on drain, we (1) throttle its ++# base image so we can start the drain after it has begun, but before it ++# completes, and (2) make it encounter an I/O error on the ensuing write. ++# (If it completes regularly, the completion happens after the drain for ++# some reason.) ++ ++class TestStreamErrorOnReset(QMPTestCase): ++ def setUp(self) -> None: ++ """ ++ Create two images: ++ - base image {base} with {data_size} bytes allocated ++ - top image {top} without any data allocated ++ ++ And the following VM configuration: ++ - base image throttled to {data_size} ++ - top image with a blkdebug configuration so the first write access ++ to it will result in an error ++ - top image is attached to a virtio-scsi device ++ """ ++ assert qemu_img_create('-f', imgfmt, base, str(image_size)) == 0 ++ assert qemu_io_silent('-c', f'write 0 {data_size}', base) == 0 ++ assert qemu_img_create('-f', imgfmt, top, str(image_size)) == 0 ++ ++ self.vm = iotests.VM() ++ self.vm.add_args('-accel', 'tcg') # Make throttling work properly ++ self.vm.add_object(self.vm.qmp_to_opts({ ++ 'qom-type': 'throttle-group', ++ 'id': 'thrgr', ++ 'x-bps-total': str(data_size) ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'base', ++ 'file': { ++ 'driver': 'throttle', ++ 'throttle-group': 'thrgr', ++ 'file': { ++ 'driver': 'file', ++ 'filename': base ++ } ++ } ++ })) ++ self.vm.add_blockdev(self.vm.qmp_to_opts({ ++ 'driver': imgfmt, ++ 'node-name': 'top', ++ 'file': { ++ 'driver': 'blkdebug', ++ 'node-name': 'top-blkdebug', ++ 'inject-error': [{ ++ 'event': 'pwritev', ++ 'immediately': 'true', ++ 'once': 'true' ++ }], ++ 'image': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }, ++ 'backing': 'base' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'virtio-scsi', ++ 'id': 
'vscsi' ++ })) ++ self.vm.add_device(self.vm.qmp_to_opts({ ++ 'driver': 'scsi-hd', ++ 'bus': 'vscsi.0', ++ 'drive': 'top' ++ })) ++ self.vm.launch() ++ ++ def tearDown(self) -> None: ++ self.vm.shutdown() ++ os.remove(top) ++ os.remove(base) ++ ++ def test_stream_error_on_reset(self) -> None: ++ # Launch a stream job, which will take at least a second to ++ # complete, because the base image is throttled (so we can ++ # get in between it having started and it having completed) ++ res = self.vm.qmp('block-stream', job_id='stream', device='top') ++ self.assert_qmp(res, 'return', {}) ++ ++ while True: ++ ev = self.vm.event_wait('JOB_STATUS_CHANGE') ++ if ev['data']['status'] == 'running': ++ # Once the stream job is running, reset the system, which ++ # forces the virtio-scsi device to be reset, thus draining ++ # the stream job, and making it complete. Completing ++ # inside of that drain should not result in a segfault. ++ res = self.vm.qmp('system_reset') ++ self.assert_qmp(res, 'return', {}) ++ elif ev['data']['status'] == 'null': ++ # The test is done once the job is gone ++ break ++ ++ ++if __name__ == '__main__': ++ # Passes with any format with backing file support, but qed and ++ # qcow1 do not seem to exercise the used-to-be problematic code ++ # path, so there is no point in having them in this list ++ iotests.main(supported_fmts=['qcow2', 'vmdk'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/stream-error-on-reset.out b/tests/qemu-iotests/tests/stream-error-on-reset.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/stream-error-on-reset.out +@@ -0,0 +1,5 @@ ++. 
++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.27.0 + diff --git a/kvm-iotests.py-Add-QemuStorageDaemon-class.patch b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch new file mode 100644 index 0000000..539897f --- /dev/null +++ b/kvm-iotests.py-Add-QemuStorageDaemon-class.patch @@ -0,0 +1,92 @@ +From 34ffcd1a463bd3c1d36ed2f33dd6335b35b38460 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Fri, 4 Feb 2022 12:10:09 +0100 +Subject: [PATCH 3/6] iotests.py: Add QemuStorageDaemon class + +RH-Author: Hanna Reitz +RH-MergeRequest: 117: block/nbd: Handle AioContext changes +RH-Commit: [3/6] 754fe76bc5e8be57f4b78f176531014c4a12b044 +RH-Bugzilla: 2035185 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +This is a rather simple class that allows creating a QSD instance +running in the background and stopping it when no longer needed. + +The __del__ handler is a safety net for when something goes so wrong in +a test that e.g. the tearDown() method is not called (e.g. setUp() +launches the QSD, but then launching a VM fails). We do not want the +QSD to continue running after the test has failed, so __del__() will +take care to kill it. 
+ +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 091dc7b2b5553a529bff9a7bf9ad3bc85bc5bdcd) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 40 +++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index 83bfedb902..a51b5ce8cd 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -72,6 +72,8 @@ + qemu_prog = os.environ.get('QEMU_PROG', 'qemu') + qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ') + ++qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon') ++ + gdb_qemu_env = os.environ.get('GDB_OPTIONS') + qemu_gdb = [] + if gdb_qemu_env: +@@ -312,6 +314,44 @@ def cmd(self, cmd): + return self._read_output() + + ++class QemuStorageDaemon: ++ def __init__(self, *args: str, instance_id: str = 'a'): ++ assert '--pidfile' not in args ++ self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') ++ all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] ++ ++ # Cannot use with here, we want the subprocess to stay around ++ # pylint: disable=consider-using-with ++ self._p = subprocess.Popen(all_args) ++ while not os.path.exists(self.pidfile): ++ if self._p.poll() is not None: ++ cmd = ' '.join(all_args) ++ raise RuntimeError( ++ 'qemu-storage-daemon terminated with exit code ' + ++ f'{self._p.returncode}: {cmd}') ++ ++ time.sleep(0.01) ++ ++ with open(self.pidfile, encoding='utf-8') as f: ++ self._pid = int(f.read().strip()) ++ ++ assert self._pid == self._p.pid ++ ++ def stop(self, kill_signal=15): ++ self._p.send_signal(kill_signal) ++ self._p.wait() ++ self._p = None ++ ++ try: ++ os.remove(self.pidfile) ++ except OSError: ++ pass ++ ++ def __del__(self): ++ if self._p is not None: ++ self.stop(kill_signal=9) ++ ++ + def qemu_nbd(*args): + '''Run qemu-nbd in daemon mode and return the parent's exit 
code''' + return subprocess.call(qemu_nbd_args + ['--fork'] + list(args)) +-- +2.27.0 + diff --git a/kvm-kvm-don-t-use-perror-without-useful-errno.patch b/kvm-kvm-don-t-use-perror-without-useful-errno.patch new file mode 100644 index 0000000..a78c089 --- /dev/null +++ b/kvm-kvm-don-t-use-perror-without-useful-errno.patch @@ -0,0 +1,62 @@ +From 9ddefaedf423ec03eadaf17496c14e0d7b2381c8 Mon Sep 17 00:00:00 2001 +From: Cornelia Huck +Date: Thu, 28 Jul 2022 16:24:46 +0200 +Subject: [PATCH 30/32] kvm: don't use perror() without useful errno + +RH-Author: Cornelia Huck +RH-MergeRequest: 110: kvm: don't use perror() without useful errno +RH-Commit: [1/1] 20e51aac6767c1f89f74c7d692d1fb7689eff5f0 (cohuck/qemu-kvm-c9s) +RH-Bugzilla: 2095608 +RH-Acked-by: Eric Auger +RH-Acked-by: Thomas Huth +RH-Acked-by: Gavin Shan + +perror() is designed to append the decoded errno value to a +string. This, however, only makes sense if we called something that +actually sets errno prior to that. + +For the callers that check for split irqchip support that is not the +case, and we end up with confusing error messages that end in +"success". Use error_report() instead. 
+ +Signed-off-by: Cornelia Huck +Message-Id: <20220728142446.438177-1-cohuck@redhat.com> +Signed-off-by: Paolo Bonzini + +https://bugzilla.redhat.com/show_bug.cgi?id=2095608 +(cherry picked from commit 47c182fe8b03c0c40059fb95840923e65c9bdb4f) +Signed-off-by: Cornelia Huck +--- + accel/kvm/kvm-all.c | 2 +- + target/arm/kvm.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5f1377ca04..e9c7947640 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2254,7 +2254,7 @@ static void kvm_irqchip_create(KVMState *s) + ret = kvm_arch_irqchip_create(s); + if (ret == 0) { + if (s->kernel_irqchip_split == ON_OFF_AUTO_ON) { +- perror("Split IRQ chip mode not supported."); ++ error_report("Split IRQ chip mode not supported."); + exit(1); + } else { + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..0a2ba1f8e3 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -960,7 +960,7 @@ void kvm_arch_init_irq_routing(KVMState *s) + int kvm_arch_irqchip_create(KVMState *s) + { + if (kvm_kernel_irqchip_split()) { +- perror("-machine kernel_irqchip=split is not supported on ARM."); ++ error_report("-machine kernel_irqchip=split is not supported on ARM."); + exit(1); + } + +-- +2.31.1 + diff --git a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch new file mode 100644 index 0000000..f12b8ec --- /dev/null +++ b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -0,0 +1,49 @@ +From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:12 +0100 +Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] 
b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +It may not be obvious why laio_io_unplug() checks max batch. I discussed +this with Stefano and have added a comment summarizing the reason. + +Cc: Stefano Garzarella +Cc: Kevin Wolf +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 6078da7e42..9c2393a2f7 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + assert(s->io_q.plugged); + s->io_q.plugged--; + ++ /* ++ * Why max batch checking is performed here: ++ * Another BDS may have queued requests with a higher dev_max_batch and ++ * therefore in_queue could now exceed our dev_max_batch. Re-check the max ++ * batch so we can honor our device's dev_max_batch. 
++ */ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || + (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { +-- +2.31.1 + diff --git a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch new file mode 100644 index 0000000..ed9b5ee --- /dev/null +++ b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -0,0 +1,56 @@ +From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 9 Jun 2022 17:47:11 +0100 +Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in + laio_io_unplug() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2092788 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Every laio_io_plug() call has a matching laio_io_unplug() call. There is +a plugged counter that tracks the number of levels of plugging and +allows for nesting. + +The plugged counter must reflect the balance between laio_io_plug() and +laio_io_unplug() calls accurately. Otherwise I/O stalls occur since +io_submit(2) calls are skipped while plugged. + +Reported-by: Nikolay Tenev +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20220609164712.1539045-2-stefanha@redhat.com +Cc: Stefano Garzarella +Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()") +[Stefano Garzarella suggested adding a Fixes tag. 
+--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7) +Signed-off-by: Stefan Hajnoczi +--- + block/linux-aio.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/block/linux-aio.c b/block/linux-aio.c +index 4c423fcccf..6078da7e42 100644 +--- a/block/linux-aio.c ++++ b/block/linux-aio.c +@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, + uint64_t dev_max_batch) + { + assert(s->io_q.plugged); ++ s->io_q.plugged--; ++ + if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) || +- (--s->io_q.plugged == 0 && ++ (!s->io_q.plugged && + !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { + ioq_submit(s); + } +-- +2.31.1 + diff --git a/kvm-meson-create-have_vhost_-variables.patch b/kvm-meson-create-have_vhost_-variables.patch new file mode 100644 index 0000000..fcae620 --- /dev/null +++ b/kvm-meson-create-have_vhost_-variables.patch @@ -0,0 +1,154 @@ +From 51c310097832724bafac26aed81399da40128400 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:50:43 +0200 +Subject: [PATCH 05/32] meson: create have_vhost_* variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [5/27] 3b30f89e6d639923dc9d9a92a4261bb4509e5c83 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2a3129a37652e5e81d12f6e16dd3c447f09831f9 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:05 2022 +0200 + + meson: create have_vhost_* variables + + When using Meson options rather than config-host.h, the "when" clauses + have to be changed to if statements (which is not necessarily great, + though at least it highlights which parts 
of the build are per-target + and which are not). + + Do that before moving vhost logic to meson.build, though for now + the variables are just based on config-host.mak data. + + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + meson.build | 30 ++++++++++++++++++++---------- + tests/meson.build | 2 +- + tools/meson.build | 2 +- + 3 files changed, 22 insertions(+), 12 deletions(-) + +diff --git a/meson.build b/meson.build +index 13e3323380..735f538497 100644 +--- a/meson.build ++++ b/meson.build +@@ -298,6 +298,15 @@ have_tpm = get_option('tpm') \ + .require(targetos != 'windows', error_message: 'TPM emulation only available on POSIX systems') \ + .allowed() + ++# vhost ++have_vhost_user = 'CONFIG_VHOST_USER' in config_host ++have_vhost_vdpa = 'CONFIG_VHOST_VDPA' in config_host ++have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in config_host ++have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host ++have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host ++have_vhost_net = 'CONFIG_VHOST_NET' in config_host ++have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host ++ + # Target-specific libraries and flags + libm = cc.find_library('m', required: false) + threads = dependency('threads') +@@ -1335,7 +1344,7 @@ has_statx_mnt_id = cc.links(statx_mnt_id_test) + have_vhost_user_blk_server = get_option('vhost_user_blk_server') \ + .require(targetos == 'linux', + error_message: 'vhost_user_blk_server requires linux') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'vhost_user_blk_server requires vhost-user support') \ + .disable_auto_if(not have_system) \ + .allowed() +@@ -2116,9 +2125,9 @@ host_kconfig = \ + (have_ivshmem ? ['CONFIG_IVSHMEM=y'] : []) + \ + ('CONFIG_OPENGL' in config_host ? ['CONFIG_OPENGL=y'] : []) + \ + (x11.found() ? ['CONFIG_X11=y'] : []) + \ +- ('CONFIG_VHOST_USER' in config_host ? 
['CONFIG_VHOST_USER=y'] : []) + \ +- ('CONFIG_VHOST_VDPA' in config_host ? ['CONFIG_VHOST_VDPA=y'] : []) + \ +- ('CONFIG_VHOST_KERNEL' in config_host ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ ++ (have_vhost_user ? ['CONFIG_VHOST_USER=y'] : []) + \ ++ (have_vhost_vdpa ? ['CONFIG_VHOST_VDPA=y'] : []) + \ ++ (have_vhost_kernel ? ['CONFIG_VHOST_KERNEL=y'] : []) + \ + (have_virtfs ? ['CONFIG_VIRTFS=y'] : []) + \ + ('CONFIG_LINUX' in config_host ? ['CONFIG_LINUX=y'] : []) + \ + ('CONFIG_PVRDMA' in config_host ? ['CONFIG_PVRDMA=y'] : []) + \ +@@ -2799,7 +2808,7 @@ if have_system or have_user + endif + + vhost_user = not_found +-if targetos == 'linux' and 'CONFIG_VHOST_USER' in config_host ++if targetos == 'linux' and have_vhost_user + libvhost_user = subproject('libvhost-user') + vhost_user = libvhost_user.get_variable('vhost_user_dep') + endif +@@ -3386,7 +3395,7 @@ if have_tools + dependencies: qemuutil, + install: true) + +- if 'CONFIG_VHOST_USER' in config_host ++ if have_vhost_user + subdir('contrib/vhost-user-blk') + subdir('contrib/vhost-user-gpu') + subdir('contrib/vhost-user-input') +@@ -3516,15 +3525,16 @@ if 'simple' in get_option('trace_backends') + endif + summary_info += {'D-Bus display': dbus_display} + summary_info += {'QOM debugging': get_option('qom_cast_debug')} +-summary_info += {'vhost-kernel support': config_host.has_key('CONFIG_VHOST_KERNEL')} +-summary_info += {'vhost-net support': config_host.has_key('CONFIG_VHOST_NET')} +-summary_info += {'vhost-crypto support': config_host.has_key('CONFIG_VHOST_CRYPTO')} ++summary_info += {'vhost-kernel support': have_vhost_kernel} ++summary_info += {'vhost-net support': have_vhost_net} ++summary_info += {'vhost-user support': have_vhost_user} ++summary_info += {'vhost-user-crypto support': have_vhost_user_crypto} + summary_info += {'vhost-scsi support': config_host.has_key('CONFIG_VHOST_SCSI')} + summary_info += {'vhost-vsock support': config_host.has_key('CONFIG_VHOST_VSOCK')} +-summary_info += {'vhost-user 
support': config_host.has_key('CONFIG_VHOST_USER')} + summary_info += {'vhost-user-blk server support': have_vhost_user_blk_server} + summary_info += {'vhost-user-fs support': config_host.has_key('CONFIG_VHOST_USER_FS')} + summary_info += {'vhost-vdpa support': config_host.has_key('CONFIG_VHOST_VDPA')} ++summary_info += {'vhost-vdpa support': have_vhost_vdpa} + summary_info += {'build guest agent': have_ga} + summary(summary_info, bool_yn: true, section: 'Configurable features') + +diff --git a/tests/meson.build b/tests/meson.build +index 1d05109eb4..bbe41c8559 100644 +--- a/tests/meson.build ++++ b/tests/meson.build +@@ -70,7 +70,7 @@ test_deps = { + 'test-qht-par': qht_bench, + } + +-if have_tools and 'CONFIG_VHOST_USER' in config_host and 'CONFIG_LINUX' in config_host ++if have_tools and have_vhost_user and 'CONFIG_LINUX' in config_host + executable('vhost-user-bridge', + sources: files('vhost-user-bridge.c'), + dependencies: [qemuutil, vhost_user]) +diff --git a/tools/meson.build b/tools/meson.build +index 46977af84f..10eb3a043f 100644 +--- a/tools/meson.build ++++ b/tools/meson.build +@@ -3,7 +3,7 @@ have_virtiofsd = get_option('virtiofsd') \ + error_message: 'virtiofsd requires Linux') \ + .require(seccomp.found() and libcap_ng.found(), + error_message: 'virtiofsd requires libcap-ng-devel and seccomp-devel') \ +- .require('CONFIG_VHOST_USER' in config_host, ++ .require(have_vhost_user, + error_message: 'virtiofsd needs vhost-user-support') \ + .disable_auto_if(not have_tools and not have_system) \ + .allowed() +-- +2.31.1 + diff --git a/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch new file mode 100644 index 0000000..99d86c1 --- /dev/null +++ b/kvm-meson-use-have_vhost_-variables-to-pick-sources.patch @@ -0,0 +1,213 @@ +From a7d57a09e33275d5e6649273b5c9da1bc3c92491 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:51:53 +0200 +Subject: [PATCH 
06/32] meson: use have_vhost_* variables to pick sources +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [6/27] bc3db1efb759c0bc97fde2f4fbb3d6dc404c8d3d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 43b6d7ee1fbc5b5fb7c85d8131fdac1863214ad6 +Author: Paolo Bonzini +Date: Wed Apr 20 17:34:06 2022 +0200 + + meson: use have_vhost_* variables to pick sources + + Reviewed-by: Marc-André Lureau + Signed-off-by: Paolo Bonzini + +Signed-off-by: Eugenio Pérez +--- + Kconfig.host | 3 --- + backends/meson.build | 8 ++++++-- + hw/net/meson.build | 8 ++++++-- + hw/virtio/Kconfig | 3 --- + hw/virtio/meson.build | 25 ++++++++++++++++--------- + meson.build | 1 + + net/meson.build | 12 +++++++----- + tests/qtest/meson.build | 4 +++- + 8 files changed, 39 insertions(+), 25 deletions(-) + +diff --git a/Kconfig.host b/Kconfig.host +index 60b9c07b5e..1165c4eacd 100644 +--- a/Kconfig.host ++++ b/Kconfig.host +@@ -22,15 +22,12 @@ config TPM + + config VHOST_USER + bool +- select VHOST + + config VHOST_VDPA + bool +- select VHOST + + config VHOST_KERNEL + bool +- select VHOST + + config VIRTFS + bool +diff --git a/backends/meson.build b/backends/meson.build +index 6e68945528..cb92f639ca 100644 +--- a/backends/meson.build ++++ b/backends/meson.build +@@ -12,9 +12,13 @@ softmmu_ss.add([files( + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c')) + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c')) + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c')) +-softmmu_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_VIRTIO'], if_true: files('vhost-user.c')) ++if have_vhost_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO', 
if_true: files('vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c')) +-softmmu_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VHOST_CRYPTO'], if_true: files('cryptodev-vhost-user.c')) ++if have_vhost_user_crypto ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c')) ++endif + softmmu_ss.add(when: 'CONFIG_GIO', if_true: [files('dbus-vmstate.c'), gio]) + softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c')) + +diff --git a/hw/net/meson.build b/hw/net/meson.build +index 685b75badb..ebac261542 100644 +--- a/hw/net/meson.build ++++ b/hw/net/meson.build +@@ -46,8 +46,12 @@ specific_ss.add(when: 'CONFIG_XILINX_ETHLITE', if_true: files('xilinx_ethlite.c' + softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('net_rx_pkt.c')) + specific_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('virtio-net.c')) + +-softmmu_ss.add(when: ['CONFIG_VIRTIO_NET', 'CONFIG_VHOST_NET'], if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++if have_vhost_net ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost_net.c'), if_false: files('vhost_net-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost_net-stub.c')) ++else ++ softmmu_ss.add(files('vhost_net-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_ETSEC', if_true: files( + 'fsl_etsec/etsec.c', +diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig +index c144d42f9b..8ca7b3d9d6 100644 +--- a/hw/virtio/Kconfig ++++ b/hw/virtio/Kconfig +@@ -1,6 +1,3 @@ +-config VHOST +- bool +- + config VIRTIO + bool + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index 67dc77e00f..30a832eb4a 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -2,18 +2,22 @@ softmmu_virtio_ss = ss.source_set() + softmmu_virtio_ss.add(files('virtio-bus.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: 
files('virtio-pci.c')) + softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) +-softmmu_virtio_ss.add(when: 'CONFIG_VHOST', if_false: files('vhost-stub.c')) +- +-softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) +-softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) +- +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) + + virtio_ss = ss.source_set() + virtio_ss.add(files('virtio.c')) +-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c')) +-virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-shadow-virtqueue.c', 'vhost-vdpa.c')) ++ ++if have_vhost ++ virtio_ss.add(files('vhost.c', 'vhost-backend.c', 'vhost-iova-tree.c')) ++ if have_vhost_user ++ virtio_ss.add(files('vhost-user.c')) ++ endif ++ if have_vhost_vdpa ++ virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c')) ++ endif ++else ++ softmmu_virtio_ss.add(files('vhost-stub.c')) ++endif ++ + virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c')) + virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + virtio_ss.add(when: ['CONFIG_VIRTIO_CRYPTO', 'CONFIG_VIRTIO_PCI'], if_true: files('virtio-crypto-pci.c')) +@@ -53,3 +57,6 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c')) + virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) + + specific_ss.add_all(when: 'CONFIG_VIRTIO', if_true: virtio_ss) ++softmmu_ss.add_all(when: 'CONFIG_VIRTIO', if_true: softmmu_virtio_ss) ++softmmu_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) ++softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c')) +diff --git a/meson.build b/meson.build +index 735f538497..9ba675f098 100644 +--- a/meson.build ++++ b/meson.build +@@ -305,6 +305,7 @@ have_vhost_kernel = 'CONFIG_VHOST_KERNEL' in 
config_host + have_vhost_net_user = 'CONFIG_VHOST_NET_USER' in config_host + have_vhost_net_vdpa = 'CONFIG_VHOST_NET_VDPA' in config_host + have_vhost_net = 'CONFIG_VHOST_NET' in config_host ++have_vhost = have_vhost_user or have_vhost_vdpa or have_vhost_kernel + have_vhost_user_crypto = 'CONFIG_VHOST_CRYPTO' in config_host + + # Target-specific libraries and flags +diff --git a/net/meson.build b/net/meson.build +index 847bc2ac85..c965e83b26 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -26,10 +26,10 @@ softmmu_ss.add(when: vde, if_true: files('vde.c')) + if have_netmap + softmmu_ss.add(files('netmap.c')) + endif +-vhost_user_ss = ss.source_set() +-vhost_user_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) +-softmmu_ss.add_all(when: 'CONFIG_VHOST_NET_USER', if_true: vhost_user_ss) +-softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++if have_vhost_net_user ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-user.c'), if_false: files('vhost-user-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-user-stub.c')) ++endif + + softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('tap-linux.c')) + softmmu_ss.add(when: 'CONFIG_BSD', if_true: files('tap-bsd.c')) +@@ -40,6 +40,8 @@ if not config_host.has_key('CONFIG_LINUX') and not config_host.has_key('CONFIG_B + endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) +-softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c')) ++if have_vhost_net_vdpa ++ softmmu_ss.add(files('vhost-vdpa.c')) ++endif + + subdir('can') +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 67cd32def1..9f550df900 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -269,7 +269,9 @@ qos_test_ss.add( + if have_virtfs + qos_test_ss.add(files('virtio-9p-test.c')) + endif 
+-qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c')) ++if have_vhost_user ++ qos_test_ss.add(files('vhost-user-test.c')) ++endif + if have_tools and have_vhost_user_blk_server + qos_test_ss.add(files('vhost-user-blk-test.c')) + endif +-- +2.31.1 + diff --git a/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch new file mode 100644 index 0000000..0da63bf --- /dev/null +++ b/kvm-meson.build-Fix-docker-test-build-alpine-when-includ.patch @@ -0,0 +1,87 @@ +From 7c489b54b0bb33445113fbf16e88feb23be68013 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:30 -0300 +Subject: [PATCH 07/18] meson.build: Fix docker-test-build@alpine when + including linux/errqueue.h +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [1/11] f058eb846fcf611d527a1dd3b0cc399cdc17e3ee (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A build error happens in alpine CI when linux/errqueue.h is included +in io/channel-socket.c, due to redefining of 'struct __kernel_timespec': + +=== +ninja: job failed: [...] 
+In file included from /usr/include/linux/errqueue.h:6, + from ../io/channel-socket.c:29: +/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct __kernel_timespec' + 7 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +In file included from /usr/include/liburing.h:19, + from /builds/user/qemu/include/block/aio.h:18, + from /builds/user/qemu/include/io/channel.h:26, + from /builds/user/qemu/include/io/channel-socket.h:24, + from ../io/channel-socket.c:24: +/usr/include/liburing/compat.h:9:8: note: originally defined here + 9 | struct __kernel_timespec { + | ^~~~~~~~~~~~~~~~~ +ninja: subcommand failed +=== + +As above error message suggests, 'struct __kernel_timespec' was already +defined by liburing/compat.h. + +Fix alpine CI by adding test to disable liburing in configure step if a +redefinition happens between linux/errqueue.h and liburing/compat.h. + +[dgilbert: This has been fixed in Alpine issue 13813 and liburing] + +Signed-off-by: Leonardo Bras +Message-Id: <20220513062836.965425-2-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 354081d43de44ebd3497fe08f7f0121a5517d528) +Signed-off-by: Leonardo Bras +--- + meson.build | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/meson.build b/meson.build +index 5a7c10e639..13e3323380 100644 +--- a/meson.build ++++ b/meson.build +@@ -471,12 +471,23 @@ if not get_option('linux_aio').auto() or have_block + required: get_option('linux_aio'), + kwargs: static_kwargs) + endif ++ ++linux_io_uring_test = ''' ++ #include ++ #include ++ ++ int main(void) { return 0; }''' ++ + linux_io_uring = not_found + if not get_option('linux_io_uring').auto() or have_block + linux_io_uring = dependency('liburing', version: '>=0.3', + required: get_option('linux_io_uring'), + method: 'pkg-config', kwargs: static_kwargs) ++ if not cc.links(linux_io_uring_test) ++ linux_io_uring = not_found ++ endif + endif ++ + libnfs = not_found + if not get_option('libnfs').auto() or have_block + libnfs = dependency('libnfs', version: '>=1.9.3', +-- +2.35.3 + diff --git a/kvm-migration-Add-migrate_use_tls-helper.patch b/kvm-migration-Add-migrate_use_tls-helper.patch new file mode 100644 index 0000000..0fe0d91 --- /dev/null +++ b/kvm-migration-Add-migrate_use_tls-helper.patch @@ -0,0 +1,106 @@ +From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:34 -0300 +Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +A lot of places check parameters.tls_creds in order to evaluate if TLS is +in use, and sometimes call migrate_get_current() just for that test. 
+ +Add new helper function migrate_use_tls() in order to simplify testing +for TLS usage. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-6-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f) +Signed-off-by: Leonardo Bras +--- + migration/channel.c | 3 +-- + migration/migration.c | 9 +++++++++ + migration/migration.h | 1 + + migration/multifd.c | 5 +---- + 4 files changed, 12 insertions(+), 6 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index c4fc000a1a..086b5c0d8b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) + trace_migration_set_incoming_channel( + ioc, object_get_typename(OBJECT(ioc))); + +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + migration_tls_channel_process_incoming(s, ioc, &local_err); +diff --git a/migration/migration.c b/migration/migration.c +index 0a6b3b9f4d..d91efb66fe 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) + } + #endif + ++int migrate_use_tls(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.tls_creds && *s->parameters.tls_creds; ++} ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 5bcb7628ef..c2cabb8a14 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); + #else + #define migrate_use_zero_copy_send() (false) + #endif ++int migrate_use_tls(void); + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git 
a/migration/multifd.c b/migration/multifd.c +index 76b57a7177..43998ad117 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error *error) + { +- MigrationState *s = migrate_get_current(); +- + trace_multifd_set_outgoing_channel( + ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error); + + if (!error) { +- if (s->parameters.tls_creds && +- *s->parameters.tls_creds && ++ if (migrate_use_tls() && + !object_dynamic_cast(OBJECT(ioc), + TYPE_QIO_CHANNEL_TLS)) { + multifd_tls_channel_connect(p, ioc, &error); +-- +2.35.3 + diff --git a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch new file mode 100644 index 0000000..206ac3d --- /dev/null +++ b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -0,0 +1,250 @@ +From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:33 -0300 +Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for + Linux +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [4/11] 514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Add property that allows zero-copy migration of memory pages +on the sending side, and also includes a helper function +migrate_use_zero_copy_send() to check if it's enabled. + +No code is introduced to actually do the migration, but it allow +future implementations to enable/disable this feature. + +On non-Linux builds this parameter is compiled-out. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Message-Id: <20220513062836.965425-5-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 32 ++++++++++++++++++++++++++++++++ + migration/migration.h | 5 +++++ + migration/socket.c | 11 +++++++++-- + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 24 ++++++++++++++++++++++++ + 5 files changed, 76 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 695f0f2900..0a6b3b9f4d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++ params->zero_copy_send = s->parameters.zero_copy_send; ++#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ dest->zero_copy_send = params->zero_copy_send; ++ } ++#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++#ifdef CONFIG_LINUX ++ if (params->has_zero_copy_send) { ++ s->parameters.zero_copy_send = params->zero_copy_send; ++ } ++#endif + if 
(params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) + return s->parameters.multifd_zstd_level; + } + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.zero_copy_send; ++} ++#endif ++ + int migrate_use_xbzrle(void) + { + MigrationState *s; +@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_BOOL("zero_copy_send", MigrationState, ++ parameters.zero_copy_send, false), ++#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; ++#ifdef CONFIG_LINUX ++ params->has_zero_copy_send = true; ++#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/migration/migration.h b/migration/migration.h +index 2de861df01..5bcb7628ef 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); + int migrate_multifd_zstd_level(void); + ++#ifdef CONFIG_LINUX ++bool migrate_use_zero_copy_send(void); ++#else ++#define migrate_use_zero_copy_send() (false) ++#endif + int migrate_use_xbzrle(void); + uint64_t migrate_xbzrle_cache_size(void); + bool migrate_colo_enabled(void); +diff --git a/migration/socket.c b/migration/socket.c +index 05705a32d8..3754d8f72c 100644 +--- 
a/migration/socket.c ++++ b/migration/socket.c +@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task, + + if (qio_task_propagate_error(task, &err)) { + trace_migration_socket_outgoing_error(error_get_pretty(err)); +- } else { +- trace_migration_socket_outgoing_connected(data->hostname); ++ goto out; + } ++ ++ trace_migration_socket_outgoing_connected(data->hostname); ++ ++ if (migrate_use_zero_copy_send()) { ++ error_setg(&err, "Zero copy send not available in migration"); ++ } ++ ++out: + migration_channel_connect(data->s, sioc, data->hostname, err); + object_unref(OBJECT(sioc)); + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 634968498b..55b48d3733 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++#ifdef CONFIG_LINUX ++ case MIGRATION_PARAMETER_ZERO_COPY_SEND: ++ p->has_zero_copy_send = true; ++ visit_type_bool(v, param, &p->zero_copy_send, &err); ++ break; ++#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 27d7b28158..4d833ecdd6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -741,6 +741,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -780,6 +787,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', ++ { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -906,6 +914,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -960,6 +975,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1106,6 +1122,13 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# Defaults to false. (Since 7.1) ++# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. 
Such + # aliases may for example be the corresponding names on the +@@ -1158,6 +1181,7 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', ++ '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch new file mode 100644 index 0000000..29dc0ea --- /dev/null +++ b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch @@ -0,0 +1,98 @@ +From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 31 Mar 2022 11:08:45 -0400 +Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple times +RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) +RH-Bugzilla: 2096143 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Hanna Reitz +RH-Acked-by: Dr. David Alan Gilbert + +Previously migration didn't have an easy way to cleanup the listening +transport, migrate recovery only allows to execute once. That's done with a +trick flag in postcopy_recover_triggered. + +Now the facility is already there. + +Drop postcopy_recover_triggered and instead allows a new migrate-recover to +release the previous listener transport. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220331150857.74406-8-peterx@redhat.com> +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa) +Signed-off-by: Peter Xu +--- + migration/migration.c | 13 ++----------- + migration/migration.h | 1 - + migration/savevm.c | 3 --- + 3 files changed, 2 insertions(+), 15 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 2a141bfaf3..8fb3eae910 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) + return; + } + +- if (qatomic_cmpxchg(&mis->postcopy_recover_triggered, +- false, true) == true) { +- error_setg(errp, "Migrate recovery is triggered already"); +- return; +- } ++ /* If there's an existing transport, release it */ ++ migration_incoming_transport_cleanup(mis); + + /* + * Note that this call will never start a real migration; it will +@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) + * to continue using that newly established channel. + */ + qemu_start_incoming_migration(uri, errp); +- +- /* Safe to dereference with the assert above */ +- if (*errp) { +- /* Reset the flag so user could still retry */ +- qatomic_set(&mis->postcopy_recover_triggered, false); +- } + } + + void qmp_migrate_pause(Error **errp) +diff --git a/migration/migration.h b/migration/migration.h +index c2cabb8a14..fbc8690ec8 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -139,7 +139,6 @@ struct MigrationIncomingState { + struct PostcopyBlocktimeContext *blocktime_ctx; + + /* notify PAUSED postcopy incoming migrations to try to continue */ +- bool postcopy_recover_triggered; + QemuSemaphore postcopy_pause_sem_dst; + QemuSemaphore postcopy_pause_sem_fault; + +diff --git a/migration/savevm.c b/migration/savevm.c +index 02ed94c180..d9076897b8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) + + assert(migrate_postcopy_ram()); + +- /* 
Clear the triggered bit to allow one recovery */ +- mis->postcopy_recover_triggered = false; +- + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same +-- +2.35.3 + diff --git a/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/kvm-migration-Change-zero_copy_send-from-migration-param.patch new file mode 100644 index 0000000..abeeeb6 --- /dev/null +++ b/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -0,0 +1,289 @@ +From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 20 Jun 2022 02:39:45 -0300 +Subject: [PATCH 17/18] migration: Change zero_copy_send from migration + parameter to migration capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +When originally implemented, zero_copy_send was designed as a Migration +parameter. + +But taking into account how it is supposed to work, and +the difference between a capability and a parameter, it only makes sense +that zero-copy-send would work better as a capability. + +Taking into account how recently the change got merged, it was decided +that it's still time to make it right, and convert zero_copy_send into +a Migration capability. + +Signed-off-by: Leonardo Bras +Reviewed-by: Juan Quintela +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Signed-off-by: Juan Quintela +Signed-off-by: Dr.
David Alan Gilbert + dgilbert: always define the capability, even on non-Linux but error if +set; avoids build problems with the capability +(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 58 +++++++++++++++++++------------------------ + monitor/hmp-cmds.c | 6 ----- + qapi/migration.json | 33 +++++++----------------- + 3 files changed, 34 insertions(+), 63 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 102236fba0..2a141bfaf3 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, + MIGRATION_CAPABILITY_COMPRESS, + MIGRATION_CAPABILITY_XBZRLE, + MIGRATION_CAPABILITY_X_COLO, +- MIGRATION_CAPABILITY_VALIDATE_UUID); ++ MIGRATION_CAPABILITY_VALIDATE_UUID, ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND); + + /* When we add fault tolerance, we could have several + migrations at once. For now we don't need to add +@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +- params->zero_copy_send = s->parameters.zero_copy_send; +-#endif + params->has_xbzrle_cache_size = true; + params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; + params->has_max_postcopy_bandwidth = true; +@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, + } + } + ++#ifdef CONFIG_LINUX ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && ++ (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || ++ migrate_use_compression() || ++ migrate_use_tls())) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#else ++ if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) { ++ 
error_setg(errp, ++ "Zero copy currently only available on Linux"); ++ return false; ++ } ++#endif ++ ++ + /* incoming side only */ + if (runstate_check(RUN_STATE_INMIGRATE) && + !migrate_multifd_is_allowed() && +@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +-#ifdef CONFIG_LINUX +- if (params->zero_copy_send && +- (!migrate_use_multifd() || +- params->multifd_compression != MULTIFD_COMPRESSION_NONE || +- (params->tls_creds && *params->tls_creds))) { +- error_setg(errp, +- "Zero copy only available for non-compressed non-TLS multifd migration"); +- return false; +- } +-#endif + return true; + } + +@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- dest->zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } +-#ifdef CONFIG_LINUX +- if (params->has_zero_copy_send) { +- s->parameters.zero_copy_send = params->zero_copy_send; +- } +-#endif + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) + + s = migrate_get_current(); + +- return s->parameters.zero_copy_send; ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } + #endif + +@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { + 
DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_BOOL("zero_copy_send", MigrationState, +- parameters.zero_copy_send, false), +-#endif + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), + DEFINE_PROP_MIG_CAP("x-background-snapshot", + MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; +-#ifdef CONFIG_LINUX +- params->has_zero_copy_send = true; +-#endif + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; + params->has_max_cpu_throttle = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 55b48d3733..634968498b 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; +-#ifdef CONFIG_LINUX +- case MIGRATION_PARAMETER_ZERO_COPY_SEND: +- p->has_zero_copy_send = true; +- visit_type_bool(v, param, &p->zero_copy_send, &err); +- break; +-#endif + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { +diff --git a/qapi/migration.json b/qapi/migration.json +index 4d833ecdd6..5105790cd0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -463,6 +463,13 @@ + # procedure starts. 
The VM RAM is saved with running VM. + # (since 6.0) + # ++# @zero-copy-send: Controls behavior on sending memory pages on migration. ++# When true, enables a zero-copy mechanism for sending ++# memory pages, if host supports it. ++# Requires that QEMU be permitted to use locked memory ++# for guest RAM pages. ++# (since 7.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. + # +@@ -476,7 +483,8 @@ + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, +- 'validate-uuid', 'background-snapshot'] } ++ 'validate-uuid', 'background-snapshot', ++ 'zero-copy-send'] } + + ## + # @MigrationCapabilityStatus: +@@ -741,12 +749,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) + # + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such +@@ -787,7 +789,6 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level' ,'multifd-zstd-level', +- { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'}, + 'block-bitmap-mapping' ] } + + ## +@@ -914,13 +915,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. 
(Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -975,7 +969,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +@@ -1122,13 +1115,6 @@ + # will consume more CPU. + # Defaults to 1. (Since 5.0) + # +-# @zero-copy-send: Controls behavior on sending memory pages on migration. +-# When true, enables a zero-copy mechanism for sending +-# memory pages, if host supports it. +-# Requires that QEMU be permitted to use locked memory +-# for guest RAM pages. +-# Defaults to false. (Since 7.1) +-# + # @block-bitmap-mapping: Maps block nodes and bitmaps on them to + # aliases for the purpose of dirty bitmap migration. Such + # aliases may for example be the corresponding names on the +@@ -1181,7 +1167,6 @@ + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', + '*multifd-zstd-level': 'uint8', +- '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' }, + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } + + ## +-- +2.35.3 + diff --git a/kvm-migration-Fix-operator-type.patch b/kvm-migration-Fix-operator-type.patch new file mode 100644 index 0000000..f6a462a --- /dev/null +++ b/kvm-migration-Fix-operator-type.patch @@ -0,0 +1,47 @@ +From 4bd48e784ae0c38c89f1a944b06c997fd28c4d37 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Thu, 19 May 2022 04:15:33 -0400 +Subject: [PATCH 16/16] migration: Fix operator type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 92: Fix build using clang 14 +RH-Commit: [1/1] ad9980e64cf2e39085d68f1ff601444bf2afe228 (mrezanin/centos-src-qemu-kvm) 
+RH-Bugzilla: 2064530 +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Clang spotted an & that should have been an &&; fix it. + +Reported by: David Binderman / https://gitlab.com/dcb +Fixes: 65dacaa04fa ("migration: introduce save_normal_page()") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/963 +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20220406102515.96320-1-dgilbert@redhat.com> +Reviewed-by: Peter Maydell +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit f912ec5b2d65644116ff496b58d7c9145c19e4c0) +Signed-off-by: Miroslav Rezanina +--- + migration/ram.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3532f64ecb..0ef4bd63eb 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1289,7 +1289,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, +- migrate_release_ram() & ++ migrate_release_ram() && + migration_in_postcopy()); + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); +-- +2.31.1 + diff --git a/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch new file mode 100644 index 0000000..ea89a9f --- /dev/null +++ b/kvm-multifd-Copy-pages-before-compressing-them-with-zlib.patch @@ -0,0 +1,142 @@ +From 1d280070748b604c60a7be4d4c3c3a28e3964f37 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 2 Aug 2022 10:11:21 +0200 +Subject: [PATCH 31/32] multifd: Copy pages before compressing them with zlib + +RH-Author: Thomas Huth +RH-MergeRequest: 112: Fix postcopy migration on s390x +RH-Commit: [1/2] fd5a0221e22b4563bd1cb7f8a8b95f0bfe8f5fc9 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2099934 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2099934 + +zlib_send_prepare() compresses pages of a running VM. zlib does not +make any thread-safety guarantees with respect to changing deflate() +input concurrently with deflate() [1]. + +One can observe problems due to this with the IBM zEnterprise Data +Compression accelerator capable zlib [2]. When the hardware +acceleration is enabled, migration/multifd/tcp/plain/zlib test fails +intermittently [3] due to sliding window corruption. The accelerator's +architecture explicitly discourages concurrent accesses [4]: + + Page 26-57, "Other Conditions": + + As observed by this CPU, other CPUs, and channel + programs, references to the parameter block, first, + second, and third operands may be multiple-access + references, accesses to these storage locations are + not necessarily block-concurrent, and the sequence + of these accesses or references is undefined. + +Mark Adler pointed out that vanilla zlib performs double fetches under +certain circumstances as well [5], therefore we need to copy data +before passing it to deflate(). + +[1] https://zlib.net/manual.html +[2] https://github.com/madler/zlib/pull/410 +[3] https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg03988.html +[4] http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf +[5] https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00889.html + +Signed-off-by: Ilya Leoshkevich +Message-Id: <20220705203559.2960949-1-iii@linux.ibm.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d) +Signed-off-by: Thomas Huth +--- + migration/multifd-zlib.c | 38 ++++++++++++++++++++++++++++++-------- + 1 file changed, 30 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 3a7ae44485..18213a9513 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -27,6 +27,8 @@ struct zlib_data { + uint8_t *zbuff; + /* size of compressed buffer */ + uint32_t zbuff_len; ++ /* uncompressed buffer of size qemu_target_page_size() */ ++ uint8_t *buf; + }; + + /* Multifd zlib compression */ +@@ -45,26 +47,38 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + { + struct zlib_data *z = g_new0(struct zlib_data, 1); + z_stream *zs = &z->zs; ++ const char *err_msg; + + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { +- g_free(z); +- error_setg(errp, "multifd %u: deflate init failed", p->id); +- return -1; ++ err_msg = "deflate init failed"; ++ goto err_free_z; + } + /* This is the maxium size of the compressed buffer */ + z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { +- deflateEnd(&z->zs); +- g_free(z); +- error_setg(errp, "multifd %u: out of memory for zbuff", p->id); +- return -1; ++ err_msg = "out of memory for zbuff"; ++ goto err_deflate_end; ++ } ++ z->buf = g_try_malloc(qemu_target_page_size()); ++ if (!z->buf) { ++ err_msg = "out of memory for buf"; ++ goto err_free_zbuff; + } + p->data = z; + return 0; ++ ++err_free_zbuff: ++ g_free(z->zbuff); ++err_deflate_end: ++ deflateEnd(&z->zs); ++err_free_z: ++ g_free(z); ++ error_setg(errp, "multifd %u: %s", p->id, err_msg); ++ return -1; + } + + /** +@@ -82,6 +96,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; ++ g_free(z->buf); ++ 
z->buf = NULL; + g_free(p->data); + p->data = NULL; + } +@@ -114,8 +130,14 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + ++ /* ++ * Since the VM might be running, the page may be changing concurrently ++ * with compression. zlib does not guarantee that this is safe, ++ * therefore copy the page before calling deflate(). ++ */ ++ memcpy(z->buf, p->pages->block->host + p->normal[i], page_size); + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->normal[i]; ++ zs->next_in = z->buf; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +-- +2.31.1 + diff --git a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch new file mode 100644 index 0000000..c7159e1 --- /dev/null +++ b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -0,0 +1,182 @@ +From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:37 -0300 +Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration + (multifd-zero-copy) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Implement zero copy send on nocomp_send_write(), by making use of QIOChannel +writev + flags & flush interface. + +Change multifd_send_sync_main() so flush_zero_copy() can be called +after each iteration in order to make sure all dirty pages are sent before +a new iteration is started. It will also flush at the beginning and at the +end of migration. 
+ +Also make it return -1 if flush_zero_copy() fails, in order to cancel +the migration process, and avoid resuming the guest in the target host +without receiving all current RAM. + +This will work fine on RAM migration because the RAM pages are not usually freed, +and there is no problem on changing the pages content between writev_zero_copy() and +the actual sending of the buffer, because this change will dirty the page and +cause it to be re-sent on a next iteration anyway. + +A lot of locked memory may be needed in order to use multifd migration +with zero-copy enabled, so disabling the feature should be necessary for +low-privileged users trying to perform multifd migrations. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-9-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 11 ++++++++++- + migration/multifd.c | 37 +++++++++++++++++++++++++++++++++++-- + migration/multifd.h | 2 ++ + migration/socket.c | 5 +++-- + 4 files changed, 50 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index d91efb66fe..102236fba0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } +- ++#ifdef CONFIG_LINUX ++ if (params->zero_copy_send && ++ (!migrate_use_multifd() || ++ params->multifd_compression != MULTIFD_COMPRESSION_NONE || ++ (params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif + return true; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8fca6c970e..0b5b41c53f 100644 +--- 
a/migration/multifd.c ++++ b/migration/multifd.c +@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) + int multifd_send_sync_main(QEMUFile *f) + { + int i; ++ bool flush_zero_copy; + + if (!migrate_use_multifd()) { + return 0; +@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) + return -1; + } + } ++ ++ /* ++ * When using zero-copy, it's necessary to flush the pages before any of ++ * the pages can be sent again, so we'll make sure the new version of the ++ * pages will always arrive _later_ than the old pages. ++ * ++ * Currently we achieve this by flushing the zero-page requested writes ++ * per ram iteration, but in the future we could potentially optimize it ++ * to be less frequent, e.g. only after we finished one whole scanning of ++ * all the dirty bitmaps. ++ */ ++ ++ flush_zero_copy = migrate_use_zero_copy_send(); ++ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) + ram_counters.transferred += p->packet_len; + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); ++ ++ if (flush_zero_copy && p->c) { ++ int ret; ++ Error *err = NULL; ++ ++ ret = qio_channel_flush(p->c, &err); ++ if (ret < 0) { ++ error_report_err(err); ++ return -1; ++ } ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) + p->iov[0].iov_base = p->packet; + } + +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); ++ ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, ++ 0, p->write_flags, &local_err); + if (ret != 0) { + break; + } +@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); ++ ++ if (migrate_use_zero_copy_send()) { ++ 
p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } else { ++ p->write_flags = 0; ++ } ++ + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index cd495195ce..7ec688fb4f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -96,6 +96,8 @@ typedef struct { + uint32_t packet_len; + /* pointer to the packet */ + MultiFDPacket_t *packet; ++ /* multifd flags for sending ram */ ++ int write_flags; + /* multifd flags for each packet */ + uint32_t flags; + /* size of the next packet that contains pages */ +diff --git a/migration/socket.c b/migration/socket.c +index 3754d8f72c..4fd5e85f50 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task, + + trace_migration_socket_outgoing_connected(data->hostname); + +- if (migrate_use_zero_copy_send()) { +- error_setg(&err, "Zero copy send not available in migration"); ++ if (migrate_use_zero_copy_send() && ++ !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) { ++ error_setg(&err, "Zero copy send feature not detected in host kernel"); + } + + out: +-- +2.35.3 + diff --git a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch new file mode 100644 index 0000000..415e3a9 --- /dev/null +++ b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -0,0 +1,102 @@ +From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:36 -0300 +Subject: [PATCH 13/18] multifd: Send header packet without flags if + zero-copy-send is enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: 
Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Since d48c3a0445 ("multifd: Use a single writev on the send side"), +sending the header packet and the memory pages happens in the same +writev, which can potentially make the migration faster. + +Using channel-socket as an example, this works well with the default copying +mechanism of sendmsg(), but with zero-copy-send=true, it will cause +the migration to often break. + +This happens because the header packet buffer gets reused quite often, +and there is a high chance that by the time the MSG_ZEROCOPY mechanism gets +to send the buffer, it has already changed, sending the wrong data and +causing the migration to abort. + +It means that, as it is, the buffer for the header packet is not suitable +for sending with MSG_ZEROCOPY. + +In order to enable zero copy for multifd, send the header packet on an +individual write(), without any flags, and the remaining pages with a +writev(), as it was happening before. This only changes how a migration +with zero-copy-send=true works, not changing any current behavior for +migrations with zero-copy-send=false. + +Signed-off-by: Leonardo Bras +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220513062836.965425-8-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 22 +++++++++++++++++++--- + 1 file changed, 19 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdb57439a7..8fca6c970e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; ++ bool use_zero_copy_send = migrate_use_zero_copy_send(); + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 1; + p->normal_num = 0; + ++ if (use_zero_copy_send) { ++ p->iovs_num = 0; ++ } else { ++ p->iovs_num = 1; ++ } ++ + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; +@@ -667,8 +673,18 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ } else { ++ /* Send header using the same writev call */ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ } + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); +-- +2.35.3 + diff --git a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch new file mode 100644 index 0000000..e6d726a --- /dev/null +++ b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -0,0 +1,163 @@ +From 
4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Fri, 13 May 2022 03:28:35 -0300 +Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on + error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd +RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) +RH-Bugzilla: 1968509 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu + +Even though multifd_send_sync_main() currently emits error_reports, its +callers don't really check it before continuing. + +Change multifd_send_sync_main() to return -1 on error and 0 on success. +Also change all its callers to make use of this change and possibly fail +earlier. + +(This change is important to the next patch on multifd zero copy +implementation, to make sure an error in zero-copy flush does not go +unnoticed.) + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Reviewed-by: Peter Xu +Message-Id: <20220513062836.965425-7-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 10 ++++++---- + migration/multifd.h | 2 +- + migration/ram.c | 29 ++++++++++++++++++++++------- + 3 files changed, 29 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 43998ad117..cdb57439a7 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) + multifd_send_state = NULL; + } + +-void multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(QEMUFile *f) + { + int i; + + if (!migrate_use_multifd()) { +- return; ++ return 0; + } + if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); +- return; ++ return -1; + } + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (p->quit) { + error_report("%s: channel %d has already quit", __func__, i); + qemu_mutex_unlock(&p->mutex); +- return; ++ return -1; + } + + p->packet_num = multifd_send_state->packet_num++; +@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) + qemu_sem_wait(&p->sem_sync); + } + trace_multifd_send_sync_main(multifd_send_state->packet_num); ++ ++ return 0; + } + + static void *multifd_send_thread(void *opaque) +diff --git a/migration/multifd.h b/migration/multifd.h +index 4dda900a0b..cd495195ce 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); + bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-void multifd_send_sync_main(QEMUFile *f); ++int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ +diff --git a/migration/ram.c 
b/migration/ram.c +index 0ef4bd63eb..fb6db54642 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + { + RAMState **rsp = opaque; + RAMBlock *block; ++ int ret; + + if (compress_threads_save_setup()) { + return -1; +@@ -2937,7 +2938,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + ram_control_before_iterate(f, RAM_CONTROL_SETUP); + ram_control_after_iterate(f, RAM_CONTROL_SETUP); + +- multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + +@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) + out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { +- multifd_send_sync_main(rs->f); ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); + ram_transferred_add(8); +@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_control_after_iterate(f, RAM_CONTROL_FINISH); + } + +- if (ret >= 0) { +- multifd_send_sync_main(rs->f); +- qemu_put_be64(f, RAM_SAVE_FLAG_EOS); +- qemu_fflush(f); ++ if (ret < 0) { ++ return ret; + } + +- return ret; ++ ret = multifd_send_sync_main(rs->f); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); ++ qemu_fflush(f); ++ ++ return 0; + } + + static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, +-- +2.35.3 + diff --git a/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch new file mode 100644 index 0000000..56abcb1 --- /dev/null +++ b/kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch @@ -0,0 +1,381 @@ +From 4a9ddf42788d3f924bdad7746f7aca615f03d7c1 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:24 -0500 +Subject: [PATCH 
2/2] nbd/server: Allow MULTI_CONN for shared writable exports +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [2/2] 53f0e885a5ed7f6e4bb14e74fe8e7957e6afe90f (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +According to the NBD spec, a server that advertises +NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will +not see any cache inconsistencies: when properly separated by a single +flush, actions performed by one client will be visible to another +client, regardless of which client did the flush. + +We always satisfy these conditions in qemu - even when we support +multiple clients, ALL clients go through a single point of reference +into the block layer, with no local caching. The effect of one client +is instantly visible to the next client. Even if our backend were a +network device, we argue that any multi-path caching effects that +would cause inconsistencies in back-to-back actions not seeing the +effect of previous actions would be a bug in that backend, and not the +fault of caching in qemu. As such, it is safe to unconditionally +advertise CAN_MULTI_CONN for any qemu NBD server situation that +supports parallel clients. + +Note, however, that we don't want to advertise CAN_MULTI_CONN when we +know that a second client cannot connect (for historical reasons, +qemu-nbd defaults to a single connection while nbd-server-add and QMP +commands default to unlimited connections; but we already have +existing means to let either style of NBD server creation alter those +defaults). This is visible by no longer advertising MULTI_CONN for +'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation. 
+ +The harder part of this patch is setting up an iotest to demonstrate +behavior of multiple NBD clients to a single server. It might be +possible with parallel qemu-io processes, but I found it easier to do +in python with the help of libnbd, and help from Nir and Vladimir in +writing the test. + +Signed-off-by: Eric Blake +Suggested-by: Nir Soffer +Suggested-by: Vladimir Sementsov-Ogievskiy +Message-Id: <20220512004924.417153-3-eblake@redhat.com> +Signed-off-by: Kevin Wolf + +(cherry picked from commit 58a6fdcc9efb2a7c1ef4893dca4aa5e8020ca3dc) +Conflicts: + nbd/server.c - context, e5fb29d5 not backported +Signed-off-by: Eric Blake +--- + MAINTAINERS | 1 + + blockdev-nbd.c | 5 + + docs/interop/nbd.txt | 1 + + docs/tools/qemu-nbd.rst | 3 +- + include/block/nbd.h | 3 +- + nbd/server.c | 10 +- + qapi/block-export.json | 8 +- + tests/qemu-iotests/tests/nbd-multiconn | 145 ++++++++++++++++++ + tests/qemu-iotests/tests/nbd-multiconn.out | 5 + + .../tests/nbd-qemu-allocation.out | 2 +- + 10 files changed, 172 insertions(+), 11 deletions(-) + create mode 100755 tests/qemu-iotests/tests/nbd-multiconn + create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out + +diff --git a/MAINTAINERS b/MAINTAINERS +index 4ad2451e03..2fe20a49ab 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -3370,6 +3370,7 @@ F: qemu-nbd.* + F: blockdev-nbd.c + F: docs/interop/nbd.txt + F: docs/tools/qemu-nbd.rst ++F: tests/qemu-iotests/tests/*nbd* + T: git https://repo.or.cz/qemu/ericb.git nbd + T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index add41a23af..c6d9b0324c 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -44,6 +44,11 @@ bool nbd_server_is_running(void) + return nbd_server || qemu_nbd_connections >= 0; + } + ++int nbd_server_max_connections(void) ++{ ++ return nbd_server ? 
nbd_server->max_connections : qemu_nbd_connections; ++} ++ + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { + nbd_client_put(client); +diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt +index bdb0f2a41a..f5ca25174a 100644 +--- a/docs/interop/nbd.txt ++++ b/docs/interop/nbd.txt +@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE + * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports, + NBD_CMD_FLAG_FAST_ZERO + * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth" ++* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports +diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst +index 4c950f6199..8e08a29e89 100644 +--- a/docs/tools/qemu-nbd.rst ++++ b/docs/tools/qemu-nbd.rst +@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified. + .. option:: -e, --shared=NUM + + Allow up to *NUM* clients to share the device (default +- ``1``), 0 for unlimited. Safe for readers, but for now, +- consistency is not guaranteed between multiple writers. ++ ``1``), 0 for unlimited. + + .. option:: -t, --persistent + +diff --git a/include/block/nbd.h b/include/block/nbd.h +index c5a29ce1c6..c74b7a9d2e 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2020 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device +@@ -346,6 +346,7 @@ void nbd_client_put(NBDClient *client); + + void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); ++int nbd_server_max_connections(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, + Error **errp); +diff --git a/nbd/server.c b/nbd/server.c +index c5644fd3f6..6e2157acfa 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2016-2021 Red Hat, Inc. ++ * Copyright (C) 2016-2022 Red Hat, Inc. 
+ * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device Server Side +@@ -1642,7 +1642,6 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + int64_t size; + uint64_t perm, shared_perm; + bool readonly = !exp_args->writable; +- bool shared = !exp_args->writable; + strList *bitmaps; + size_t i; + int ret; +@@ -1693,11 +1692,12 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args, + exp->description = g_strdup(arg->description); + exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH | + NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE); ++ ++ if (nbd_server_max_connections() != 1) { ++ exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; ++ } + if (readonly) { + exp->nbdflags |= NBD_FLAG_READ_ONLY; +- if (shared) { +- exp->nbdflags |= NBD_FLAG_CAN_MULTI_CONN; +- } + } else { + exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES | + NBD_FLAG_SEND_FAST_ZERO); +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 1e34927f85..755ccc89b1 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -21,7 +21,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. (since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0) + # + # Since: 4.2 + ## +@@ -50,7 +52,9 @@ + # recreated on the fly while the NBD server is active. + # If missing, it will default to denying access (since 4.0). + # @max-connections: The maximum number of connections to allow at the same +-# time, 0 for unlimited. (since 5.2; default: 0) ++# time, 0 for unlimited. Setting this to 1 also stops ++# the server from advertising multiple client support ++# (since 5.2; default: 0). + # + # Returns: error if the server is already running. 
+ # +diff --git a/tests/qemu-iotests/tests/nbd-multiconn b/tests/qemu-iotests/tests/nbd-multiconn +new file mode 100755 +index 0000000000..b121f2e363 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn +@@ -0,0 +1,145 @@ ++#!/usr/bin/env python3 ++# group: rw auto quick ++# ++# Test cases for NBD multi-conn advertisement ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++ ++import os ++from contextlib import contextmanager ++import iotests ++from iotests import qemu_img_create, qemu_io ++ ++ ++disk = os.path.join(iotests.test_dir, 'disk') ++size = '4M' ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd_sock') ++nbd_uri = 'nbd+unix:///{}?socket=' + nbd_sock ++ ++ ++@contextmanager ++def open_nbd(export_name): ++ h = nbd.NBD() ++ try: ++ h.connect_uri(nbd_uri.format(export_name)) ++ yield h ++ finally: ++ h.shutdown() ++ ++class TestNbdMulticonn(iotests.QMPTestCase): ++ def setUp(self): ++ qemu_img_create('-f', iotests.imgfmt, disk, size) ++ qemu_io('-c', 'w -P 1 0 2M', '-c', 'w -P 2 2M 2M', disk) ++ ++ self.vm = iotests.VM() ++ self.vm.launch() ++ result = self.vm.qmp('blockdev-add', { ++ 'driver': 'qcow2', ++ 'node-name': 'n', ++ 'file': {'driver': 'file', 'filename': disk} ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ def tearDown(self): ++ self.vm.shutdown() ++ os.remove(disk) ++ try: ++ os.remove(nbd_sock) ++ except OSError: ++ pass ++ ++ @contextmanager ++ def run_server(self, max_connections=None): ++ args = { ++ 'addr': { ++ 'type': 'unix', ++ 'data': {'path': nbd_sock} ++ } ++ } ++ if max_connections is not None: ++ args['max-connections'] = max_connections ++ ++ result = self.vm.qmp('nbd-server-start', args) ++ self.assert_qmp(result, 'return', {}) ++ yield ++ ++ result = self.vm.qmp('nbd-server-stop') ++ self.assert_qmp(result, 'return', {}) ++ ++ def add_export(self, name, writable=None): ++ args = { ++ 'type': 'nbd', ++ 'id': name, ++ 'node-name': 'n', ++ 'name': name, ++ } ++ if writable is not None: ++ args['writable'] = writable ++ ++ result = self.vm.qmp('block-export-add', args) ++ self.assert_qmp(result, 'return', {}) ++ ++ def test_default_settings(self): ++ with self.run_server(): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertTrue(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertTrue(h.can_multi_conn()) ++ ++ def 
test_limited_connections(self): ++ with self.run_server(max_connections=1): ++ self.add_export('r') ++ self.add_export('w', writable=True) ++ with open_nbd('r') as h: ++ self.assertFalse(h.can_multi_conn()) ++ with open_nbd('w') as h: ++ self.assertFalse(h.can_multi_conn()) ++ ++ def test_parallel_writes(self): ++ with self.run_server(): ++ self.add_export('w', writable=True) ++ ++ clients = [nbd.NBD() for _ in range(3)] ++ for c in clients: ++ c.connect_uri(nbd_uri.format('w')) ++ self.assertTrue(c.can_multi_conn()) ++ ++ initial_data = clients[0].pread(1024 * 1024, 0) ++ self.assertEqual(initial_data, b'\x01' * 1024 * 1024) ++ ++ updated_data = b'\x03' * 1024 * 1024 ++ clients[1].pwrite(updated_data, 0) ++ clients[2].flush() ++ current_data = clients[0].pread(1024 * 1024, 0) ++ ++ self.assertEqual(updated_data, current_data) ++ ++ for i in range(3): ++ clients[i].shutdown() ++ ++ ++if __name__ == '__main__': ++ try: ++ # Easier to use libnbd than to try and set up parallel ++ # 'qemu-nbd --list' or 'qemu-io' processes, but not all systems ++ # have libnbd installed. ++ import nbd # type: ignore ++ ++ iotests.main(supported_fmts=['qcow2']) ++ except ImportError: ++ iotests.notrun('libnbd not installed') +diff --git a/tests/qemu-iotests/tests/nbd-multiconn.out b/tests/qemu-iotests/tests/nbd-multiconn.out +new file mode 100644 +index 0000000000..8d7e996700 +--- /dev/null ++++ b/tests/qemu-iotests/tests/nbd-multiconn.out +@@ -0,0 +1,5 @@ ++... 
++---------------------------------------------------------------------- ++Ran 3 tests ++ ++OK +diff --git a/tests/qemu-iotests/tests/nbd-qemu-allocation.out b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +index 0bf1abb063..9d938db24e 100644 +--- a/tests/qemu-iotests/tests/nbd-qemu-allocation.out ++++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out +@@ -17,7 +17,7 @@ wrote 2097152/2097152 bytes at offset 1048576 + exports available: 1 + export: '' + size: 4194304 +- flags: 0x58f ( readonly flush fua df multi cache ) ++ flags: 0x48f ( readonly flush fua df cache ) + min block: 1 + opt block: 4096 + max block: 33554432 +-- +2.31.1 + diff --git a/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch new file mode 100644 index 0000000..68f7647 --- /dev/null +++ b/kvm-numa-Enable-numa-for-SGX-EPC-sections.patch @@ -0,0 +1,287 @@ +From 35bf6693fb5bba5a9d5fdf4a7fdac06ce574b83d Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:05 -0400 +Subject: [PATCH 1/7] numa: Enable numa for SGX EPC sections + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [1/5] c29297cbacc4cb65c9ac125db349a767aa2574af +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The basic SGX did not enable numa for SGX EPC sections, which +results in all EPC sections being located in numa node 0. This patch +enables the SGX numa function in the guest and the EPC section can +work with RAM as one numa node. + +The Guest kernel related log: +[ 0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x180000000-0x183ffffff] +[ 0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x184000000-0x185bfffff] +The SRAT table can normally show SGX EPC sections memory info in different +numa nodes. + +The SGX EPC numa related command: + ...... 
+ -m 4G,maxmem=20G \ + -smp sockets=2,cores=2 \ + -cpu host,+sgx-provisionkey \ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ + ...... + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1105812382e1126d86dddc16b3700f8c79dc93d1) +Signed-off-by: Paul Lai +--- + hw/core/numa.c | 5 ++--- + hw/i386/acpi-build.c | 2 ++ + hw/i386/sgx-epc.c | 3 +++ + hw/i386/sgx-stub.c | 4 ++++ + hw/i386/sgx.c | 44 +++++++++++++++++++++++++++++++++++++++ + include/hw/i386/sgx-epc.h | 3 +++ + monitor/hmp-cmds.c | 1 + + qapi/machine.json | 10 ++++++++- + qemu-options.hx | 4 ++-- + 9 files changed, 70 insertions(+), 6 deletions(-) + +diff --git a/hw/core/numa.c b/hw/core/numa.c +index e6050b2273..1aa05dcf42 100644 +--- a/hw/core/numa.c ++++ b/hw/core/numa.c +@@ -784,9 +784,8 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) + break; + case MEMORY_DEVICE_INFO_KIND_SGX_EPC: + se = value->u.sgx_epc.data; +- /* TODO: once we support numa, assign to right node */ +- node_mem[0].node_mem += se->size; +- node_mem[0].node_plugged_mem += se->size; ++ node_mem[se->node].node_mem += se->size; ++ node_mem[se->node].node_plugged_mem = 0; + break; + default: + g_assert_not_reached(); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 447ea35275..a4478e77b7 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -2071,6 +2071,8 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) + nvdimm_build_srat(table_data); + 
} + ++ sgx_epc_build_srat(table_data); ++ + /* + * TODO: this part is not in ACPI spec and current linux kernel boots fine + * without these entries. But I recall there were issues the last time I +diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c +index e508827e78..96b2940d75 100644 +--- a/hw/i386/sgx-epc.c ++++ b/hw/i386/sgx-epc.c +@@ -21,6 +21,7 @@ + + static Property sgx_epc_properties[] = { + DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), ++ DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), + DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, + TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), + DEFINE_PROP_END_OF_LIST(), +@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, + se->memaddr = epc->addr; + se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, + NULL); ++ se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, ++ NULL); + se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); + + info->u.sgx_epc.data = se; +diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c +index c9b379e665..26833eb233 100644 +--- a/hw/i386/sgx-stub.c ++++ b/hw/i386/sgx-stub.c +@@ -6,6 +6,10 @@ + #include "qapi/error.h" + #include "qapi/qapi-commands-misc-target.h" + ++void sgx_epc_build_srat(GArray *table_data) ++{ ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + error_setg(errp, "SGX support is not compiled in"); +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 8fef3dd8fa..d04299904a 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -23,6 +23,7 @@ + #include "sysemu/hw_accel.h" + #include "sysemu/reset.h" + #include ++#include "hw/acpi/aml-build.h" + + #define SGX_MAX_EPC_SECTIONS 8 + #define SGX_CPUID_EPC_INVALID 0x0 +@@ -36,6 +37,46 @@ + + #define RETRY_NUM 2 + ++static int sgx_epc_device_list(Object *obj, void *opaque) ++{ ++ GSList **list = opaque; ++ ++ if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { ++ *list = g_slist_append(*list, DEVICE(obj)); ++ 
} ++ ++ object_child_foreach(obj, sgx_epc_device_list, opaque); ++ return 0; ++} ++ ++static GSList *sgx_epc_get_device_list(void) ++{ ++ GSList *list = NULL; ++ ++ object_child_foreach(qdev_get_machine(), sgx_epc_device_list, &list); ++ return list; ++} ++ ++void sgx_epc_build_srat(GArray *table_data) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ uint64_t addr, size; ++ int node; ++ ++ node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ addr = object_property_get_uint(obj, SGX_EPC_ADDR_PROP, &error_abort); ++ size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, &error_abort); ++ ++ build_srat_memory(table_data, addr, size, node, MEM_AFFINITY_ENABLED); ++ } ++ g_slist_free(device_list); ++} ++ + static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + { + return (low & MAKE_64BIT_MASK(12, 20)) + +@@ -226,6 +267,9 @@ void pc_machine_init_sgx_epc(PCMachineState *pcms) + /* set the memdev link with memory backend */ + object_property_parse(obj, SGX_EPC_MEMDEV_PROP, list->value->memdev, + &error_fatal); ++ /* set the numa node property for sgx epc object */ ++ object_property_set_uint(obj, SGX_EPC_NUMA_NODE_PROP, list->value->node, ++ &error_fatal); + object_property_set_bool(obj, "realized", true, &error_fatal); + object_unref(obj); + } +diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h +index a6a65be854..581fac389a 100644 +--- a/include/hw/i386/sgx-epc.h ++++ b/include/hw/i386/sgx-epc.h +@@ -25,6 +25,7 @@ + #define SGX_EPC_ADDR_PROP "addr" + #define SGX_EPC_SIZE_PROP "size" + #define SGX_EPC_MEMDEV_PROP "memdev" ++#define SGX_EPC_NUMA_NODE_PROP "node" + + /** + * SGXEPCDevice: +@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { + + /* public */ + uint64_t addr; ++ uint32_t node; + HostMemoryBackendEpc *hostmem; + } SGXEPCDevice; + +@@ -56,6 +58,7 @@ typedef struct 
SGXEPCState { + } SGXEPCState; + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); ++void sgx_epc_build_srat(GArray *table_data); + + static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) + { +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 9c91bf93e9..2669156b28 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -1810,6 +1810,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict) + se->id ? se->id : ""); + monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n", se->memaddr); + monitor_printf(mon, " size: %" PRIu64 "\n", se->size); ++ monitor_printf(mon, " node: %" PRId64 "\n", se->node); + monitor_printf(mon, " memdev: %s\n", se->memdev); + break; + default: +diff --git a/qapi/machine.json b/qapi/machine.json +index 067e3f5378..16e771affc 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,12 +1207,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPCDeviceInfo', + 'data': { '*id': 'str', + 'memaddr': 'size', + 'size': 'size', ++ 'node': 'int', + 'memdev': 'str' + } + } +@@ -1285,10 +1288,15 @@ + # + # @memdev: memory backend linked with device + # ++# @node: the numa node ++# + # Since: 6.2 + ## + { 'struct': 'SgxEPC', +- 'data': { 'memdev': 'str' } } ++ 'data': { 'memdev': 'str', ++ 'node': 'int' ++ } ++} + + ## + # @SgxEPCProperties: +diff --git a/qemu-options.hx b/qemu-options.hx +index 94c4a8dbaf..4b7798088b 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -127,11 +127,11 @@ SRST + ERST + + DEF("M", HAS_ARG, QEMU_OPTION_M, +- " sgx-epc.0.memdev=memid\n", ++ " sgx-epc.0.memdev=memid,sgx-epc.0.node=numaid\n", + QEMU_ARCH_ALL) + + SRST +-``sgx-epc.0.memdev=@var{memid}`` ++``sgx-epc.0.memdev=@var{memid},sgx-epc.0.node=@var{numaid}`` + Define an SGX EPC section. 
+ ERST + +-- +2.27.0 + diff --git a/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch new file mode 100644 index 0000000..659dc22 --- /dev/null +++ b/kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch @@ -0,0 +1,210 @@ +From ea46a86ba6319ea98573c65af5186cd5399ab0ce Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Mon, 1 Nov 2021 12:20:07 -0400 +Subject: [PATCH 2/7] numa: Support SGX numa in the monitor and Libvirt + interfaces + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [2/5] 403c4f98dccd023293cd3246081ae12f4782bed0 +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +Add the SGXEPCSection list into SGXInfo to show the multiple +SGX EPC sections detailed info, not the total size like before. +This patch can enable numa support for 'info sgx' command and +QMP interfaces. The new interfaces show each EPC section info +in one numa node. Libvirt can use QMP interface to get the +detailed host SGX EPC capabilities to decide how to allocate +host EPC sections to guest. 
+ +(qemu) info sgx + SGX support: enabled + SGX1 support: enabled + SGX2 support: enabled + FLC support: enabled + NUMA node #0: size=67108864 + NUMA node #1: size=29360128 + +The QMP interface show: +(QEMU) query-sgx +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 67108864}, {"node": 1, "size": 29360128}], "flc": true}} + +(QEMU) query-sgx-capabilities +{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ +[{"node": 0, "size": 17070817280}, {"node": 1, "size": 17079205888}], "flc": true}} + +Signed-off-by: Yang Zhong +Message-Id: <20211101162009.62161-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 4755927ae12547c2e7cb22c5fa1b39038c6c11b1) +Signed-off-by: Paul Lai +--- + hw/i386/sgx.c | 51 +++++++++++++++++++++++++++++++++++-------- + qapi/misc-target.json | 19 ++++++++++++++-- + 2 files changed, 59 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index d04299904a..5de5dd0893 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,11 +83,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static uint64_t sgx_calc_host_epc_section_size(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + { ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; + uint32_t i, type; + uint32_t eax, ebx, ecx, edx; +- uint64_t size = 0; ++ uint32_t j = 0; + + for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { + host_cpuid(0x12, i + 2, &eax, &ebx, &ecx, &edx); +@@ -101,10 +103,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) + break; + } + +- size += sgx_calc_section_metric(ecx, edx); ++ section = g_new0(SGXEPCSection, 1); ++ section->node = j++; ++ section->size = sgx_calc_section_metric(ecx, edx); ++ QAPI_LIST_APPEND(tail, section); + } + +- return size; ++ return head; + } + + static void sgx_epc_reset(void *opaque) +@@ -168,13 +173,35 @@ SGXInfo 
*qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->section_size = sgx_calc_host_epc_section_size(); ++ info->sections = sgx_calc_host_epc_sections(); + + close(fd); + + return info; + } + ++static SGXEPCSectionList *sgx_get_epc_sections_list(void) ++{ ++ GSList *device_list = sgx_epc_get_device_list(); ++ SGXEPCSectionList *head = NULL, **tail = &head; ++ SGXEPCSection *section; ++ ++ for (; device_list; device_list = device_list->next) { ++ DeviceState *dev = device_list->data; ++ Object *obj = OBJECT(dev); ++ ++ section = g_new0(SGXEPCSection, 1); ++ section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP, ++ &error_abort); ++ section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, ++ &error_abort); ++ QAPI_LIST_APPEND(tail, section); ++ } ++ g_slist_free(device_list); ++ ++ return head; ++} ++ + SGXInfo *qmp_query_sgx(Error **errp) + { + SGXInfo *info = NULL; +@@ -193,14 +220,13 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + +- SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; +- info->section_size = sgx_epc->size; ++ info->sections = sgx_get_epc_sections_list(); + + return info; + } +@@ -208,6 +234,7 @@ SGXInfo *qmp_query_sgx(Error **errp) + void hmp_info_sgx(Monitor *mon, const QDict *qdict) + { + Error *err = NULL; ++ SGXEPCSectionList *section_list, *section; + g_autoptr(SGXInfo) info = qmp_query_sgx(&err); + + if (err) { +@@ -222,8 +249,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); +- monitor_printf(mon, "size: %" PRIu64 "\n", +- info->section_size); ++ ++ section_list = info->sections; ++ for (section = section_list; section; section = section->next) { ++ monitor_printf(mon, "NUMA node #%" PRId64 ": ", ++ section->value->node); ++ monitor_printf(mon, "size=%" PRIu64 "\n", ++ section->value->size); ++ } + } + + bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size) +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 5aa2b95b7d..1022aa0184 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -337,6 +337,21 @@ + 'if': 'TARGET_ARM' } + + ++## ++# @SGXEPCSection: ++# ++# Information about intel SGX EPC section info ++# ++# @node: the numa node ++# ++# @size: the size of epc section ++# ++# Since: 6.2 ++## ++{ 'struct': 'SGXEPCSection', ++ 'data': { 'node': 'int', ++ 'size': 'uint64'}} ++ + ## + # @SGXInfo: + # +@@ -350,7 +365,7 @@ + # + # @flc: true if FLC is supported + # +-# @section-size: The EPC section size for guest ++# @sections: The EPC sections info for guest + # + # Since: 6.2 + ## +@@ -359,7 +374,7 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', +- 'section-size': 'uint64'}, ++ 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + + ## +-- +2.27.0 + diff --git a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch new file mode 100644 index 0000000..b212194 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch @@ -0,0 +1,180 @@ +From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from + virtio_blk_setup_device() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [8/10] 
f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 +Author: Thomas Huth +Date: Mon Jul 4 13:19:00 2022 +0200 + + pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() + + The next patch is going to add more virtio-block specific code to + virtio_blk_setup_device(), and if the virtio-scsi code is also in + there, this is more cumbersome. And the calling function virtio_setup() + in main.c looks at the device type already anyway, so it's more + logical to separate the virtio-scsi stuff into a new function in + virtio-scsi.c instead. + + Message-Id: <20220704111903.62400-10-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/main.c | 24 +++++++++++++++++------- + pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------ + pc-bios/s390-ccw/virtio-scsi.c | 19 ++++++++++++++++++- + pc-bios/s390-ccw/virtio-scsi.h | 2 +- + 4 files changed, 38 insertions(+), 27 deletions(-) + +diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c +index 5d2b7ba94d..13e1d8fdf7 100644 +--- a/pc-bios/s390-ccw/main.c ++++ b/pc-bios/s390-ccw/main.c +@@ -14,6 +14,7 @@ + #include "s390-ccw.h" + #include "cio.h" + #include "virtio.h" ++#include "virtio-scsi.h" + #include "dasd-ipl.h" + + char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE))); +@@ -218,6 +219,7 @@ static int virtio_setup(void) + { + VDev *vdev = virtio_get_device(); + QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS; ++ int ret; + + memcpy(&qipl, early_qipl, sizeof(QemuIplParameters)); + +@@ -225,18 +227,26 @@ static int virtio_setup(void) + menu_setup(); + } + +- if (virtio_get_device_type() == VIRTIO_ID_NET) { ++ switch (vdev->senseid.cu_model) { ++ case VIRTIO_ID_NET: + 
sclp_print("Network boot device detected\n"); + vdev->netboot_start_addr = qipl.netboot_start_addr; +- } else { +- int ret = virtio_blk_setup_device(blk_schid); +- if (ret) { +- return ret; +- } ++ return 0; ++ case VIRTIO_ID_BLOCK: ++ ret = virtio_blk_setup_device(blk_schid); ++ break; ++ case VIRTIO_ID_SCSI: ++ ret = virtio_scsi_setup_device(blk_schid); ++ break; ++ default: ++ panic("\n! No IPL device available !\n"); ++ } ++ ++ if (!ret) { + IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected"); + } + +- return 0; ++ return ret; + } + + static void ipl_boot_device(void) +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index db1f7f44aa..c175b66a47 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void) + int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); +- int ret = 0; + + vdev->schid = schid; + virtio_setup_ccw(vdev); + +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- sclp_print("Using virtio-blk.\n"); +- break; +- case VIRTIO_ID_SCSI: +- IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +- "Config: sense size mismatch"); +- IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, +- "Config: CDB size mismatch"); ++ sclp_print("Using virtio-blk.\n"); + +- sclp_print("Using virtio-scsi.\n"); +- ret = virtio_scsi_setup(vdev); +- break; +- default: +- panic("\n! 
No IPL device available !\n"); +- } +- +- return ret; ++ return 0; + } +diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c +index 2c8d0f3097..3b7069270c 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.c ++++ b/pc-bios/s390-ccw/virtio-scsi.c +@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data, + } + } + +-int virtio_scsi_setup(VDev *vdev) ++static int virtio_scsi_setup(VDev *vdev) + { + int retry_test_unit_ready = 3; + uint8_t data[256]; +@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev) + + return 0; + } ++ ++int virtio_scsi_setup_device(SubChannelId schid) ++{ ++ VDev *vdev = virtio_get_device(); ++ ++ vdev->schid = schid; ++ virtio_setup_ccw(vdev); ++ ++ IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, ++ "Config: sense size mismatch"); ++ IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE, ++ "Config: CDB size mismatch"); ++ ++ sclp_print("Using virtio-scsi.\n"); ++ ++ return virtio_scsi_setup(vdev); ++} +diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h +index 4b14c2c2f9..e6b6cd4815 100644 +--- a/pc-bios/s390-ccw/virtio-scsi.h ++++ b/pc-bios/s390-ccw/virtio-scsi.h +@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r) + return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD; + } + +-int virtio_scsi_setup(VDev *vdev); + int virtio_scsi_read_many(VDev *vdev, + ulong sector, void *load_addr, int sec_num); ++int virtio_scsi_setup_device(SubChannelId schid); + + #endif /* VIRTIO_SCSI_H */ +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch new file mode 100644 index 0000000..231a8a0 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch @@ -0,0 +1,102 @@ +From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 
+Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in + zipl_load_vblk() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 422865f6672ee1482b98d18321b55c1ecfb06c82 +Author: Thomas Huth +Date: Mon Jul 4 13:18:54 2022 +0200 + + pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() + + The logic of trying an final ISO or ECKD boot on virtio-block devices is + very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(), + virtio_blk_setup_device() always sets a "guessed" disk geometry via + virtio_assume_scsi() (which is certainly also wrong in a lot of cases). + + zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature" + and tries to fix up the geometry again via virtio_assume_iso9660() before + always trying to do ipl_iso_el_torito(). That's a very brain-twisting + way of attempting to boot from ISO images, which won't work anymore after + the following patches that will clean up the virtio_assume_scsi() mess + (and thus get rid of the "virtio_guessed_disk_nature" here). + + Let's try a better approach instead: ISO files always have a magic + string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification) + which we can use to decide whether we should try to boot in ISO 9660 + mode (which we should also try if we see a sector size of 2048). + + And if we were not able to boot in ISO mode here, the final boot attempt + before panicking is to boot in ECKD mode. 
Since this is our last boot + attempt anyway, simply always assume the ECKD geometry here (if the sector + size was not 4096 yet), so that we also do not depend on the guessed disk + geometry from virtio_blk_setup_device() here anymore. + + Message-Id: <20220704111903.62400-4-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c +index 56411ab3b6..994e59c0b0 100644 +--- a/pc-bios/s390-ccw/bootmap.c ++++ b/pc-bios/s390-ccw/bootmap.c +@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void) + } + } + ++/** ++ * Detect whether we're trying to boot from an .ISO image. ++ * These always have a signature string "CD001" at offset 0x8001. ++ */ ++static bool has_iso_signature(void) ++{ ++ int blksize = virtio_get_block_size(); ++ ++ if (!blksize || virtio_read(0x8000 / blksize, sec)) { ++ return false; ++ } ++ ++ return !memcmp("CD001", &sec[1], 5); ++} ++ + /*********************************************************************** + * Bus specific IPL sequences + */ + + static void zipl_load_vblk(void) + { +- if (virtio_guessed_disk_nature()) { +- virtio_assume_iso9660(); ++ int blksize = virtio_get_block_size(); ++ ++ if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) { ++ if (blksize != VIRTIO_ISO_BLOCK_SIZE) { ++ virtio_assume_iso9660(); ++ } ++ ipl_iso_el_torito(); + } +- ipl_iso_el_torito(); + +- if (virtio_guessed_disk_nature()) { ++ if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) { + sclp_print("Using guessed DASD geometry.\n"); + virtio_assume_eckd(); + } +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch new file mode 100644 index 0000000..00601aa --- /dev/null +++ b/kvm-pc-bios-s390-ccw-netboot.mak-Ignore-Clang-s-warnings.patch @@ -0,0 +1,78 
@@ +From 56674ee1f25f12978a6a8a1390e11b55b3e0fabe Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 15/17] pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings + about GNU extensions + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [10/10] 037dab4df23ebb2b42871bca8c842a53a7204b50 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit e2269220acb03e6c6a460c3090d804835e202239 +Author: Thomas Huth +Date: Mon Jul 4 13:19:03 2022 +0200 + + pc-bios/s390-ccw/netboot.mak: Ignore Clang's warnings about GNU extensions + + When compiling the s390-ccw bios with Clang (v14.0), there is currently + an unuseful warning like this: + + CC pc-bios/s390-ccw/ipv6.o + ../../roms/SLOF/lib/libnet/ipv6.c:447:18: warning: variable length array + folded to constant array as an extension [-Wgnu-folding-constant] + unsigned short raw[ip6size]; + ^ + + SLOF is currently GCC-only and cannot be compiled with Clang yet, so + it is expected that such extensions sneak in there - and as long as + we don't want to compile the code with a compiler that is neither GCC + or Clang, it is also not necessary to avoid such extensions. + + Thus these GNU-extension related warnings are completely useless in + the s390-ccw bios, especially in the code that is coming from SLOF, + so we should simply disable the related warnings here now. 
+ + Message-Id: <20220704111903.62400-13-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/netboot.mak | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/pc-bios/s390-ccw/netboot.mak b/pc-bios/s390-ccw/netboot.mak +index 68b4d7edcb..ad41898cb6 100644 +--- a/pc-bios/s390-ccw/netboot.mak ++++ b/pc-bios/s390-ccw/netboot.mak +@@ -16,9 +16,12 @@ s390-netboot.elf: $(NETOBJS) libnet.a libc.a + s390-netboot.img: s390-netboot.elf + $(call quiet-command,$(STRIP) --strip-unneeded $< -o $@,"STRIP","$(TARGET_DIR)$@") + ++# SLOF is GCC-only, so ignore warnings about GNU extensions with Clang here ++NO_GNU_WARN := $(call cc-option,-Werror $(QEMU_CFLAGS),-Wno-gnu) ++ + # libc files: + +-LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBC_CFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + CTYPE_OBJS = isdigit.o isxdigit.o toupper.o +@@ -52,7 +55,7 @@ libc.a: $(LIBCOBJS) + + LIBNETOBJS := args.o dhcp.o dns.o icmpv6.o ipv6.o tcp.o udp.o bootp.o \ + dhcpv6.o ethernet.o ipv4.o ndp.o tftp.o pxelinux.o +-LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(LIBC_INC) $(LIBNET_INC) \ ++LIBNETCFLAGS = $(QEMU_CFLAGS) $(CFLAGS) $(NO_GNU_WARN) $(LIBC_INC) $(LIBNET_INC) \ + -DDHCPARCH=0x1F -MMD -MP -MT $@ -MF $(@:%.o=%.d) + + %.o : $(SLOF_DIR)/lib/libnet/%.c +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch new file mode 100644 index 0000000..5e4b689 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch @@ -0,0 +1,56 @@ +From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading + virtqueue configuration + +RH-Author: Thomas Huth +RH-MergeRequest: 106: 
pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 070824885741f5d2a66626d3c4ecb2773c8e0552 +Author: Thomas Huth +Date: Mon Jul 4 13:18:59 2022 +0200 + + pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration + + It looks nicer if we separate the run_ccw() from the IPL_assert() + statement, and the error message should talk about "virtio device" + instead of "block device", since this code is nowadays used for + non-block (i.e. network) devices, too. + + Message-Id: <20220704111903.62400-9-thuth@redhat.com> + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index d8c2b52710..f37510f312 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev) + .num = 0, + }; + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0, +- "Could not get block device VQ configuration"); ++ rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false); ++ IPL_assert(rc == 0, "Could not get virtio device VQ configuration"); + info.num = config.num; + vring_init(&vdev->vrings[i], &info); + vdev->vrings[i].schid = vdev->schid; +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch new file mode 100644 index 0000000..04ab605 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch @@ -0,0 +1,63 @@ +From 
7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD + block size + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 +Author: Thomas Huth +Date: Mon Jul 4 13:18:53 2022 +0200 + + pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size + + Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096. + + Message-Id: <20220704111903.62400-3-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + pc-bios/s390-ccw/virtio.h | 1 + + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7d35050292..6483307630 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -155,7 +155,7 @@ void virtio_assume_eckd(void) + vdev->config.blk.physical_block_exp = 0; + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- vdev->config.blk.blk_size = 4096; ++ vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE; + break; + case VIRTIO_ID_SCSI: + vdev->config.blk.blk_size = vdev->scsi_block_size; +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 19fceb6495..9e410bde6f 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num); + #define VIRTIO_SECTOR_SIZE 512 
+ #define VIRTIO_ISO_BLOCK_SIZE 2048 + #define VIRTIO_SCSI_BLOCK_SIZE 512 ++#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096 + + static inline ulong virtio_sector_adjust(ulong sector) + { +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch new file mode 100644 index 0000000..41ae538 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch @@ -0,0 +1,67 @@ +From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after + feature negotiation + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit aa5c69ce99411c4886bcd051f288afc02b6d968d +Author: Thomas Huth +Date: Mon Jul 4 13:18:58 2022 +0200 + + pc-bios/s390-ccw/virtio: Read device config after feature negotiation + + Feature negotiation should be done first, since some fields in the + config area can depend on the negotiated features and thus should + rather be read afterwards. + + While we're at it, also adjust the error message here a little bit + (the code is nowadays used for non-block virtio devices, too). 
+ + Message-Id: <20220704111903.62400-8-thuth@redhat.com> + Reviewed-by: Eric Farman + Reviewed-by: Cornelia Huck + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 4e85a2eb82..d8c2b52710 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev) + rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); + IPL_assert(rc == 0, "Could not write DRIVER status to host"); + +- IPL_assert( +- run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, +- "Could not get block device configuration"); +- + /* Feature negotiation */ + for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) { + feats.features = 0; +@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev) + IPL_assert(rc == 0, "Could not set features bits"); + } + ++ rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false); ++ IPL_assert(rc == 0, "Could not get virtio device configuration"); ++ + for (i = 0; i < vdev->nr_vqs; i++) { + VqInfo info = { + .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch new file mode 100644 index 0000000..e976047 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch @@ -0,0 +1,93 @@ +From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while + initializing + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [5/10] 
4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c +Author: Thomas Huth +Date: Mon Jul 4 13:18:57 2022 +0200 + + pc-bios/s390-ccw/virtio: Set missing status bits while initializing + + According chapter "3.1.1 Driver Requirements: Device Initialization" + of the Virtio specification (v1.1), a driver for a device has to set + the ACKNOWLEDGE and DRIVER bits in the status field after resetting + the device. The s390-ccw bios skipped these steps so far and seems + like QEMU never cared. Anyway, it's better to follow the spec, so + let's set these bits now in the right spots, too. + + Message-Id: <20220704111903.62400-7-thuth@redhat.com> + Acked-by: Christian Borntraeger + Reviewed-by: Cornelia Huck + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c +index 5d2c6e3381..4e85a2eb82 100644 +--- a/pc-bios/s390-ccw/virtio.c ++++ b/pc-bios/s390-ccw/virtio.c +@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd) + void virtio_setup_ccw(VDev *vdev) + { + int i, rc, cfg_size = 0; +- unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK; ++ uint8_t status; + struct VirtioFeatureDesc { + uint32_t features; + uint8_t index; +@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev) + + run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false); + ++ status = VIRTIO_CONFIG_S_ACKNOWLEDGE; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host"); ++ + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_NET: + vdev->nr_vqs = 2; +@@ -253,6 +257,11 @@ void 
virtio_setup_ccw(VDev *vdev) + default: + panic("Unsupported virtio device\n"); + } ++ ++ status |= VIRTIO_CONFIG_S_DRIVER; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER status to host"); ++ + IPL_assert( + run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0, + "Could not get block device configuration"); +@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev) + run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0, + "Cannot set VQ info"); + } +- IPL_assert( +- run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0, +- "Could not write status to host"); ++ ++ status |= VIRTIO_CONFIG_S_DRIVER_OK; ++ rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false); ++ IPL_assert(rc == 0, "Could not write DRIVER_OK status to host"); + } + + bool virtio_is_supported(SubChannelId schid) +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch new file mode 100644 index 0000000..109b98e --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch @@ -0,0 +1,101 @@ +From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove + virtio_assume_scsi() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 5447de2619050a0a4dd480b97f88a9b58da360d1 +Author: Thomas Huth +Date: Mon Jul 4 13:18:56 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Remove 
virtio_assume_scsi() + + The virtio_assume_scsi() function is very questionable: First, it + is only called for virtio-blk, and not for virtio-scsi, so the naming + is already quite confusing. Second, it is called if we detected a + "invalid" IPL disk, trying to fix it by blindly setting a sector + size of 512. This of course won't work in most cases since disks + might have a different sector size for a reason. + + Thus let's remove this strange function now. The calling code can + also be removed completely, since there is another spot in main.c + that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make + sure that we do not try to IPL from an invalid device. + + Message-Id: <20220704111903.62400-6-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------ + pc-bios/s390-ccw/virtio.h | 1 - + 2 files changed, 25 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 7e13155589..db1f7f44aa 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void) + return virtio_get_device()->guessed_disk_nature; + } + +-void virtio_assume_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- vdev->guessed_disk_nature = VIRTIO_GDN_SCSI; +- vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE; +- vdev->config.blk.physical_block_exp = 0; +- vdev->blk_factor = 1; +- break; +- case VIRTIO_ID_SCSI: +- vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE; +- break; +- } +-} +- + void virtio_assume_iso9660(void) + { + VDev *vdev = virtio_get_device(); +@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid) + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: + sclp_print("Using virtio-blk.\n"); +- if (!virtio_ipl_disk_is_valid()) { +- /* make sure all 
getters but blocksize return 0 for +- * invalid IPL disk +- */ +- memset(&vdev->config.blk, 0, sizeof(vdev->config.blk)); +- virtio_assume_scsi(); +- } + break; + case VIRTIO_ID_SCSI: + IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE, +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 241730effe..600ba5052b 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -182,7 +182,6 @@ enum guessed_disk_nature_type { + typedef enum guessed_disk_nature_type VirtioGDN; + + VirtioGDN virtio_guessed_disk_nature(void); +-void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch new file mode 100644 index 0000000..8bc7a11 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch @@ -0,0 +1,63 @@ +From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right + feature bits + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2098077 + +commit 9125a314cca4a1838b09305a87d8efb98f80ab67 +Author: Thomas Huth +Date: Mon Jul 4 13:19:01 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits + + The virtio-blk code uses the block size and geometry fields in the + config area. According to the virtio-spec, these have to be negotiated + with the right feature bits during initialization, otherwise they + might not be available. 
QEMU is so far very forgiving and always + provides them, but we should not rely on this behavior, so let's + better request them properly via the VIRTIO_BLK_F_GEOMETRY and + VIRTIO_BLK_F_BLK_SIZE feature bits. + + Message-Id: <20220704111903.62400-11-thuth@redhat.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index c175b66a47..8271c47296 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -13,6 +13,9 @@ + #include "virtio.h" + #include "virtio-scsi.h" + ++#define VIRTIO_BLK_F_GEOMETRY (1 << 4) ++#define VIRTIO_BLK_F_BLK_SIZE (1 << 6) ++ + static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr, + int sec_num) + { +@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid) + { + VDev *vdev = virtio_get_device(); + ++ vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE; + vdev->schid = schid; + virtio_setup_ccw(vdev); + +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch new file mode 100644 index 0000000..818e515 --- /dev/null +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch @@ -0,0 +1,124 @@ +From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 8 Jul 2022 20:49:01 +0200 +Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix + virtio_ipl_disk_is_valid() + +RH-Author: Thomas Huth +RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2098077 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: 
http://bugzilla.redhat.com/2098077 + +commit bbf615f7b707f009ef8e757d170902ad33b90644 +Author: Thomas Huth +Date: Mon Jul 4 13:18:55 2022 +0200 + + pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() + + The s390-ccw bios fails to boot if the boot disk is a virtio-blk + disk with a sector size of 4096. For example: + + dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX + fdasd -a /dev/dasdX + install a guest onto /dev/dasdX1 using virtio-blk + qemu-system-s390x -nographic -hda /dev/dasdX1 + + The bios then bails out with: + + ! Cannot read block 0 ! + + Looking at virtio_ipl_disk_is_valid() and especially the function + virtio_disk_is_scsi(), it does not really make sense that we expect + only such a limited disk geometry (like a block size of 512) for + our boot disks. Let's relax the check and allow everything that + remotely looks like a sane disk. + + Message-Id: <20220704111903.62400-5-thuth@redhat.com> + Reviewed-by: Eric Farman + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++-------------------------- + pc-bios/s390-ccw/virtio.h | 2 -- + 2 files changed, 7 insertions(+), 36 deletions(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 6483307630..7e13155589 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -166,46 +166,19 @@ void virtio_assume_eckd(void) + virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size); + } + +-bool virtio_disk_is_scsi(void) +-{ +- VDev *vdev = virtio_get_device(); +- +- if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) { +- return true; +- } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 255) +- && (vdev->config.blk.geometry.sectors == 63) +- && (virtio_get_block_size() == VIRTIO_SCSI_BLOCK_SIZE); +- case VIRTIO_ID_SCSI: +- return true; +- } +- return false; +-} +- +-bool virtio_disk_is_eckd(void) ++bool 
virtio_ipl_disk_is_valid(void) + { ++ int blksize = virtio_get_block_size(); + VDev *vdev = virtio_get_device(); +- const int block_size = virtio_get_block_size(); + +- if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { ++ if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI || ++ vdev->guessed_disk_nature == VIRTIO_GDN_DASD) { + return true; + } +- switch (vdev->senseid.cu_model) { +- case VIRTIO_ID_BLOCK: +- return (vdev->config.blk.geometry.heads == 15) +- && (vdev->config.blk.geometry.sectors == +- virtio_eckd_sectors_for_block_size(block_size)); +- case VIRTIO_ID_SCSI: +- return false; +- } +- return false; +-} + +-bool virtio_ipl_disk_is_valid(void) +-{ +- return virtio_disk_is_scsi() || virtio_disk_is_eckd(); ++ return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK || ++ vdev->senseid.cu_model == VIRTIO_ID_SCSI) && ++ blksize >= 512 && blksize <= 4096; + } + + int virtio_get_block_size(void) +diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h +index 9e410bde6f..241730effe 100644 +--- a/pc-bios/s390-ccw/virtio.h ++++ b/pc-bios/s390-ccw/virtio.h +@@ -186,8 +186,6 @@ void virtio_assume_scsi(void); + void virtio_assume_eckd(void); + void virtio_assume_iso9660(void); + +-extern bool virtio_disk_is_scsi(void); +-extern bool virtio_disk_is_eckd(void); + extern bool virtio_ipl_disk_is_valid(void); + extern int virtio_get_block_size(void); + extern uint8_t virtio_get_heads(void); +-- +2.31.1 + diff --git a/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch b/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch new file mode 100644 index 0000000..817f0ab --- /dev/null +++ b/kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch @@ -0,0 +1,83 @@ +From 7998e8aa78caa35c2ab2da44f9e29e21d7548c61 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 23 Mar 2022 13:21:40 -0400 +Subject: [PATCH 05/18] pci: expose TYPE_XIO3130_DOWNSTREAM name + +RH-Author: Jon Maloy +RH-MergeRequest: 134: pci: expose TYPE_XIO3130_DOWNSTREAM name +RH-Commit: [1/2] 
f09ddcaf686f22b545bf269f87787ebfc33fccda (jmaloy/qemu-kvm) +RH-Bugzilla: 2062610 +RH-Acked-by: Igor Mammedov +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/2062610 +UPSTREAM: merged +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=44038138 + +commit c41481af9a5d0d463607cc45b45c510875570817 +Author: Igor Mammedov +Date: Tue Mar 1 10:11:58 2022 -0500 + + pci: expose TYPE_XIO3130_DOWNSTREAM name + + Type name will be used in followup patch for cast check + in pcihp code. + + Signed-off-by: Igor Mammedov + Message-Id: <20220301151200.3507298-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit c41481af9a5d0d463607cc45b45c510875570817) +Signed-off-by: Jon Maloy +--- + hw/pci-bridge/xio3130_downstream.c | 3 ++- + include/hw/pci-bridge/xio3130_downstream.h | 15 +++++++++++++++ + 2 files changed, 17 insertions(+), 1 deletion(-) + create mode 100644 include/hw/pci-bridge/xio3130_downstream.h + +diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c +index 04aae72cd6..b17cafd359 100644 +--- a/hw/pci-bridge/xio3130_downstream.c ++++ b/hw/pci-bridge/xio3130_downstream.c +@@ -28,6 +28,7 @@ + #include "migration/vmstate.h" + #include "qapi/error.h" + #include "qemu/module.h" ++#include "hw/pci-bridge/xio3130_downstream.h" + + #define PCI_DEVICE_ID_TI_XIO3130D 0x8233 /* downstream port */ + #define XIO3130_REVISION 0x1 +@@ -173,7 +174,7 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data) + } + + static const TypeInfo xio3130_downstream_info = { +- .name = "xio3130-downstream", ++ .name = TYPE_XIO3130_DOWNSTREAM, + .parent = TYPE_PCIE_SLOT, + .class_init = xio3130_downstream_class_init, + .interfaces = (InterfaceInfo[]) { +diff --git a/include/hw/pci-bridge/xio3130_downstream.h b/include/hw/pci-bridge/xio3130_downstream.h +new file mode 100644 +index 0000000000..1d10139aea +--- /dev/null ++++ 
b/include/hw/pci-bridge/xio3130_downstream.h +@@ -0,0 +1,15 @@ ++/* ++ * TI X3130 pci express downstream port switch ++ * ++ * Copyright (C) 2022 Igor Mammedov ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++#define HW_PCI_BRIDGE_XIO3130_DOWNSTREAM_H ++ ++#define TYPE_XIO3130_DOWNSTREAM "xio3130-downstream" ++ ++#endif ++ +-- +2.27.0 + diff --git a/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch new file mode 100644 index 0000000..5ef458c --- /dev/null +++ b/kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch @@ -0,0 +1,214 @@ +From d0cd7be4d347ebe118eb8f3f2fc2eb3e3eb77e3a Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Thu, 20 Jan 2022 17:31:04 -0500 +Subject: [PATCH 5/7] qapi: Cleanup SGX related comments and restore + @section-size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 111: numa: Enable numa for SGX EPC sections +RH-Commit: [5/5] 497dbeaebb7b8f99f5f8a7de58000dcab0d0c22d +RH-Bugzilla: 1518984 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Bandan Das +RH-Acked-by: Cornelia Huck + +The SGX NUMA patches were merged into Qemu 7.0 release, we need +clarify detailed version history information and also change +some related comments, which make SGX related comments clearer. + +The QMP command schema promises backwards compatibility as standard. +We temporarily restore "@section-size", which can avoid incompatible +API breakage. The "@section-size" will be deprecated in 7.2 version. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Yang Zhong +Reviewed-by: Daniel P. 
Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20220120223104.437161-1-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a66bd91f030827742778a9e0da19fe55716b4a60) +Signed-off-by: Paul Lai +--- + docs/about/deprecated.rst | 13 +++++++++++++ + hw/i386/sgx.c | 11 +++++++++-- + qapi/machine.json | 4 ++-- + qapi/misc-target.json | 22 +++++++++++++++++----- + 4 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst +index ff7488cb63..33925edf45 100644 +--- a/docs/about/deprecated.rst ++++ b/docs/about/deprecated.rst +@@ -270,6 +270,19 @@ accepted incorrect commands will return an error. Users should make sure that + all arguments passed to ``device_add`` are consistent with the documented + property types. + ++``query-sgx`` return value member ``section-size`` (since 7.0) ++'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. ++ ++ ++``query-sgx-capabilities`` return value member ``section-size`` (since 7.0) ++''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ++ ++Member ``section-size`` in return value elements with meta-type ``uint64`` is ++deprecated. Use ``sections`` instead. 
++ + System accelerators + ------------------- + +diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c +index 5de5dd0893..a2b318dd93 100644 +--- a/hw/i386/sgx.c ++++ b/hw/i386/sgx.c +@@ -83,7 +83,7 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) + ((high & MAKE_64BIT_MASK(0, 20)) << 32); + } + +-static SGXEPCSectionList *sgx_calc_host_epc_sections(void) ++static SGXEPCSectionList *sgx_calc_host_epc_sections(uint64_t *size) + { + SGXEPCSectionList *head = NULL, **tail = &head; + SGXEPCSection *section; +@@ -106,6 +106,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void) + section = g_new0(SGXEPCSection, 1); + section->node = j++; + section->size = sgx_calc_section_metric(ecx, edx); ++ *size += section->size; + QAPI_LIST_APPEND(tail, section); + } + +@@ -156,6 +157,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + { + SGXInfo *info = NULL; + uint32_t eax, ebx, ecx, edx; ++ uint64_t size = 0; + + int fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); + if (fd < 0) { +@@ -173,7 +175,8 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp) + info->sgx1 = eax & (1U << 0) ? true : false; + info->sgx2 = eax & (1U << 1) ? true : false; + +- info->sections = sgx_calc_host_epc_sections(); ++ info->sections = sgx_calc_host_epc_sections(&size); ++ info->section_size = size; + + close(fd); + +@@ -220,12 +223,14 @@ SGXInfo *qmp_query_sgx(Error **errp) + return NULL; + } + ++ SGXEPCState *sgx_epc = &pcms->sgx_epc; + info = g_new0(SGXInfo, 1); + + info->sgx = true; + info->sgx1 = true; + info->sgx2 = true; + info->flc = true; ++ info->section_size = sgx_epc->size; + info->sections = sgx_get_epc_sections_list(); + + return info; +@@ -249,6 +254,8 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) + info->sgx2 ? "enabled" : "disabled"); + monitor_printf(mon, "FLC support: %s\n", + info->flc ? 
"enabled" : "disabled"); ++ monitor_printf(mon, "size: %" PRIu64 "\n", ++ info->section_size); + + section_list = info->sections; + for (section = section_list; section; section = section->next) { +diff --git a/qapi/machine.json b/qapi/machine.json +index 16e771affc..a9f33d0f27 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -1207,7 +1207,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +@@ -1288,7 +1288,7 @@ + # + # @memdev: memory backend linked with device + # +-# @node: the numa node ++# @node: the numa node (Since: 7.0) + # + # Since: 6.2 + ## +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 1022aa0184..4bc45d2474 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -344,9 +344,9 @@ + # + # @node: the numa node + # +-# @size: the size of epc section ++# @size: the size of EPC section + # +-# Since: 6.2 ++# Since: 7.0 + ## + { 'struct': 'SGXEPCSection', + 'data': { 'node': 'int', +@@ -365,7 +365,13 @@ + # + # @flc: true if FLC is supported + # +-# @sections: The EPC sections info for guest ++# @section-size: The EPC section size for guest ++# Redundant with @sections. Just for backward compatibility. ++# ++# @sections: The EPC sections info for guest (Since: 7.0) ++# ++# Features: ++# @deprecated: Member @section-size is deprecated. Use @sections instead. 
+ # + # Since: 6.2 + ## +@@ -374,6 +380,8 @@ + 'sgx1': 'bool', + 'sgx2': 'bool', + 'flc': 'bool', ++ 'section-size': { 'type': 'uint64', ++ 'features': [ 'deprecated' ] }, + 'sections': ['SGXEPCSection']}, + 'if': 'TARGET_I386' } + +@@ -390,7 +398,9 @@ + # + # -> { "execute": "query-sgx" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "sections": [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +@@ -408,7 +418,9 @@ + # + # -> { "execute": "query-sgx-capabilities" } + # <- { "return": { "sgx": true, "sgx1" : true, "sgx2" : true, +-# "flc": true, "section-size" : 0 } } ++# "flc": true, "section-size" : 96468992, ++# "section" : [{"node": 0, "size": 67108864}, ++# {"node": 1, "size": 29360128}]} } + # + ## + { 'command': 'query-sgx-capabilities', 'returns': 'SGXInfo', 'if': 'TARGET_I386' } +-- +2.27.0 + diff --git a/kvm-qapi-machine.json-Add-cluster-id.patch b/kvm-qapi-machine.json-Add-cluster-id.patch new file mode 100644 index 0000000..2b2a22a --- /dev/null +++ b/kvm-qapi-machine.json-Add-cluster-id.patch @@ -0,0 +1,126 @@ +From e97c563f7146098119839aa146a6f25070eb7148 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:02 +0800 +Subject: [PATCH 01/16] qapi/machine.json: Add cluster-id + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [1/6] 44d7d83008c6d28485ae44f7cced792f4987b919 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +This adds cluster-id in CPU instance properties, which will be used +by arm/virt machine. 
Besides, the cluster-id is also verified or +dumped in various spots: + + * hw/core/machine.c::machine_set_cpu_numa_node() to associate + CPU with its NUMA node. + + * hw/core/machine.c::machine_numa_finish_cpu_init() to record + CPU slots with no NUMA mapping set. + + * hw/core/machine-hmp-cmds.c::hmp_hotpluggable_cpus() to dump + cluster-id. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-2-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit 1dcf7001d4bae651129d46d5628b29e93a411d0b) +Signed-off-by: Gavin Shan +--- + hw/core/machine-hmp-cmds.c | 4 ++++ + hw/core/machine.c | 16 ++++++++++++++++ + qapi/machine.json | 6 ++++-- + 3 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c +index 4e2f319aeb..5cb5eecbfc 100644 +--- a/hw/core/machine-hmp-cmds.c ++++ b/hw/core/machine-hmp-cmds.c +@@ -77,6 +77,10 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict *qdict) + if (c->has_die_id) { + monitor_printf(mon, " die-id: \"%" PRIu64 "\"\n", c->die_id); + } ++ if (c->has_cluster_id) { ++ monitor_printf(mon, " cluster-id: \"%" PRIu64 "\"\n", ++ c->cluster_id); ++ } + if (c->has_core_id) { + monitor_printf(mon, " core-id: \"%" PRIu64 "\"\n", c->core_id); + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index dffc3ef4ab..168f4de910 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -890,6 +890,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + return; + } + ++ if (props->has_cluster_id && !slot->props.has_cluster_id) { ++ error_setg(errp, "cluster-id is not supported"); ++ return; ++ } ++ + if (props->has_socket_id && !slot->props.has_socket_id) { + error_setg(errp, "socket-id is not supported"); + return; +@@ -909,6 +914,11 @@ void machine_set_cpu_numa_node(MachineState *machine, + continue; + } + ++ if (props->has_cluster_id && ++ props->cluster_id != slot->props.cluster_id) { ++ continue; 
++ } ++ + if (props->has_die_id && props->die_id != slot->props.die_id) { + continue; + } +@@ -1203,6 +1213,12 @@ static char *cpu_slot_to_string(const CPUArchId *cpu) + } + g_string_append_printf(s, "die-id: %"PRId64, cpu->props.die_id); + } ++ if (cpu->props.has_cluster_id) { ++ if (s->len) { ++ g_string_append_printf(s, ", "); ++ } ++ g_string_append_printf(s, "cluster-id: %"PRId64, cpu->props.cluster_id); ++ } + if (cpu->props.has_core_id) { + if (s->len) { + g_string_append_printf(s, ", "); +diff --git a/qapi/machine.json b/qapi/machine.json +index d25a481ce4..4c417e32a5 100644 +--- a/qapi/machine.json ++++ b/qapi/machine.json +@@ -868,10 +868,11 @@ + # @node-id: NUMA node ID the CPU belongs to + # @socket-id: socket number within node/board the CPU belongs to + # @die-id: die number within socket the CPU belongs to (since 4.1) +-# @core-id: core number within die the CPU belongs to ++# @cluster-id: cluster number within die the CPU belongs to (since 7.1) ++# @core-id: core number within cluster the CPU belongs to + # @thread-id: thread number within core the CPU belongs to + # +-# Note: currently there are 5 properties that could be present ++# Note: currently there are 6 properties that could be present + # but management should be prepared to pass through other + # properties with device_add command to allow for future + # interface extension. 
This also requires the filed names to be kept in +@@ -883,6 +884,7 @@ + 'data': { '*node-id': 'int', + '*socket-id': 'int', + '*die-id': 'int', ++ '*cluster-id': 'int', + '*core-id': 'int', + '*thread-id': 'int' + } +-- +2.31.1 + diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch new file mode 100644 index 0000000..9010d3d --- /dev/null +++ b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -0,0 +1,162 @@ +From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:52 +0200 +Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +Instead of fprint()-ing error messages in rebuild_refcount_structure() +and its rebuild_refcounts_write_refblocks() helper, pass them through an +Error object to qcow2_check_refcounts() (which will then print it). 
+ +Suggested-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-4-hreitz@redhat.com> +Reviewed-by: Eric Blake +(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 33 +++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index c5669eaa51..ed0ecfaa89 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + static int rebuild_refcounts_write_refblocks( + BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, + int64_t first_cluster, int64_t end_cluster, +- uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr, ++ Error **errp + ) + { + BDRVQcow2State *s = bs->opaque; +@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( + nb_clusters, + &first_free_cluster); + if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); ++ error_setg_errno(errp, -refblock_offset, ++ "ERROR allocating refblock"); + return refblock_offset; + } + +@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( + on_disk_reftable_entries * + REFTABLE_ENTRY_SIZE); + if (!on_disk_reftable) { ++ error_setg(errp, "ERROR allocating reftable memory"); + return -ENOMEM; + } + +@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( + ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, + 
s->cluster_size); + if (ret < 0) { +- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing refblock"); + return ret; + } + +@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( + static int rebuild_refcount_structure(BlockDriverState *bs, + BdrvCheckResult *res, + void **refcount_table, +- int64_t *nb_clusters) ++ int64_t *nb_clusters, ++ Error **errp) + { + BDRVQcow2State *s = bs->opaque; + int64_t reftable_offset = -1; +@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, + 0, *nb_clusters, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + refcount_table, nb_clusters, + &first_free_cluster); + if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); ++ error_setg_errno(errp, -reftable_offset, ++ "ERROR allocating reftable"); + res->check_errors++; + ret = reftable_offset; + goto fail; +@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + reftable_start_cluster, + reftable_end_cluster, + &on_disk_reftable, +- &on_disk_reftable_entries); ++ &on_disk_reftable_entries, errp); + if (reftable_size_changed < 0) { + res->check_errors++; + ret = reftable_size_changed; +@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + ret = bdrv_pwrite(bs->file, 
reftable_offset, on_disk_reftable, + reftable_length); + if (ret < 0) { +- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR writing reftable"); + goto fail; + } + +@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, + &reftable_offset_and_clusters, + sizeof(reftable_offset_and_clusters)); + if (ret < 0) { +- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret)); ++ error_setg_errno(errp, -ret, "ERROR setting reftable"); + goto fail; + } + +@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, + if (rebuild && (fix & BDRV_FIX_ERRORS)) { + BdrvCheckResult old_res = *res; + int fresh_leaks = 0; ++ Error *local_err = NULL; + + fprintf(stderr, "Rebuilding refcount structure\n"); + ret = rebuild_refcount_structure(bs, res, &refcount_table, +- &nb_clusters); ++ &nb_clusters, &local_err); + if (ret < 0) { ++ error_report_err(local_err); + goto fail; + } + +-- +2.31.1 + diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch new file mode 100644 index 0000000..cdc92b8 --- /dev/null +++ b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -0,0 +1,465 @@ +From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Tue, 5 Apr 2022 15:46:50 +0200 +Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding + +RH-Author: Hanna Reitz +RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) +RH-Bugzilla: 2072379 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi + +When rebuilding the refcount structures (when qemu-img check -r found +errors with refcount = 0, but reference count > 0), the new refcount +table defaults to being put at the image file end[1]. 
There is no good +reason for that except that it means we will not have to rewrite any +refblocks we already wrote to disk. + +Changing the code to rewrite those refblocks is not too difficult, +though, so let us do that. That is beneficial for images on block +devices, where we cannot really write beyond the end of the image file. + +Use this opportunity to add extensive comments to the code, and refactor +it a bit, getting rid of the backwards-jumping goto. + +[1] Unless there is something allocated in the area pointed to by the + last refblock, so we have to write that refblock. In that case, we + try to put the reftable in there. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071 +Closes: https://gitlab.com/qemu-project/qemu/-/issues/941 +Reviewed-by: Eric Blake +Signed-off-by: Hanna Reitz +Message-Id: <20220405134652.19278-2-hreitz@redhat.com> +(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e) +Signed-off-by: Hanna Reitz +--- + block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------ + 1 file changed, 235 insertions(+), 97 deletions(-) + +diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c +index b91499410c..c5669eaa51 100644 +--- a/block/qcow2-refcount.c ++++ b/block/qcow2-refcount.c +@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, + } + + /* +- * Creates a new refcount structure based solely on the in-memory information +- * given through *refcount_table. All necessary allocations will be reflected +- * in that array. ++ * Helper function for rebuild_refcount_structure(). + * +- * On success, the old refcount structure is leaked (it will be covered by the +- * new refcount structure). ++ * Scan the range of clusters [first_cluster, end_cluster) for allocated ++ * clusters and write all corresponding refblocks to disk. 
The refblock ++ * and allocation data is taken from the in-memory refcount table ++ * *refcount_table[] (of size *nb_clusters), which is basically one big ++ * (unlimited size) refblock for the whole image. ++ * ++ * For these refblocks, clusters are allocated using said in-memory ++ * refcount table. Care is taken that these allocations are reflected ++ * in the refblocks written to disk. ++ * ++ * The refblocks' offsets are written into a reftable, which is ++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If ++ * that reftable is of insufficient size, it will be resized to fit. ++ * This reftable is not written to disk. ++ * ++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed ++ * to point to existing valid refblocks that do not need to be allocated ++ * again.) ++ * ++ * Return whether the on-disk reftable array was resized (true/false), ++ * or -errno on error. + */ +-static int rebuild_refcount_structure(BlockDriverState *bs, +- BdrvCheckResult *res, +- void **refcount_table, +- int64_t *nb_clusters) ++static int rebuild_refcounts_write_refblocks( ++ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, ++ int64_t first_cluster, int64_t end_cluster, ++ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr ++ ) + { + BDRVQcow2State *s = bs->opaque; +- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; ++ int64_t cluster; + int64_t refblock_offset, refblock_start, refblock_index; +- uint32_t reftable_size = 0; +- uint64_t *on_disk_reftable = NULL; ++ int64_t first_free_cluster = 0; ++ uint64_t *on_disk_reftable = *on_disk_reftable_ptr; ++ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr; + void *on_disk_refblock; +- int ret = 0; +- struct { +- uint64_t reftable_offset; +- uint32_t reftable_clusters; +- } QEMU_PACKED reftable_offset_and_clusters; +- +- qcow2_cache_empty(bs, s->refcount_block_cache); ++ bool reftable_grown = false; ++ int ret; + 
+-write_refblocks: +- for (; cluster < *nb_clusters; cluster++) { ++ for (cluster = first_cluster; cluster < end_cluster; cluster++) { ++ /* Check all clusters to find refblocks that contain non-zero entries */ + if (!s->get_refcount(*refcount_table, cluster)) { + continue; + } + ++ /* ++ * This cluster is allocated, so we need to create a refblock ++ * for it. The data we will write to disk is just the ++ * respective slice from *refcount_table, so it will contain ++ * accurate refcounts for all clusters belonging to this ++ * refblock. After we have written it, we will therefore skip ++ * all remaining clusters in this refblock. ++ */ ++ + refblock_index = cluster >> s->refcount_block_bits; + refblock_start = refblock_index << s->refcount_block_bits; + +- /* Don't allocate a cluster in a refblock already written to disk */ +- if (first_free_cluster < refblock_start) { +- first_free_cluster = refblock_start; +- } +- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, +- nb_clusters, &first_free_cluster); +- if (refblock_offset < 0) { +- fprintf(stderr, "ERROR allocating refblock: %s\n", +- strerror(-refblock_offset)); +- res->check_errors++; +- ret = refblock_offset; +- goto fail; +- } ++ if (on_disk_reftable_entries > refblock_index && ++ on_disk_reftable[refblock_index]) ++ { ++ /* ++ * We can get here on a repeated call of this function: We have a ++ * reftable from a previous run, and the refblock is already ++ * allocated. No need to allocate it again. 
++ */ ++ refblock_offset = on_disk_reftable[refblock_index]; ++ } else { ++ int64_t refblock_cluster_index; + +- if (reftable_size <= refblock_index) { +- uint32_t old_reftable_size = reftable_size; +- uint64_t *new_on_disk_reftable; ++ /* Don't allocate a cluster in a refblock already written to disk */ ++ if (first_free_cluster < refblock_start) { ++ first_free_cluster = refblock_start; ++ } ++ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table, ++ nb_clusters, ++ &first_free_cluster); ++ if (refblock_offset < 0) { ++ fprintf(stderr, "ERROR allocating refblock: %s\n", ++ strerror(-refblock_offset)); ++ return refblock_offset; ++ } + +- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, +- s->cluster_size) / REFTABLE_ENTRY_SIZE; +- new_on_disk_reftable = g_try_realloc(on_disk_reftable, +- reftable_size * +- REFTABLE_ENTRY_SIZE); +- if (!new_on_disk_reftable) { +- res->check_errors++; +- ret = -ENOMEM; +- goto fail; ++ refblock_cluster_index = refblock_offset / s->cluster_size; ++ if (refblock_cluster_index >= end_cluster) { ++ /* ++ * We must write the refblock that holds this refblock's ++ * refcount ++ */ ++ end_cluster = refblock_cluster_index + 1; + } +- on_disk_reftable = new_on_disk_reftable; + +- memset(on_disk_reftable + old_reftable_size, 0, +- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE); ++ if (on_disk_reftable_entries <= refblock_index) { ++ on_disk_reftable_entries = ++ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE, ++ s->cluster_size) / REFTABLE_ENTRY_SIZE; ++ on_disk_reftable = ++ g_try_realloc(on_disk_reftable, ++ on_disk_reftable_entries * ++ REFTABLE_ENTRY_SIZE); ++ if (!on_disk_reftable) { ++ return -ENOMEM; ++ } + +- /* The offset we have for the reftable is now no longer valid; +- * this will leak that range, but we can easily fix that by running +- * a leak-fixing check after this rebuild operation */ +- reftable_offset = -1; +- } else { +- assert(on_disk_reftable); +- } +- 
on_disk_reftable[refblock_index] = refblock_offset; ++ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0, ++ (on_disk_reftable_entries - ++ *on_disk_reftable_entries_ptr) * ++ REFTABLE_ENTRY_SIZE); + +- /* If this is apparently the last refblock (for now), try to squeeze the +- * reftable in */ +- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits && +- reftable_offset < 0) +- { +- uint64_t reftable_clusters = size_to_clusters(s, reftable_size * +- REFTABLE_ENTRY_SIZE); +- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, +- refcount_table, nb_clusters, +- &first_free_cluster); +- if (reftable_offset < 0) { +- fprintf(stderr, "ERROR allocating reftable: %s\n", +- strerror(-reftable_offset)); +- res->check_errors++; +- ret = reftable_offset; +- goto fail; ++ *on_disk_reftable_ptr = on_disk_reftable; ++ *on_disk_reftable_entries_ptr = on_disk_reftable_entries; ++ ++ reftable_grown = true; ++ } else { ++ assert(on_disk_reftable); + } ++ on_disk_reftable[refblock_index] = refblock_offset; + } + ++ /* Refblock is allocated, write it to disk */ ++ + ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, + s->cluster_size, false); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* The size of *refcount_table is always cluster-aligned, therefore the +- * write operation will not overflow */ ++ /* ++ * The refblock is simply a slice of *refcount_table. ++ * Note that the size of *refcount_table is always aligned to ++ * whole clusters, so the write operation will not result in ++ * out-of-bounds accesses. 
++ */ + on_disk_refblock = (void *)((char *) *refcount_table + + refblock_index * s->cluster_size); + +@@ -2550,23 +2579,99 @@ write_refblocks: + s->cluster_size); + if (ret < 0) { + fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); +- goto fail; ++ return ret; + } + +- /* Go to the end of this refblock */ ++ /* This refblock is done, skip to its end */ + cluster = refblock_start + s->refcount_block_size - 1; + } + +- if (reftable_offset < 0) { +- uint64_t post_refblock_start, reftable_clusters; ++ return reftable_grown; ++} ++ ++/* ++ * Creates a new refcount structure based solely on the in-memory information ++ * given through *refcount_table (this in-memory information is basically just ++ * the concatenation of all refblocks). All necessary allocations will be ++ * reflected in that array. ++ * ++ * On success, the old refcount structure is leaked (it will be covered by the ++ * new refcount structure). ++ */ ++static int rebuild_refcount_structure(BlockDriverState *bs, ++ BdrvCheckResult *res, ++ void **refcount_table, ++ int64_t *nb_clusters) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ int64_t reftable_offset = -1; ++ int64_t reftable_length = 0; ++ int64_t reftable_clusters; ++ int64_t refblock_index; ++ uint32_t on_disk_reftable_entries = 0; ++ uint64_t *on_disk_reftable = NULL; ++ int ret = 0; ++ int reftable_size_changed = 0; ++ struct { ++ uint64_t reftable_offset; ++ uint32_t reftable_clusters; ++ } QEMU_PACKED reftable_offset_and_clusters; ++ ++ qcow2_cache_empty(bs, s->refcount_block_cache); ++ ++ /* ++ * For each refblock containing entries, we try to allocate a ++ * cluster (in the in-memory refcount table) and write its offset ++ * into on_disk_reftable[]. We then write the whole refblock to ++ * disk (as a slice of the in-memory refcount table). ++ * This is done by rebuild_refcounts_write_refblocks(). ++ * ++ * Once we have scanned all clusters, we try to find space for the ++ * reftable. 
This will dirty the in-memory refcount table (i.e. ++ * make it differ from the refblocks we have already written), so we ++ * need to run rebuild_refcounts_write_refblocks() again for the ++ * range of clusters where the reftable has been allocated. ++ * ++ * This second run might make the reftable grow again, in which case ++ * we will need to allocate another space for it, which is why we ++ * repeat all this until the reftable stops growing. ++ * ++ * (This loop will terminate, because with every cluster the ++ * reftable grows, it can accommodate a multitude of more refcounts, ++ * so that at some point this must be able to cover the reftable ++ * and all refblocks describing it.) ++ * ++ * We then convert the reftable to big-endian and write it to disk. ++ * ++ * Note that we never free any reftable allocations. Doing so would ++ * needlessly complicate the algorithm: The eventual second check ++ * run we do will clean up all leaks we have caused. ++ */ ++ ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ 0, *nb_clusters, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * There was no reftable before, so rebuild_refcounts_write_refblocks() ++ * must have increased its size (from 0 to something). 
++ */ ++ assert(reftable_size_changed); ++ ++ do { ++ int64_t reftable_start_cluster, reftable_end_cluster; ++ int64_t first_free_cluster = 0; ++ ++ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE; ++ reftable_clusters = size_to_clusters(s, reftable_length); + +- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size); +- reftable_clusters = +- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE); +- /* Not pretty but simple */ +- if (first_free_cluster < post_refblock_start) { +- first_free_cluster = post_refblock_start; +- } + reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, + refcount_table, nb_clusters, + &first_free_cluster); +@@ -2578,24 +2683,55 @@ write_refblocks: + goto fail; + } + +- goto write_refblocks; +- } ++ /* ++ * We need to update the affected refblocks, so re-run the ++ * write_refblocks loop for the reftable's range of clusters. ++ */ ++ assert(offset_into_cluster(s, reftable_offset) == 0); ++ reftable_start_cluster = reftable_offset / s->cluster_size; ++ reftable_end_cluster = reftable_start_cluster + reftable_clusters; ++ reftable_size_changed = ++ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, ++ reftable_start_cluster, ++ reftable_end_cluster, ++ &on_disk_reftable, ++ &on_disk_reftable_entries); ++ if (reftable_size_changed < 0) { ++ res->check_errors++; ++ ret = reftable_size_changed; ++ goto fail; ++ } ++ ++ /* ++ * If the reftable size has changed, we will need to find a new ++ * allocation, repeating the loop. ++ */ ++ } while (reftable_size_changed); + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ /* The above loop must have run at least once */ ++ assert(reftable_offset >= 0); ++ ++ /* ++ * All allocations are done, all refblocks are written, convert the ++ * reftable to big-endian and write it to disk. 
++ */ ++ ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + cpu_to_be64s(&on_disk_reftable[refblock_index]); + } + +- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, +- reftable_size * REFTABLE_ENTRY_SIZE, ++ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, + false); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; + } + +- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE); ++ assert(reftable_length < INT_MAX); + ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, +- reftable_size * REFTABLE_ENTRY_SIZE); ++ reftable_length); + if (ret < 0) { + fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); + goto fail; +@@ -2604,7 +2740,7 @@ write_refblocks: + /* Enter new reftable into the image header */ + reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); + reftable_offset_and_clusters.reftable_clusters = +- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE)); ++ cpu_to_be32(reftable_clusters); + ret = bdrv_pwrite_sync(bs->file, + offsetof(QCowHeader, refcount_table_offset), + &reftable_offset_and_clusters, +@@ -2614,12 +2750,14 @@ write_refblocks: + goto fail; + } + +- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) { ++ for (refblock_index = 0; refblock_index < on_disk_reftable_entries; ++ refblock_index++) ++ { + be64_to_cpus(&on_disk_reftable[refblock_index]); + } + s->refcount_table = on_disk_reftable; + s->refcount_table_offset = reftable_offset; +- s->refcount_table_size = reftable_size; ++ s->refcount_table_size = on_disk_reftable_entries; + update_max_refcount_table_index(s); + + return 0; +-- +2.31.1 + diff --git a/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch new file mode 100644 index 0000000..9acff58 --- /dev/null +++ 
b/kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch @@ -0,0 +1,92 @@ +From e6aae1d0368a152924c38775e517f4e83c1d898b Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 11 May 2022 19:49:23 -0500 +Subject: [PATCH 1/2] qemu-nbd: Pass max connections to blockdev layer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 90: Advertise MULTI_CONN on writeable NBD servers +RH-Commit: [1/2] b0e33fd125bf3523b8b9a4dead3c8bb2342bfd4e (ebblake/centos-qemu-kvm) +RH-Bugzilla: 1708300 +RH-Acked-by: Nir Soffer +RH-Acked-by: Kevin Wolf +RH-Acked-by: Daniel P. Berrangé + +The next patch wants to adjust whether the NBD server code advertises +MULTI_CONN based on whether it is known if the server limits to +exactly one client. For a server started by QMP, this information is +obtained through nbd_server_start (which can support more than one +export); but for qemu-nbd (which supports exactly one export), it is +controlled only by the command-line option -e/--shared. Since we +already have a hook function used by qemu-nbd, it's easiest to just +alter its signature to fit our needs. 
+ +Signed-off-by: Eric Blake +Message-Id: <20220512004924.417153-2-eblake@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit a5fced40212ed73c715ca298a2929dd4d99c9999) +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 8 ++++---- + include/block/nbd.h | 2 +- + qemu-nbd.c | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 9840d25a82..add41a23af 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -30,18 +30,18 @@ typedef struct NBDServerData { + } NBDServerData; + + static NBDServerData *nbd_server; +-static bool is_qemu_nbd; ++static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */ + + static void nbd_update_server_watch(NBDServerData *s); + +-void nbd_server_is_qemu_nbd(bool value) ++void nbd_server_is_qemu_nbd(int max_connections) + { +- is_qemu_nbd = value; ++ qemu_nbd_connections = max_connections; + } + + bool nbd_server_is_running(void) + { +- return nbd_server || is_qemu_nbd; ++ return nbd_server || qemu_nbd_connections >= 0; + } + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +diff --git a/include/block/nbd.h b/include/block/nbd.h +index a98eb665da..c5a29ce1c6 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc, + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +-void nbd_server_is_qemu_nbd(bool value); ++void nbd_server_is_qemu_nbd(int max_connections); + bool nbd_server_is_running(void); + void nbd_server_start(SocketAddress *addr, const char *tls_creds, + const char *tls_authz, uint32_t max_connections, +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 713e7557a9..8c25ae93df 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -1087,7 +1087,7 @@ int main(int argc, char **argv) + + bs->detect_zeroes = detect_zeroes; + +- nbd_server_is_qemu_nbd(true); ++ nbd_server_is_qemu_nbd(shared); + + export_opts = g_new(BlockExportOptions, 1); + 
*export_opts = (BlockExportOptions) { +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch new file mode 100644 index 0000000..9c2ac99 --- /dev/null +++ b/kvm-qtest-numa-test-Correct-CPU-and-NUMA-association-in-.patch @@ -0,0 +1,100 @@ +From a039ed652e6d2f5edcef9d5d1d3baec17ce7f929 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 04/16] qtest/numa-test: Correct CPU and NUMA association in + aarch64_numa_cpu() + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [4/6] 64e9908a179eb4fb586d662f70f275a81808e50c (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +In aarch64_numa_cpu(), the CPU and NUMA association is something +like below. Two threads in the same core/cluster/socket are +associated with two individual NUMA nodes, which is unreal as +Igor Mammedov mentioned. We don't expect the association to break +NUMA-to-socket boundary, which matches with the real world. + +NUMA-node socket cluster core thread +------------------------------------------ +0 0 0 0 0 +1 0 0 0 1 + +This corrects the topology for CPUs and their association with +NUMA nodes. After this patch is applied, the CPU and NUMA +association becomes something like below, which looks real. +Besides, socket/cluster/core/thread IDs are all checked when +the NUMA node IDs are verified. It helps to check if the CPU +topology is properly populated or not. 
+ +NUMA-node socket cluster core thread +------------------------------------------ +0 1 0 0 0 +1 0 0 0 0 + +Suggested-by: Igor Mammedov +Signed-off-by: Gavin Shan +Acked-by: Igor Mammedov +Message-id: 20220503140304.855514-5-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit e280ecb39bc1629f74ea5479d464fd1608dc8f76) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index aeda8c774c..32e35daaae 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -224,17 +224,17 @@ static void aarch64_numa_cpu(const void *data) + g_autofree char *cli = NULL; + + cli = make_cli(data, "-machine " +- "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " ++ "smp.cpus=2,smp.sockets=2,smp.clusters=1,smp.cores=1,smp.threads=1 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " +- "-numa cpu,node-id=1,thread-id=0 " +- "-numa cpu,node-id=0,thread-id=1"); ++ "-numa cpu,node-id=0,socket-id=1,cluster-id=0,core-id=0,thread-id=0 " ++ "-numa cpu,node-id=1,socket-id=0,cluster-id=0,core-id=0,thread-id=0"); + qts = qtest_init(cli); + cpus = get_cpus(qts, &resp); + g_assert(cpus); + + while ((e = qlist_pop(cpus))) { + QDict *cpu, *props; +- int64_t thread, node; ++ int64_t socket, cluster, core, thread, node; + + cpu = qobject_to(QDict, e); + g_assert(qdict_haskey(cpu, "props")); +@@ -242,12 +242,18 @@ static void aarch64_numa_cpu(const void *data) + + g_assert(qdict_haskey(props, "node-id")); + node = qdict_get_int(props, "node-id"); ++ g_assert(qdict_haskey(props, "socket-id")); ++ socket = qdict_get_int(props, "socket-id"); ++ g_assert(qdict_haskey(props, "cluster-id")); ++ cluster = qdict_get_int(props, "cluster-id"); ++ g_assert(qdict_haskey(props, "core-id")); ++ core = qdict_get_int(props, "core-id"); + g_assert(qdict_haskey(props, "thread-id")); + thread = qdict_get_int(props, 
"thread-id"); + +- if (thread == 0) { ++ if (socket == 0 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 1); +- } else if (thread == 1) { ++ } else if (socket == 1 && cluster == 0 && core == 0 && thread == 0) { + g_assert_cmpint(node, ==, 0); + } else { + g_assert(false); +-- +2.31.1 + diff --git a/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch new file mode 100644 index 0000000..a87abc0 --- /dev/null +++ b/kvm-qtest-numa-test-Specify-CPU-topology-in-aarch64_numa.patch @@ -0,0 +1,68 @@ +From 66f3928b40991d8467a3da086688f73d061886c8 Mon Sep 17 00:00:00 2001 +From: Gavin Shan +Date: Wed, 11 May 2022 18:01:35 +0800 +Subject: [PATCH 02/16] qtest/numa-test: Specify CPU topology in + aarch64_numa_cpu() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Gavin Shan +RH-MergeRequest: 86: hw/arm/virt: Fix the default CPU topology +RH-Commit: [2/6] b851e7ad59e057825392ddf75e9040cc102a0385 (gwshan/qemu-rhel-9) +RH-Bugzilla: 2041823 +RH-Acked-by: Eric Auger +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2041823 + +The CPU topology isn't enabled on arm/virt machine yet, but we're +going to do it in next patch. After the CPU topology is enabled by +next patch, "thread-id=1" becomes invalid because the CPU core is +preferred on arm/virt machine. It means these two CPUs have 0/1 +as their core IDs, but their thread IDs are all 0. 
It will trigger +test failure as the following message indicates: + +[14/21 qemu:qtest+qtest-aarch64 / qtest-aarch64/numa-test ERROR +1.48s killed by signal 6 SIGABRT +>>> G_TEST_DBUS_DAEMON=/home/gavin/sandbox/qemu.main/tests/dbus-vmstate-daemon.sh \ +QTEST_QEMU_STORAGE_DAEMON_BINARY=./storage-daemon/qemu-storage-daemon \ +QTEST_QEMU_BINARY=./qemu-system-aarch64 \ +QTEST_QEMU_IMG=./qemu-img MALLOC_PERTURB_=83 \ +/home/gavin/sandbox/qemu.main/build/tests/qtest/numa-test --tap -k +―――――――――――――――――――――――――――――――――――――――――――――― +stderr: +qemu-system-aarch64: -numa cpu,node-id=0,thread-id=1: no match found + +This fixes the issue by providing comprehensive SMP configurations +in aarch64_numa_cpu(). The SMP configurations aren't used before +the CPU topology is enabled in next patch. + +Signed-off-by: Gavin Shan +Reviewed-by: Yanan Wang +Message-id: 20220503140304.855514-3-gshan@redhat.com +Signed-off-by: Peter Maydell +(cherry picked from commit ac7199a2523ce2ccf8e685087a5d177eeca89b09) +Signed-off-by: Gavin Shan +--- + tests/qtest/numa-test.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c +index 90bf68a5b3..aeda8c774c 100644 +--- a/tests/qtest/numa-test.c ++++ b/tests/qtest/numa-test.c +@@ -223,7 +223,8 @@ static void aarch64_numa_cpu(const void *data) + QTestState *qts; + g_autofree char *cli = NULL; + +- cli = make_cli(data, "-machine smp.cpus=2 " ++ cli = make_cli(data, "-machine " ++ "smp.cpus=2,smp.sockets=1,smp.clusters=1,smp.cores=1,smp.threads=2 " + "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 " + "-numa cpu,node-id=1,thread-id=0 " + "-numa cpu,node-id=0,thread-id=1"); +-- +2.31.1 + diff --git a/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch b/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch new file mode 100644 index 0000000..edf8ec9 --- /dev/null +++ b/kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch @@ -0,0 +1,69 @@ +From 
3541c9fc2c2dd5cf7dd583bc5645d82ea928d9e8 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 10 Dec 2021 10:07:40 +0100 +Subject: [PATCH 1/2] redhat: Add rhel8.6.0 machine type for s390x +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 90: Add rhel8.6.0 machine type for s390x +RH-Commit: [1/1] 91961fc52d708e6b30d7361fbab3572c5b5c1859 +RH-Bugzilla: 2005325 +RH-Acked-by: Greg Kurz +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2005325 + +The new machine type has better default values for the upcoming +"generation 16" mainframe. + +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index cf13c457d6..9795eb9406 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1103,10 +1103,21 @@ static void ccw_machine_2_4_class_options(MachineClass *mc) + DEFINE_CCW_MACHINE(2_4, "2.4", false); + #endif + ++static void ccw_machine_rhel860_instance_options(MachineState *machine) ++{ ++} ++ ++static void ccw_machine_rhel860_class_options(MachineClass *mc) ++{ ++} ++DEFINE_CCW_MACHINE(rhel860, "rhel8.6.0", true); ++ + static void ccw_machine_rhel850_instance_options(MachineState *machine) + { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V6_0 }; + ++ ccw_machine_rhel860_instance_options(machine); ++ + s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); + + s390_cpudef_featoff_greater(16, 1, S390_FEAT_NNPA); +@@ -1118,10 +1129,11 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, 
hw_compat_rhel_8_5_len); + mc->smp_props.prefer_sockets = true; + } +-DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); + + static void ccw_machine_rhel840_instance_options(MachineState *machine) + { +-- +2.27.0 + diff --git a/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch b/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch new file mode 100644 index 0000000..760a5fd --- /dev/null +++ b/kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch @@ -0,0 +1,76 @@ +From 300cdf7f5b8b34e111c5e4141684af7329be46d9 Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Mon, 13 Dec 2021 15:42:41 +0100 +Subject: [PATCH 2/2] redhat: Define pseries-rhel8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +RH-MergeRequest: 92: redhat: Define pseries-rhel8.6.0 machine type +RH-Commit: [1/1] 3c0f59d7ddf4bb22f382b5df7daa136730b9e866 +RH-Bugzilla: 2031041 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: David Gibson (Red Hat) +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Laurent Vivier + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2031041 + +BRANCH: rhel-8.6.0 + +UPSTREAM: RHEL only + +BREW: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=41989147 + +Signed-off-by: Greg Kurz +--- + hw/ppc/spapr.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index 2f27888d8a..32cfe8f006 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -5170,6 +5170,19 @@ static void spapr_machine_rhel_default_class_options(MachineClass *mc) + mc->max_cpus = 384; + } + ++/* ++ * pseries-rhel8.6.0 ++ * like pseries-6.2 ++ */ ++ ++static void spapr_machine_rhel860_class_options(MachineClass *mc) ++{ ++ /* The default machine type must apply the RHEL specific defaults */ ++ spapr_machine_rhel_default_class_options(mc); ++} ++ ++DEFINE_SPAPR_MACHINE(rhel860, 
"rhel8.6.0", true); ++ + /* + * pseries-rhel8.5.0 + * like pseries-6.0 +@@ -5179,15 +5192,14 @@ static void spapr_machine_rhel850_class_options(MachineClass *mc) + { + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + +- /* The default machine type must apply the RHEL specific defaults */ +- spapr_machine_rhel_default_class_options(mc); ++ spapr_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, + hw_compat_rhel_8_5_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; + } + +-DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", true); ++DEFINE_SPAPR_MACHINE(rhel850, "rhel8.5.0", false); + + /* + * pseries-rhel8.4.0 +-- +2.27.0 + diff --git a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch new file mode 100644 index 0000000..f027c45 --- /dev/null +++ b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -0,0 +1,106 @@ +From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 29 Jul 2022 16:55:34 +0200 +Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Upstream Status: RHEL-only +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 + +Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f +("linux-headers: Update to v5.18-rc6"), but this is focusing on +the file linux-headers/linux/kvm.h only (since the other changes +related to the VFIO renaming might break some stuff). 
+ +Signed-off-by: Thomas Huth +--- + linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ + 1 file changed, 21 insertions(+), 6 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index d232feaae9..0d05d02ee4 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -445,7 +445,11 @@ struct kvm_run { + #define KVM_SYSTEM_EVENT_RESET 2 + #define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; +- __u64 flags; ++ __u32 ndata; ++ union { ++ __u64 flags; ++ __u64 data[16]; ++ }; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { +@@ -562,9 +566,12 @@ struct kvm_s390_mem_op { + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { +- __u8 ar; /* the access register number */ ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ }; + __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* should be set to 0 */ ++ __u8 reserved[32]; /* ignored */ + }; + }; + /* types for kvm_s390_mem_op->op */ +@@ -572,9 +579,12 @@ struct kvm_s390_mem_op { + #define KVM_S390_MEMOP_LOGICAL_WRITE 1 + #define KVM_S390_MEMOP_SIDA_READ 2 + #define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 + /* flags for kvm_s390_mem_op->flags */ + #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) + #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + + /* for KVM_INTERRUPT */ + struct kvm_interrupt { +@@ -1134,6 +1144,12 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 + #define KVM_CAP_SYS_ATTRIBUTES 209 ++#define KVM_CAP_PPC_AIL_MODE_3 210 ++#define KVM_CAP_S390_MEM_OP_EXTENSION 211 ++#define KVM_CAP_PMU_CAPABILITY 212 ++#define KVM_CAP_DISABLE_QUIRKS2 213 ++/* #define KVM_CAP_VM_TSC_CONTROL 214 */ ++#define KVM_CAP_SYSTEM_EVENT_DATA 215 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1624,9 +1640,6 
@@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-/* Available with KVM_CAP_XSAVE2 */ +-#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) +- + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +@@ -1973,6 +1986,8 @@ struct kvm_dirty_gfn { + #define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) + #define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + ++#define KVM_PMU_CAP_DISABLE (1 << 0) ++ + /** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. +-- +2.31.1 + diff --git a/kvm-rhel-machine-types-x86-set-prefer_sockets.patch b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch new file mode 100644 index 0000000..d7bfc96 --- /dev/null +++ b/kvm-rhel-machine-types-x86-set-prefer_sockets.patch @@ -0,0 +1,52 @@ +From 0f0cbd57a8fe8f463941656f5bc75ae5754c3d2b Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 7 Dec 2021 18:39:47 +0000 +Subject: [PATCH 6/6] rhel machine types/x86: set prefer_sockets + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 96: Fixup x86 prefer_sockets +RH-Commit: [1/1] 29578bcc2f5d3408c155c155cdfa10b7a12faf4d +RH-Bugzilla: 2029582 +RH-Acked-by: Igor Mammedov +RH-Acked-by: quintela1 +RH-Acked-by: Cornelia Huck + +When I fixed up the machine types for 8.5 I missed the + prefer_sockets = true + +add them in; it looks like Power, ARM already have them, and I see them +in thuth's s390 patch. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 37fab00733..c30057c443 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1020,6 +1020,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + m->alias = "pc"; + m->is_default = 1; ++ m->smp_props.prefer_sockets = true; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 78876e1101..f6e77bca0e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -662,6 +662,7 @@ static void pc_q35_machine_rhel850_options(MachineClass *m) + hw_compat_rhel_8_5_len); + compat_props_add(m->compat_props, pc_rhel_8_5_compat, + pc_rhel_8_5_compat_len); ++ m->smp_props.prefer_sockets = true; + } + + DEFINE_PC_MACHINE(q35_rhel850, "pc-q35-rhel8.5.0", pc_q35_init_rhel850, +-- +2.27.0 + diff --git a/kvm-s390x-css-fix-PMCW-invalid-mask.patch b/kvm-s390x-css-fix-PMCW-invalid-mask.patch new file mode 100644 index 0000000..959eea9 --- /dev/null +++ b/kvm-s390x-css-fix-PMCW-invalid-mask.patch @@ -0,0 +1,58 @@ +From f3125f6379cbc070e9acaf58d0ec37972992744b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 6 Apr 2022 10:56:26 +0200 +Subject: [PATCH 4/5] s390x/css: fix PMCW invalid mask + +RH-Author: Thomas Huth +RH-MergeRequest: 145: s390x/css: fix PMCW invalid mask +RH-Commit: [1/1] fbf192f651aa668af56ca5c77455595fcdb19508 +RH-Bugzilla: 2071070 +RH-Acked-by: Jon Maloy +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck + +Bugzilla: http://bugzilla.redhat.com/2071070 + +commit 2df59b73e0864f021f6179f32f7ed364f6d4f38d +Author: Nico Boehr +Date: Thu Dec 16 14:16:57 2021 +0100 + + s390x/css: fix PMCW invalid mask + + Previously, we required bits 5, 6 and 7 to be zero (0x07 == 0b111). 
But, + as per the principles of operation, bit 5 is ignored in MSCH and bits 0, + 1, 6 and 7 need to be zero. + + As both PMCW_FLAGS_MASK_INVALID and ioinst_schib_valid() are only used + by ioinst_handle_msch(), adjust the mask accordingly. + + Fixes: db1c8f53bfb1 ("s390: Channel I/O basic definitions.") + Signed-off-by: Nico Boehr + Reviewed-by: Pierre Morel + Reviewed-by: Halil Pasic + Reviewed-by: Janosch Frank + Reviewed-by: Cornelia Huck + Message-Id: <20211216131657.1057978-1-nrb@linux.ibm.com> + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + include/hw/s390x/ioinst.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/ioinst.h b/include/hw/s390x/ioinst.h +index 3771fff9d4..ea8d0f2444 100644 +--- a/include/hw/s390x/ioinst.h ++++ b/include/hw/s390x/ioinst.h +@@ -107,7 +107,7 @@ QEMU_BUILD_BUG_MSG(sizeof(PMCW) != 28, "size of PMCW is wrong"); + #define PMCW_FLAGS_MASK_MP 0x0004 + #define PMCW_FLAGS_MASK_TF 0x0002 + #define PMCW_FLAGS_MASK_DNV 0x0001 +-#define PMCW_FLAGS_MASK_INVALID 0x0700 ++#define PMCW_FLAGS_MASK_INVALID 0xc300 + + #define PMCW_CHARS_MASK_ST 0x00e00000 + #define PMCW_CHARS_MASK_MBFC 0x00000004 +-- +2.27.0 + diff --git a/kvm-s390x.conf b/kvm-s390x.conf new file mode 100644 index 0000000..d82b818 --- /dev/null +++ b/kvm-s390x.conf @@ -0,0 +1,19 @@ +# User changes in this file are preserved across upgrades. +# +# Setting "modprobe kvm nested=1" only enables Nested Virtualization until +# the next reboot or module reload. Uncomment the option below to enable +# the feature permanently. +# +#options kvm nested=1 +# +# +# Setting "modprobe kvm hpage=1" only enables Huge Page Backing (1MB) +# support until the next reboot or module reload. Uncomment the option +# below to enable the feature permanently. +# +# Note: - Incompatible with "nested=1". Loading the module will fail. 
+# - Dirty page logging will be performed on a 1MB (not 4KB) basis, +# which can result in a lot of data having to be transferred during +# migration, and therefore taking very long to converge. +# +#options kvm hpage=1 diff --git a/kvm-setup b/kvm-setup new file mode 100644 index 0000000..3bfedf6 --- /dev/null +++ b/kvm-setup @@ -0,0 +1,49 @@ +#! /bin/bash + +kvm_setup_powerpc () { + if grep '^platform[[:space:]]*:[[:space:]]*PowerNV' /proc/cpuinfo > /dev/null; then + # PowerNV platform, which is KVM HV capable + + if [ -z "$SUBCORES" ]; then + SUBCORES=1 + fi + + # Step 1. Load the KVM HV module + if ! modprobe -b kvm_hv; then + return + fi + + # On POWER8 a host core can only run threads of a single + # guest, meaning that SMT must be disabled on the host in + # order to run KVM guests. (Also applies to POWER7, but we + # don't support that). + # + # POWER9 doesn't have this limitation (though it will for hash + # guests on radix host when that's implemented). So, only set + # up subcores and disable SMT for POWER8. + if grep '^cpu[[:space:]]*:[[:space:]]*POWER8' /proc/cpuinfo > /dev/null; then + # Step 2. Configure subcore mode + /usr/sbin/ppc64_cpu --subcores-per-core=$SUBCORES + + # Step 3. 
Disable SMT (multithreading) + /usr/sbin/ppc64_cpu --smt=off + fi + fi +} + +kvm_setup_s390x () { + if grep -q "^features.*sie" /proc/cpuinfo; then + modprobe kvm + fi +} + +case $(uname -m) in + ppc64|ppc64le) + kvm_setup_powerpc + ;; + s390x) + kvm_setup_s390x + ;; +esac + +exit 0 diff --git a/kvm-setup.service b/kvm-setup.service new file mode 100644 index 0000000..9c4bf97 --- /dev/null +++ b/kvm-setup.service @@ -0,0 +1,14 @@ +[Unit] +Description=Perform system configuration to prepare system to run KVM guests +# Offlining CPUs can cause irqbalance to throw warnings if it's running +Before=irqbalance.service +# libvirtd reads CPU topology at startup, so change it before +Before=libvirtd.service + +[Service] +Type=oneshot +EnvironmentFile=-/etc/sysconfig/kvm +ExecStart=/usr/lib/systemd/kvm-setup + +[Install] +WantedBy=multi-user.target diff --git a/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch new file mode 100644 index 0000000..c6fcf61 --- /dev/null +++ b/kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch @@ -0,0 +1,131 @@ +From afe1a63fe0cf863e024889edd82b9a380bfa8230 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Wed, 5 Jan 2022 12:38:47 +0000 +Subject: [PATCH 2/6] softmmu: fix device deletion events with -device JSON + syntax +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Kevin Wolf +RH-MergeRequest: 103: Fix hot unplug of devices created with -device JSON syntax +RH-Commit: [1/1] 64cbc78bcb46bdb24d5f589ceb5ad598c388e447 +RH-Bugzilla: 2033279 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Thomas Huth +RH-Acked-by: Jano Tomko +RH-Acked-by: Daniel P. Berrangé + +The -device JSON syntax impl leaks a reference on the created +DeviceState instance. 
As a result when you hot-unplug the +device, the device_finalize method won't be called and thus +it will fail to emit the required DEVICE_DELETED event. + +A 'json-cli' feature was previously added against the +'device_add' QMP command QAPI schema to indicated to mgmt +apps that -device supported JSON syntax. Given the hotplug +bug that feature flag is not usable for its purpose, so +we add a new 'json-cli-hotplug' feature to indicate the +-device supports JSON without breaking hotplug. + +Fixes: 5dacda5167560b3af8eadbce5814f60ba44b467e +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/802 +Signed-off-by: Daniel P. Berrangé +Message-Id: <20220105123847.4047954-2-berrange@redhat.com> +Reviewed-by: Laurent Vivier +Tested-by: Ján Tomko +Reviewed-by: Thomas Huth +Signed-off-by: Kevin Wolf +(cherry picked from commit 64b4529a432507ee84a924be69a03432639e87ba) +Signed-off-by: Kevin Wolf +--- + qapi/qdev.json | 5 ++++- + softmmu/vl.c | 4 +++- + tests/qtest/device-plug-test.c | 19 +++++++++++++++++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/qapi/qdev.json b/qapi/qdev.json +index 69656b14df..26cd10106b 100644 +--- a/qapi/qdev.json ++++ b/qapi/qdev.json +@@ -44,6 +44,9 @@ + # @json-cli: If present, the "-device" command line option supports JSON + # syntax with a structure identical to the arguments of this + # command. 
++# @json-cli-hotplug: If present, the "-device" command line option supports JSON ++# syntax without the reference counting leak that broke ++# hot-unplug + # + # Notes: + # +@@ -74,7 +77,7 @@ + { 'command': 'device_add', + 'data': {'driver': 'str', '*bus': 'str', '*id': 'str'}, + 'gen': false, # so we can get the additional arguments +- 'features': ['json-cli'] } ++ 'features': ['json-cli', 'json-cli-hotplug'] } + + ## + # @device_del: +diff --git a/softmmu/vl.c b/softmmu/vl.c +index d46b8fb4ab..b3829e2edd 100644 +--- a/softmmu/vl.c ++++ b/softmmu/vl.c +@@ -2690,6 +2690,7 @@ static void qemu_create_cli_devices(void) + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); + QTAILQ_FOREACH(opt, &device_opts, next) { ++ DeviceState *dev; + loc_push_restore(&opt->loc); + /* + * TODO Eventually we should call qmp_device_add() here to make sure it +@@ -2698,7 +2699,8 @@ static void qemu_create_cli_devices(void) + * from the start, so call qdev_device_add_from_qdict() directly for + * now. + */ +- qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ dev = qdev_device_add_from_qdict(opt->opts, true, &error_fatal); ++ object_unref(OBJECT(dev)); + loc_pop(&opt->loc); + } + rom_reset_order_override(); +diff --git a/tests/qtest/device-plug-test.c b/tests/qtest/device-plug-test.c +index 559d47727a..ad79bd4c14 100644 +--- a/tests/qtest/device-plug-test.c ++++ b/tests/qtest/device-plug-test.c +@@ -77,6 +77,23 @@ static void test_pci_unplug_request(void) + qtest_quit(qtest); + } + ++static void test_pci_unplug_json_request(void) ++{ ++ QTestState *qtest = qtest_initf( ++ "-device '{\"driver\": \"virtio-mouse-pci\", \"id\": \"dev0\"}'"); ++ ++ /* ++ * Request device removal. As the guest is not running, the request won't ++ * be processed. However during system reset, the removal will be ++ * handled, removing the device. 
++ */ ++ device_del(qtest, "dev0"); ++ system_reset(qtest); ++ wait_device_deleted_event(qtest, "dev0"); ++ ++ qtest_quit(qtest); ++} ++ + static void test_ccw_unplug(void) + { + QTestState *qtest = qtest_initf("-device virtio-balloon-ccw,id=dev0"); +@@ -145,6 +162,8 @@ int main(int argc, char **argv) + */ + qtest_add_func("/device-plug/pci-unplug-request", + test_pci_unplug_request); ++ qtest_add_func("/device-plug/pci-unplug-json-request", ++ test_pci_unplug_json_request); + + if (!strcmp(arch, "s390x")) { + qtest_add_func("/device-plug/ccw-unplug", +-- +2.27.0 + diff --git a/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch b/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch new file mode 100644 index 0000000..519c48d --- /dev/null +++ b/kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch @@ -0,0 +1,175 @@ +From fe4e22b9ccf2eb55d61eccf5050fb7aeafb5fe20 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 3/3] softmmu/physmem: Introduce MemTxAttrs::memory field and + MEMTX_ACCESS_ERROR +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [3/3] b1ebc1e99f21ba0b9eccb284e260b56c7a8e64d8 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 +Conflicts: memalign.h has not been introduced in this version. Instead, + we include osdep.h where the function prototypes are to be + found. + +commit 3ab6fdc91b72e156da22848f0003ff4225690ced +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:21 2021 +0100 + + softmmu/physmem: Introduce MemTxAttrs::memory field and MEMTX_ACCESS_ERROR + + Add the 'memory' bit to the memory attributes to restrict bus + controller accesses to memories. 
+ + Introduce flatview_access_allowed() to check bus permission + before running any bus transaction. + + Have read/write accessors return MEMTX_ACCESS_ERROR if an access is + restricted. + + There is no change for the default case where 'memory' is not set. + + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-4-philmd@redhat.com> + Reviewed-by: Richard Henderson + Reviewed-by: Stefan Hajnoczi + [thuth: Replaced MEMTX_BUS_ERROR with MEMTX_ACCESS_ERROR, remove "inline"] + Signed-off-by: Thomas Huth + +(cherry picked from commit 3ab6fdc91b72e156da22848f0003ff4225690ced) +Signed-off-by: Jon Maloy +--- + include/exec/memattrs.h | 9 +++++++++ + softmmu/physmem.c | 45 +++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h +index 95f2d20d55..9fb98bc1ef 100644 +--- a/include/exec/memattrs.h ++++ b/include/exec/memattrs.h +@@ -35,6 +35,14 @@ typedef struct MemTxAttrs { + unsigned int secure:1; + /* Memory access is usermode (unprivileged) */ + unsigned int user:1; ++ /* ++ * Bus interconnect and peripherals can access anything (memories, ++ * devices) by default. By setting the 'memory' bit, bus transaction ++ * are restricted to "normal" memories (per the AMBA documentation) ++ * versus devices. Access to devices will be logged and rejected ++ * (see MEMTX_ACCESS_ERROR). 
++ */ ++ unsigned int memory:1; + /* Requester ID (for MSI for example) */ + unsigned int requester_id:16; + /* Invert endianness for this page */ +@@ -66,6 +74,7 @@ typedef struct MemTxAttrs { + #define MEMTX_OK 0 + #define MEMTX_ERROR (1U << 0) /* device returned an error */ + #define MEMTX_DECODE_ERROR (1U << 1) /* nothing at that address */ ++#define MEMTX_ACCESS_ERROR (1U << 2) /* access denied */ + typedef uint32_t MemTxResult; + + #endif +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 483a31be81..4d0ef5f92f 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -41,6 +41,8 @@ + #include "qemu/config-file.h" + #include "qemu/error-report.h" + #include "qemu/qemu-print.h" ++#include "qemu/log.h" ++#include "qemu/osdep.h" + #include "exec/memory.h" + #include "exec/ioport.h" + #include "sysemu/dma.h" +@@ -2759,6 +2761,33 @@ static bool prepare_mmio_access(MemoryRegion *mr) + return release_lock; + } + ++/** ++ * flatview_access_allowed ++ * @mr: #MemoryRegion to be accessed ++ * @attrs: memory transaction attributes ++ * @addr: address within that memory region ++ * @len: the number of bytes to access ++ * ++ * Check if a memory transaction is allowed. ++ * ++ * Returns: true if transaction is allowed, false if denied. ++ */ ++static bool flatview_access_allowed(MemoryRegion *mr, MemTxAttrs attrs, ++ hwaddr addr, hwaddr len) ++{ ++ if (likely(!attrs.memory)) { ++ return true; ++ } ++ if (memory_region_is_ram(mr)) { ++ return true; ++ } ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Invalid access to non-RAM device at " ++ "addr 0x%" HWADDR_PRIX ", size %" HWADDR_PRIu ", " ++ "region '%s'\n", addr, len, memory_region_name(mr)); ++ return false; ++} ++ + /* Called within RCU critical section. 
*/ + static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + MemTxAttrs attrs, +@@ -2773,7 +2802,10 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, + const uint8_t *buf = ptr; + + for (;;) { +- if (!memory_access_is_direct(mr, true)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. */ ++ } else if (!memory_access_is_direct(mr, true)) { + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); + /* XXX: could force current_cpu to NULL to avoid +@@ -2818,6 +2850,9 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_write_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +@@ -2836,7 +2871,10 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, + + fuzz_dma_read_cb(addr, len, mr); + for (;;) { +- if (!memory_access_is_direct(mr, false)) { ++ if (!flatview_access_allowed(mr, attrs, addr1, l)) { ++ result |= MEMTX_ACCESS_ERROR; ++ /* Keep going. 
*/ ++ } else if (!memory_access_is_direct(mr, false)) { + /* I/O case */ + release_lock |= prepare_mmio_access(mr); + l = memory_access_size(mr, l, addr1); +@@ -2879,6 +2917,9 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, false, attrs); ++ if (!flatview_access_allowed(mr, attrs, addr, len)) { ++ return MEMTX_ACCESS_ERROR; ++ } + return flatview_read_continue(fv, addr, attrs, buf, len, + addr1, l, mr); + } +-- +2.27.0 + diff --git a/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch b/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch new file mode 100644 index 0000000..62f7037 --- /dev/null +++ b/kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch @@ -0,0 +1,80 @@ +From 916423392b46167c6683b0240610bb5a745590da Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 13 Apr 2022 14:51:06 -0400 +Subject: [PATCH 2/3] softmmu/physmem: Simplify flatview_write and + address_space_access_valid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 151: hw/intc/arm_gicv3: Check for !MEMTX_OK instead of MEMTX_ERROR +RH-Commit: [2/3] daabe41eefd5c519def592e374fa368e32a680d3 (jmaloy/qemu-kvm) +RH-Bugzilla: 1999236 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1999236 +Upstream: Merged +CVE: CVE-2021-3750 + +commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9 +Author: Philippe Mathieu-Daudé +Date: Wed Dec 15 19:24:20 2021 +0100 + + softmmu/physmem: Simplify flatview_write and address_space_access_valid + + Remove unuseful local 'result' variables. 
+ + Reviewed-by: Peter Xu + Reviewed-by: David Hildenbrand + Reviewed-by: Alexander Bulekov + Reviewed-by: Stefan Hajnoczi + Signed-off-by: Philippe Mathieu-Daudé + Message-Id: <20211215182421.418374-3-philmd@redhat.com> + Signed-off-by: Thomas Huth + +(cherry picked from commit 58e74682baf4e1ad26b064d8c02e5bc99c75c5d9) +Signed-off-by: Jon Maloy +--- + softmmu/physmem.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/softmmu/physmem.c b/softmmu/physmem.c +index 3524c04c2a..483a31be81 100644 +--- a/softmmu/physmem.c ++++ b/softmmu/physmem.c +@@ -2815,14 +2815,11 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, + hwaddr l; + hwaddr addr1; + MemoryRegion *mr; +- MemTxResult result = MEMTX_OK; + + l = len; + mr = flatview_translate(fv, addr, &addr1, &l, true, attrs); +- result = flatview_write_continue(fv, addr, attrs, buf, len, +- addr1, l, mr); +- +- return result; ++ return flatview_write_continue(fv, addr, attrs, buf, len, ++ addr1, l, mr); + } + + /* Called within RCU critical section. 
*/ +@@ -3119,12 +3116,10 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs) + { + FlatView *fv; +- bool result; + + RCU_READ_LOCK_GUARD(); + fv = address_space_to_flatview(as); +- result = flatview_access_valid(fv, addr, len, is_write, attrs); +- return result; ++ return flatview_access_valid(fv, addr, len, is_write, attrs); + } + + static hwaddr +-- +2.27.0 + diff --git a/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch new file mode 100644 index 0000000..8fd2e16 --- /dev/null +++ b/kvm-sysemu-tpm-Add-a-stub-function-for-TPM_IS_CRB.patch @@ -0,0 +1,54 @@ +From 74b3e92dcb9e343e135a681259514b4fd28086ea Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:09 +0200 +Subject: [PATCH 4/5] sysemu: tpm: Add a stub function for TPM_IS_CRB + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [1/2] 0ab55ca1aa12a3a7cbdef5a378928f75e030e536 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +In a subsequent patch, VFIO will need to recognize if +a memory region owner is a TPM CRB device. Hence VFIO +needs to use TPM_IS_CRB() even if CONFIG_TPM is unset. So +let's add a stub function. 
+ +Signed-off-by: Eric Auger +Suggested-by: Cornelia Huck +Reviewed-by: Stefan Berger +Link: https://lore.kernel.org/r/20220506132510.1847942-2-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 4168cdad398843ed53d650a27651868b4d3e21c9) +Signed-off-by: Eric Auger +--- + include/sysemu/tpm.h | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h +index 68b2206463..fb40e30ff6 100644 +--- a/include/sysemu/tpm.h ++++ b/include/sysemu/tpm.h +@@ -80,6 +80,12 @@ static inline TPMVersion tpm_get_version(TPMIf *ti) + #define tpm_init() (0) + #define tpm_cleanup() + ++/* needed for an alignment check in non-tpm code */ ++static inline Object *TPM_IS_CRB(Object *obj) ++{ ++ return NULL; ++} ++ + #endif /* CONFIG_TPM */ + + #endif /* QEMU_TPM_H */ +-- +2.31.1 + diff --git a/kvm-target-arm-deprecate-named-CPU-models.patch b/kvm-target-arm-deprecate-named-CPU-models.patch new file mode 100644 index 0000000..dbe8d24 --- /dev/null +++ b/kvm-target-arm-deprecate-named-CPU-models.patch @@ -0,0 +1,129 @@ +From 1f8528b71d96c01dd6106f11681f4a4e2776ef5f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 21 Mar 2022 12:05:42 +0000 +Subject: [PATCH 06/18] target/arm: deprecate named CPU models +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [6/6] afddeb9e898206fd04499f01c48caf7dc1a8b8ef (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +KVM requires use of the 'host' CPU model, so named CPU models are only +needed for TCG. Since we don't consider TCG to be supported we can +deprecate all the named CPU models. TCG users can rely on 'max' model. 
+ +Note: this has the effect of deprecating the default built-in CPU +model 'cortex-a57'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/arm/cpu-qom.h | 1 + + target/arm/cpu.c | 5 +++++ + target/arm/cpu.h | 2 ++ + target/arm/cpu64.c | 8 +++++++- + target/arm/helper.c | 2 ++ + 5 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu-qom.h b/target/arm/cpu-qom.h +index 64c44cef2d..82e97249bc 100644 +--- a/target/arm/cpu-qom.h ++++ b/target/arm/cpu-qom.h +@@ -35,6 +35,7 @@ typedef struct ARMCPUInfo { + const char *name; + void (*initfn)(Object *obj); + void (*class_init)(ObjectClass *oc, void *data); ++ const char *deprecation_note; + } ARMCPUInfo; + + void arm_cpu_register(const ARMCPUInfo *info); +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 5d4ca7a227..c74b0fb462 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2105,8 +2105,13 @@ static void arm_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void arm_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 23879de5fa..c0c9f680e5 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -33,6 +33,8 @@ + #define KVM_HAVE_MCE_INJECTION 1 + #endif + ++#define RHEL_CPU_DEPRECATION "use 'host' / 'max'" ++ + #define EXCP_UDEF 1 /* undefined instruction */ + #define EXCP_SWI 2 /* software interrupt */ + #define EXCP_PREFETCH_ABORT 3 +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e80b831073..c8f152891c 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -975,7 
+975,8 @@ static void aarch64_a64fx_initfn(Object *obj) + #endif /* disabled for RHEL */ + + static const ARMCPUInfo aarch64_cpus[] = { +- { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, ++ { .name = "cortex-a57", .initfn = aarch64_a57_initfn, ++ .deprecation_note = RHEL_CPU_DEPRECATION }, + #if 0 /* Disabled for Red Hat Enterprise Linux */ + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, + { .name = "cortex-a72", .initfn = aarch64_a72_initfn }, +@@ -1052,8 +1053,13 @@ static void aarch64_cpu_instance_init(Object *obj) + static void cpu_register_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + acc->info = data; ++ ++ if (acc->info->deprecation_note) { ++ cc->deprecation_note = acc->info->deprecation_note; ++ } + } + + void aarch64_cpu_register(const ARMCPUInfo *info) +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 7d14650615..3d34f63e49 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -8560,6 +8560,7 @@ void arm_cpu_list(void) + static void arm_cpu_add_definition(gpointer data, gpointer user_data) + { + ObjectClass *oc = data; ++ CPUClass *cc = CPU_CLASS(oc); + CpuDefinitionInfoList **cpu_list = user_data; + CpuDefinitionInfo *info; + const char *typename; +@@ -8569,6 +8570,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data) + info->name = g_strndup(typename, + strlen(typename) - strlen("-" TYPE_ARM_CPU)); + info->q_typename = g_strdup(typename); ++ info->deprecated = !!cc->deprecation_note; + + QAPI_LIST_PREPEND(*cpu_list, info); + } +-- +2.35.3 + diff --git a/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch new file mode 100644 index 0000000..d63bfdb --- /dev/null +++ b/kvm-target-i386-deprecate-CPUs-older-than-x86_64-v2-ABI.patch @@ -0,0 +1,273 @@ +From 577b04770e47aed0f88acb4a415ed04ddbe087f1 Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 04/18] target/i386: deprecate CPUs older than x86_64-v2 ABI +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [4/6] 71f6043f11b31ffa841a2e14d24972e571c18a9e (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the x86_64-v2 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical machine +capable of running RHEL-9 KVM. + +Supported CPUs: + + * QEMU models + + base (QEMU internal) + host (host passthrough) + max (host passthrough for KVM, + all emulated features for TCG) + + * Intel models + + Icelake-Server + Icelake-Server-noTSX + Cascadelake-Server (2019) + Cascadelake-Server-noTSX (2019) + Skylake-Server (2016) + Skylake-Server-IBRS (2016) + Skylake-Server-noTSX-IBRS (2016) + Skylake-Client (2015) + Skylake-Client-IBRS (2015) + Skylake-Client-noTSX-IBRS (2015) + Broadwell (2014) + Broadwell-IBRS (2014) + Broadwell-noTSX (2014) + Broadwell-noTSX-IBRS (2014) + Haswell (2013) + Haswell-IBRS (2013) + Haswell-noTSX (2013) + Haswell-noTSX-IBRS (2013) + IvyBridge (2012) + IvyBridge-IBRS (2012) + SandyBridge (2011) + SandyBridge-IBRS (2011) + Westmere (2010) + Westmere-IBRS (2010) + Nehalem (2008) + Nehalem-IBRS (2008) + + Cooperlake (2020) + Snowridge (2019) + KnightsMill (2017) + Denverton (2016) + + * AMD models + + EPYC-Milan (2021) + EPYC-Rome (2019) + EPYC (2017) + EPYC-IBPB (2017) + Opteron_G5 (2012) + Opteron_G4 (2011) + + * Other + + Dhyana (2018) + +(I've omitted the many -vNNN versions for brevity) + +Deprecated CPUs: + + 486 + athlon + Conroe + core2duo + coreduo + Icelake-Client 
(already deprecated upstream) + Icelake-Client-noTSX (already deprecated upstream) + kvm32 + kvm64 + n270 + Opteron_G1 + Opteron_G2 + Opteron_G3 + Penryn + pentium2 + pentium3 + pentium + phenom + qemu32 + qemu64 + +The deprecated CPU models are subject to removal in a future +major version of RHEL. + +Note: this has the effect of deprecating the default built-in CPU +model 'qemu64'. Applications using QEMU are expected to make an +explicit choice about which CPU model they want, since no builtin +default can suit all purposes. + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/i386/cpu.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cb6b5467d0..87cb641b5f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1780,9 +1780,13 @@ static const CPUCaches epyc_milan_cache_info = { + * PT in VMX operation + */ + ++#define RHEL_CPU_DEPRECATION \ ++ "use at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max'" ++ + static const X86CPUDefinition builtin_x86_defs[] = { + { + .name = "qemu64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -1803,6 +1807,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "phenom", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +@@ -1835,6 +1840,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "core2duo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1877,6 +1883,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm64", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 0xd, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1918,6 +1925,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "qemu32", ++ 
.deprecation_note = RHEL_CPU_DEPRECATION, + .level = 4, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1932,6 +1940,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "kvm32", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_INTEL, + .family = 15, +@@ -1962,6 +1971,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "coreduo", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -1995,6 +2005,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "486", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 4, +@@ -2007,6 +2018,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 1, + .vendor = CPUID_VENDOR_INTEL, + .family = 5, +@@ -2019,6 +2031,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2031,6 +2044,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "pentium3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 3, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2043,6 +2057,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "athlon", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 2, + .vendor = CPUID_VENDOR_AMD, + .family = 6, +@@ -2058,6 +2073,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "n270", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -2083,6 +2099,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Conroe", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family 
= 6, +@@ -2123,6 +2140,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Penryn", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 10, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, +@@ -3832,6 +3850,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G1", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3852,6 +3871,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G2", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 15, +@@ -3874,6 +3894,7 @@ static const X86CPUDefinition builtin_x86_defs[] = { + }, + { + .name = "Opteron_G3", ++ .deprecation_note = RHEL_CPU_DEPRECATION, + .level = 5, + .vendor = CPUID_VENDOR_AMD, + .family = 16, +-- +2.35.3 + diff --git a/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch new file mode 100644 index 0000000..c940cdb --- /dev/null +++ b/kvm-target-ppc-cpu-models-Fix-ppc_cpu_aliases-list-for-R.patch @@ -0,0 +1,48 @@ +From 39642d0d37e2ef61ce7fde0bc284d37a365e4482 Mon Sep 17 00:00:00 2001 +From: Murilo Opsfelder Araujo +Date: Mon, 2 May 2022 17:59:11 -0300 +Subject: [PATCH 2/2] target/ppc/cpu-models: Fix ppc_cpu_aliases list for RHEL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Murilo Opsfelder Araújo +RH-MergeRequest: 81: target/ppc/cpu-models: remove extraneous "#endif" +RH-Commit: [1/1] 5fff003ad3deb84c6a8e69ab90552a31edb3b058 (mopsfelder/centos-stream-src-qemu-kvm) +RH-Bugzilla: 2081022 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +The commit b9d28ecdedaf ("Enable/disable devices for RHEL") removed the +"#if 0" from the beginning of the ppc_cpu_aliases list, which broke the +build on ppc64le: + + ../target/ppc/cpu-models.c:904:2: 
error: #endif without #if + #endif + ^ + 1 error generated. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2081022 + +Fixes: b9d28ecdedaf (Enable/disable devices for RHEL) +Signed-off-by: Murilo Opsfelder Araujo +--- + target/ppc/cpu-models.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c +index dd78883410..528467eac1 100644 +--- a/target/ppc/cpu-models.c ++++ b/target/ppc/cpu-models.c +@@ -746,6 +746,7 @@ + /* PowerPC CPU aliases */ + + PowerPCCPUAlias ppc_cpu_aliases[] = { ++#if 0 /* Disabled for Red Hat Enterprise Linux */ + { "405", "405d4" }, + { "405cr", "405crc" }, + { "405gp", "405gpd" }, +-- +2.35.1 + diff --git a/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch new file mode 100644 index 0000000..212900d --- /dev/null +++ b/kvm-target-s390x-deprecate-CPUs-older-than-z14.patch @@ -0,0 +1,194 @@ +From 8459c305914e2a7a19dcd1662d54a89def7acfa6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Thu, 17 Mar 2022 17:59:22 +0000 +Subject: [PATCH 05/18] target/s390x: deprecate CPUs older than z14 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [5/6] 2da9e06cf452287673f94f880a7eb8b2b37b7278 (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +RHEL-9 is compiled with the z14 ABI. We use this as a baseline to +select which CPUs we want to support, such that there is at least one +supported guest CPU that can be launched for every physical +machine capable of running RHEL-9 KVM. 
+ +Supported CPUs: + + gen15a-base + gen15a + gen15b-base + gen15b + gen16a-base + gen16a + gen16b-base + gen16b + max + qemu + z14.2-base + z14.2 + z14-base + z14 + z14ZR1-base + z14ZR1 + +Deprecated CPUs: + + z10BC.2-base + z10BC.2 + z10BC-base + z10BC + z10EC.2-base + z10EC.2 + z10EC.3-base + z10EC.3 + z10EC-base + z10EC + z114-base + z114 + z13.2-base + z13.2 + z13-base + z13s-base + z13s + z13 + z196.2-base + z196.2 + z196-base + z196 + z800-base + z800 + z890.2-base + z890.2 + z890.3-base + z890.3 + z890-base + z890 + z900.2-base + z900.2 + z900.3-base + z900.3 + z900-base + z900 + z990.2-base + z990.2 + z990.3-base + z990.3 + z990.4-base + z990.4 + z990.5-base + z990.5 + z990-base + z990 + z9BC.2-base + z9BC.2 + z9BC-base + z9BC + z9EC.2-base + z9EC.2 + z9EC.3-base + z9EC.3 + z9EC-base + z9EC + zBC12-base + zBC12 + zEC12.2-base + zEC12.2 + zEC12-base + zEC12 + +https://bugzilla.redhat.com/show_bug.cgi?id=2060839 +Signed-off-by: Daniel P. Berrangé +--- + target/s390x/cpu_models.c | 11 +++++++++++ + target/s390x/cpu_models.h | 2 ++ + target/s390x/cpu_models_sysemu.c | 2 ++ + 3 files changed, 15 insertions(+) + +diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c +index 6d71428056..9b9fc41676 100644 +--- a/target/s390x/cpu_models.c ++++ b/target/s390x/cpu_models.c +@@ -45,6 +45,9 @@ + * of a following release have been a superset of the previous release. With + * generation 15 one base feature and one optional feature have been deprecated. 
+ */ ++ ++#define RHEL_CPU_DEPRECATION "use at least 'z14', or 'host' / 'qemu' / 'max'" ++ + static S390CPUDef s390_cpu_defs[] = { + CPUDEF_INIT(0x2064, 7, 1, 38, 0x00000000U, "z900", "IBM zSeries 900 GA1"), + CPUDEF_INIT(0x2064, 7, 2, 38, 0x00000000U, "z900.2", "IBM zSeries 900 GA2"), +@@ -852,22 +855,30 @@ static void s390_host_cpu_model_class_init(ObjectClass *oc, void *data) + static void s390_base_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* all base models are migration safe */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->is_static = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_cpu_model_class_init(ObjectClass *oc, void *data) + { + S390CPUClass *xcc = S390_CPU_CLASS(oc); ++ CPUClass *cc = CPU_CLASS(oc); + + /* model that can change between QEMU versions */ + xcc->cpu_def = (const S390CPUDef *) data; + xcc->is_migration_safe = true; + xcc->desc = xcc->cpu_def->desc; ++ if (xcc->cpu_def->gen < 14) { ++ cc->deprecation_note = RHEL_CPU_DEPRECATION; ++ } + } + + static void s390_qemu_cpu_model_class_init(ObjectClass *oc, void *data) +diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h +index 74d1f87e4f..372160bcd7 100644 +--- a/target/s390x/cpu_models.h ++++ b/target/s390x/cpu_models.h +@@ -38,6 +38,8 @@ struct S390CPUDef { + S390FeatBitmap full_feat; + /* used to init full_feat from generated data */ + S390FeatInit full_init; ++ /* if deprecated, provides a suggestion */ ++ const char *deprecation_note; + }; + + /* CPU model based on a CPU definition */ +diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c +index 6a04ccab1b..f3b7c304ec 100644 +--- a/target/s390x/cpu_models_sysemu.c ++++ b/target/s390x/cpu_models_sysemu.c +@@ -61,6 +61,7 @@ static void create_cpu_model_list(ObjectClass 
*klass, void *opaque) + CpuDefinitionInfo *info; + char *name = g_strdup(object_class_get_name(klass)); + S390CPUClass *scc = S390_CPU_CLASS(klass); ++ CPUClass *cc = CPU_CLASS(klass); + + /* strip off the -s390x-cpu */ + g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; +@@ -70,6 +71,7 @@ static void create_cpu_model_list(ObjectClass *klass, void *opaque) + info->migration_safe = scc->is_migration_safe; + info->q_static = scc->is_static; + info->q_typename = g_strdup(object_class_get_name(klass)); ++ info->deprecated = !!cc->deprecation_note; + /* check for unavailable features */ + if (cpu_list_data->model) { + Object *obj; +-- +2.35.3 + diff --git a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch new file mode 100644 index 0000000..61752c7 --- /dev/null +++ b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -0,0 +1,103 @@ +From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 +From: Janis Schoetterl-Glausch +Date: Fri, 6 May 2022 17:39:56 +0200 +Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation + +RH-Author: Thomas Huth +RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions +RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) +RH-Bugzilla: 2111994 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +Storage key controlled protection is currently not honored when +emulating instructions. +If available, enable key protection for the MEM_OP ioctl, thereby +enabling it for the s390_cpu_virt_mem_* functions, when using kvm. +As a result, the emulation of the following instructions honors storage +keys: + +* CLP + The Synch I/O CLP command would need special handling in order + to support storage keys, but is currently not supported. 
+* CHSC + Performing commands asynchronously would require special + handling, but commands are currently always synchronous. +* STSI +* TSCH + Must (and does) not change channel if terminated due to + protection. +* MSCH + Suppressed on protection, works because fetching instruction. +* SSCH + Suppressed on protection, works because fetching instruction. +* STSCH +* STCRW + Suppressed on protection, this works because no partial store is + possible, because the operand cannot span multiple pages. +* PCISTB +* MPCIFC +* STPCIFC + +Signed-off-by: Janis Schoetterl-Glausch +Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> +Signed-off-by: Thomas Huth + +(cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 +Signed-off-by: Thomas Huth +--- + target/s390x/kvm/kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 74f089d87f..1f1d1a33b8 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + static int cap_sync_regs; + static int cap_async_pf; + static int cap_mem_op; ++static int cap_mem_op_extension; + static int cap_s390_irq; + static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + ++static bool mem_op_storage_key_support; ++ + static int active_cmma; + + static int kvm_s390_query_mem_limit(uint64_t *memory_limit) +@@ -355,6 +358,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS); + cap_async_pf = kvm_check_extension(s, KVM_CAP_ASYNC_PF); + cap_mem_op = kvm_check_extension(s, KVM_CAP_S390_MEM_OP); ++ cap_mem_op_extension = kvm_check_extension(s, KVM_CAP_S390_MEM_OP_EXTENSION); ++ mem_op_storage_key_support = cap_mem_op_extension > 0; + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + 
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); +@@ -843,6 +848,7 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + : KVM_S390_MEMOP_LOGICAL_READ, + .buf = (uint64_t)hostbuf, + .ar = ar, ++ .key = (cpu->env.psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY, + }; + int ret; + +@@ -852,6 +858,9 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, void *hostbuf, + if (!hostbuf) { + mem_op.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; + } ++ if (mem_op_storage_key_support) { ++ mem_op.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ++ } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); + if (ret < 0) { +-- +2.31.1 + diff --git a/kvm-tests-acpi-SLIC-update-expected-blobs.patch b/kvm-tests-acpi-SLIC-update-expected-blobs.patch new file mode 100644 index 0000000..4d5fc35 --- /dev/null +++ b/kvm-tests-acpi-SLIC-update-expected-blobs.patch @@ -0,0 +1,47 @@ +From 0f5984bd89d481bf2494d4b3c36ef80350f44811 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 12/18] tests: acpi: SLIC: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [4/10] ca28e5c57f9eb432e5ad6b1cb7ef646a86890dd5 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:20 2021 -0500 + + tests: acpi: SLIC: update expected blobs + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit c8adb4d222c42951a9d0367e5f5d4e1f5e2c9ad7) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | Bin 0 -> 36 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 2 -- + 3 files changed, 2 deletions(-) + +literal 0 +HcmV?d00001 + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 49dbf8fa3e..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,3 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/q35/FACP.slic", +-"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-add-SLIC-table-test.patch b/kvm-tests-acpi-add-SLIC-table-test.patch new file mode 100644 index 0000000..9e54a7f --- /dev/null +++ b/kvm-tests-acpi-add-SLIC-table-test.patch @@ -0,0 +1,76 @@ +From 341715473c2a71f11a3888420a0caecf27ed4eb5 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 11/18] tests: acpi: add SLIC table test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [3/10] baac9b82c16a50eb4640fd7146775c9d507c7b21 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 11edfabee443b149468a82b5efc88c96d1d259ec +Author: Igor Mammedov +Date: Mon Dec 27 14:31:19 2021 -0500 + + tests: acpi: add SLIC table test + + When user uses '-acpitable' to add SLIC table, some ACPI + tables (FADT) will change its 'Oem ID'/'Oem Table ID' fields to + match that of SLIC. Test makes sure that QEMU handles + those fields correctly when SLIC table is added with + '-acpitable' option.
+ + Conflicts: tests/qtest/bios-tables-test.c + due to missing 39d7554b2009 ("tests/acpi: add test case for VIOT") + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-4-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 11edfabee443b149468a82b5efc88c96d1d259ec) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 16d8304cde..e159b71136 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1467,6 +1467,20 @@ static void test_acpi_virt_tcg(void) + free_test_data(&data); + } + ++static void test_acpi_q35_slic(void) ++{ ++ test_data data = { ++ .machine = MACHINE_Q35, ++ .variant = ".slic", ++ }; ++ ++ test_acpi_one("-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id='ME'," ++ "oem_rev=00002210,asl_compiler_id='qemu'," ++ "asl_compiler_rev=00000000,data=/dev/null", ++ &data); ++ free_test_data(&data); ++} ++ + static void test_oem_fields(test_data *data) + { + int i; +@@ -1641,6 +1655,7 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); + qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); + } ++ qtest_add_func("acpi/q35/slic", test_acpi_q35_slic); + } else if (strcmp(arch, "aarch64") == 0) { + if (has_tcg) { + qtest_add_func("acpi/virt", test_acpi_virt_tcg); +-- +2.27.0 + diff --git a/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch b/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch new file mode 100644 index 0000000..05a6838 --- /dev/null +++ b/kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch @@ -0,0 +1,84 @@ +From d94b3278c84cf7451489631d804a6b5cbd28a59d Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 13/18] tests: acpi: manually pad 
OEM_ID/OEM_TABLE_ID for + test_oem_fields() test + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [5/10] 4ec8c738acec178c2f005f189b0c2a77a7af4088 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit a849522f726767022203ef2b6c395ea19facb866 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:29 2022 -0500 + + tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test + + The next commit will revert OEM fields padding with whitespace to + padding with '\0' as it was before [1]. As result test_oem_fields() will + fail due to unexpectedly smaller ID sizes read from QEMU ACPI tables. + + Pad OEM_ID/OEM_TABLE_ID manually with spaces so that values the test + puts on QEMU CLI and expected values match. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-2-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit a849522f726767022203ef2b6c395ea19facb866) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index e159b71136..348fdbd202 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,9 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST" +-#define OEM_TABLE_ID "OEM" +-#define OEM_TEST_ARGS "-machine x-oem-id="OEM_ID",x-oem-table-id="OEM_TABLE_ID ++#define OEM_ID "TEST " ++#define OEM_TABLE_ID "OEM " ++#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ ++ OEM_TABLE_ID "'" + + typedef struct { + bool tcg_only; +@@ -1484,11 +1485,7 @@ static void test_acpi_q35_slic(void) + static void test_oem_fields(test_data *data) + { + int i; +- char oem_id[6]; +- char oem_table_id[8]; + +- strpadcpy(oem_id, sizeof oem_id, OEM_ID, ' '); +- strpadcpy(oem_table_id, sizeof oem_table_id, OEM_TABLE_ID, ' '); + for (i = 0; i < data->tables->len; ++i) { + AcpiSdtTable *sdt; + +@@ -1498,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, oem_id, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, oem_table_id, 8) == 0); ++ g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch b/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch new file mode 100644 index 0000000..66d62e5 --- /dev/null +++ b/kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch @@ -0,0 +1,77 @@ +From 485bf2eb8edabd4553d995d5e32224df1e510aa2 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 17/18] tests: acpi: test short OEM_ID/OEM_TABLE_ID values in + 
test_oem_fields() + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [9/10] 31339223fb6c6cc32185b9fdaac76f2709b17ad6 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 408ca92634770de5eac7965ed97c6260e770f2e7 +Author: Igor Mammedov +Date: Fri Jan 14 09:26:41 2022 -0500 + + tests: acpi: test short OEM_ID/OEM_TABLE_ID values in test_oem_fields() + + Previous patch [1] added explicit whitespace padding to OEM_ID/OEM_TABLE_ID + values used in test_oem_fields() testcase to avoid false positive and + bisection issues when QEMU is switched to '\0' padding. As a result + testcase ceased to test values that were shorter than max possible + length values. + + Update testcase to make sure that it's testing shorter IDs like it + used to before [2]. + + 1) "tests: acpi: manually pad OEM_ID/OEM_TABLE_ID for test_oem_fields() test" + 2) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + + Signed-off-by: Igor Mammedov + Message-Id: <20220114142641.1727679-1-imammedo@redhat.com> + Reviewed-by: Michael S.
Tsirkin + +(cherry picked from commit 408ca92634770de5eac7965ed97c6260e770f2e7) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index 348fdbd202..515a647490 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -71,10 +71,10 @@ + + #define ACPI_REBUILD_EXPECTED_AML "TEST_ACPI_REBUILD_AML" + +-#define OEM_ID "TEST " +-#define OEM_TABLE_ID "OEM " +-#define OEM_TEST_ARGS "-machine x-oem-id='" OEM_ID "',x-oem-table-id='" \ +- OEM_TABLE_ID "'" ++#define OEM_ID "TEST" ++#define OEM_TABLE_ID "OEM" ++#define OEM_TEST_ARGS "-machine x-oem-id=" OEM_ID ",x-oem-table-id=" \ ++ OEM_TABLE_ID + + typedef struct { + bool tcg_only; +@@ -1495,8 +1495,8 @@ static void test_oem_fields(test_data *data) + continue; + } + +- g_assert(memcmp(sdt->aml + 10, OEM_ID, 6) == 0); +- g_assert(memcmp(sdt->aml + 16, OEM_TABLE_ID, 8) == 0); ++ g_assert(strncmp((char *)sdt->aml + 10, OEM_ID, 6) == 0); ++ g_assert(strncmp((char *)sdt->aml + 16, OEM_TABLE_ID, 8) == 0); + } + } + +-- +2.27.0 + diff --git a/kvm-tests-acpi-update-expected-blobs.patch b/kvm-tests-acpi-update-expected-blobs.patch new file mode 100644 index 0000000..8f300c4 --- /dev/null +++ b/kvm-tests-acpi-update-expected-blobs.patch @@ -0,0 +1,58 @@ +From 4785d2a77fbea681975e5c48ae6a1be49058e089 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 16/18] tests: acpi: update expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [8/10] e069c5de88f34393d65d32b60380865832820302 (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit 5adc3aba875416b0e077d8a29ddd0357883746f4 +Author: Igor Mammedov +Date: Wed Jan 12 08:03:32 2022 
-0500 + + tests: acpi: update expected blobs + + Expected changes caused by previous commit: + + nvdimm ssdt (q35/pc/virt): + - * OEM Table ID "NVDIMM " + + * OEM Table ID "NVDIMM" + + SLIC test FADT (tests/data/acpi/q35/FACP.slic): + -[010h 0016 8] Oem Table ID : "ME " + +[010h 0016 8] Oem Table ID : "ME" + + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-5-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit 5adc3aba875416b0e077d8a29ddd0357883746f4) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/pc/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/q35/FACP.slic | Bin 244 -> 244 bytes + tests/data/acpi/q35/SSDT.dimmpxm | Bin 734 -> 734 bytes + tests/data/acpi/virt/SSDT.memhp | Bin 736 -> 736 bytes + tests/qtest/bios-tables-test-allowed-diff.h | 4 ---- + 5 files changed, 4 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 7faa8f53be..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,5 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/SSDT.memhp", +-"tests/data/acpi/pc/SSDT.dimmpxm", +-"tests/data/acpi/q35/SSDT.dimmpxm", +-"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch b/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch new file mode 100644 index 0000000..4a1b350 --- /dev/null +++ b/kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch @@ -0,0 +1,47 @@ +From 4e6482073df85db5982aa03ab0355e632b7157fc Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 10/18] tests: acpi: whitelist expected blobs before changing + them + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [2/10] 
c664ecad30ca9c13025a63bb31ae7b80fd63e4df (jmaloy/qemu-kvm) +RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit e71f6ab9d93a7d01e833647e7010c1079c4cef30 +Author: Igor Mammedov +Date: Mon Dec 27 14:31:18 2021 -0500 + + tests: acpi: whitelist expected blobs before changing them + + Signed-off-by: Igor Mammedov + Message-Id: <20211227193120.1084176-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +(cherry picked from commit e71f6ab9d93a7d01e833647e7010c1079c4cef30) +Signed-off-by: Jon Maloy +--- + tests/data/acpi/q35/FACP.slic | Bin 0 -> 244 bytes + tests/data/acpi/q35/SLIC.slic | 0 + tests/qtest/bios-tables-test-allowed-diff.h | 2 ++ + 3 files changed, 2 insertions(+) + create mode 100644 tests/data/acpi/q35/FACP.slic + create mode 100644 tests/data/acpi/q35/SLIC.slic + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..49dbf8fa3e 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,3 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/q35/FACP.slic", ++"tests/data/acpi/q35/SLIC.slic", +-- +2.27.0 + diff --git a/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch b/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch new file mode 100644 index 0000000..30289c7 --- /dev/null +++ b/kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch @@ -0,0 +1,57 @@ +From a132a22e316121cf00ff733afb1ad1dc313e14b3 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 14/18] tests: acpi: whitelist nvdimm's SSDT and FACP.slic + expected blobs + +RH-Author: Jon Maloy +RH-MergeRequest: 141: acpi: fix QEMU crash when started with SLIC table +RH-Commit: [6/10] 3f3a929cde82f228da1e4bc66e4c869467c0289c (jmaloy/qemu-kvm) 
+RH-Bugzilla: 2062611 +RH-Acked-by: Igor Mammedov + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2062611 +Upstream: Merged + +commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c +Author: Igor Mammedov +Date: Wed Jan 12 08:03:30 2022 -0500 + + tests: acpi: whitelist nvdimm's SSDT and FACP.slic expected blobs + + The next commit will revert OEM fields whitespace padding to + padding with '\0' as it was before [1]. That will change OEM + Table ID for: + * SSDT.*: where it was padded from 6 characters to 8 + * FACP.slic: where it was padded from 2 characters to 8 + after reverting whitespace padding, it will be replaced with + '\0' which effectively will shorten OEM table ID to 6 and 2 + characters. + + Whitelist affected tables before introducing the change. + + 1) 602b458201 ("acpi: Permit OEM ID and OEM table ID fields to be changed") + Signed-off-by: Igor Mammedov + Message-Id: <20220112130332.1648664-3-imammedo@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit d1e4a4654154925eddf0fc449fa9c92b806b9c8c) +Signed-off-by: Jon Maloy +--- + tests/qtest/bios-tables-test-allowed-diff.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..7faa8f53be 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,5 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/SSDT.memhp", ++"tests/data/acpi/pc/SSDT.dimmpxm", ++"tests/data/acpi/q35/SSDT.dimmpxm", ++"tests/data/acpi/q35/FACP.slic", +-- +2.27.0 + diff --git a/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch new file mode 100644 index 0000000..4fcf786 --- /dev/null +++ b/kvm-tests-avocado-update-aarch64_virt-test-to-exercise-c.patch @@ -0,0 +1,157 @@ +From f52aa60217634c96fef59ce76b803a94610bf5c8 Mon Sep 17 00:00:00 2001 +From: Andrew Jones +Date: Wed, 15 Jun 2022 15:28:27 +0200 +Subject: [PATCH 01/18] tests/avocado: update aarch64_virt test to exercise + -cpu max +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 94: i386, aarch64, s390x: deprecate many named CPU models +RH-Commit: [1/6] df6839e567180a4c32afd98852f68b2279e00f7c (berrange/centos-src-qemu) +RH-Bugzilla: 2060839 +RH-Acked-by: Thomas Huth +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2066824 + +commit 11593544df6f8febb3ce87015c22b429bf43c4c7 +Author: Alex Bennée +Date: Tue Apr 19 10:09:56 2022 +0100 + + tests/avocado: update aarch64_virt test to exercise -cpu max + + The Fedora 29 kernel is quite old and importantly fails when running + in LPA2 scenarios. 
As it's not really exercising much of the CPU space + replace it with a custom 5.16.12 kernel with all the architecture + options turned on. There is a minimal buildroot initramfs included in + the kernel which has a few tools for stress testing the memory + subsystem. The userspace also targets the Neoverse N1 processor so + would fail with a v8.0 cpu like cortex-a53. + + While we are at it move the test into its own file so it can have an + assigned maintainer. + + Signed-off-by: Alex Bennée + Acked-by: Richard Henderson + Tested-by: Richard Henderson + Message-Id: <20220419091020.3008144-2-alex.bennee@linaro.org> + +Signed-off-by: Andrew Jones +--- + MAINTAINERS | 1 + + tests/avocado/boot_linux_console.py | 25 ------------- + tests/avocado/machine_aarch64_virt.py | 51 +++++++++++++++++++++++++++ + 3 files changed, 52 insertions(+), 25 deletions(-) + create mode 100644 tests/avocado/machine_aarch64_virt.py + +diff --git a/MAINTAINERS b/MAINTAINERS +index 2fe20a49ab..bfe8806f60 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -942,6 +942,7 @@ S: Maintained + F: hw/arm/virt* + F: include/hw/arm/virt.h + F: docs/system/arm/virt.rst ++F: tests/avocado/machine_aarch64_virt.py + + Xilinx Zynq + M: Edgar E. 
Iglesias +diff --git a/tests/avocado/boot_linux_console.py b/tests/avocado/boot_linux_console.py +index b40a3abc81..45a2ceda22 100644 +--- a/tests/avocado/boot_linux_console.py ++++ b/tests/avocado/boot_linux_console.py +@@ -325,31 +325,6 @@ def test_mips_malta32el_nanomips_64k_dbg(self): + kernel_hash = '18d1c68f2e23429e266ca39ba5349ccd0aeb7180' + self.do_test_mips_malta32el_nanomips(kernel_url, kernel_hash) + +- def test_aarch64_virt(self): +- """ +- :avocado: tags=arch:aarch64 +- :avocado: tags=machine:virt +- :avocado: tags=accel:tcg +- :avocado: tags=cpu:cortex-a53 +- """ +- kernel_url = ('https://archives.fedoraproject.org/pub/archive/fedora' +- '/linux/releases/29/Everything/aarch64/os/images/pxeboot' +- '/vmlinuz') +- kernel_hash = '8c73e469fc6ea06a58dc83a628fc695b693b8493' +- kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) +- +- self.vm.set_console() +- kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + +- 'console=ttyAMA0') +- self.require_accelerator("tcg") +- self.vm.add_args('-cpu', 'cortex-a53', +- '-accel', 'tcg', +- '-kernel', kernel_path, +- '-append', kernel_command_line) +- self.vm.launch() +- console_pattern = 'Kernel command line: %s' % kernel_command_line +- self.wait_for_console_pattern(console_pattern) +- + def test_aarch64_xlnx_versal_virt(self): + """ + :avocado: tags=arch:aarch64 +diff --git a/tests/avocado/machine_aarch64_virt.py b/tests/avocado/machine_aarch64_virt.py +new file mode 100644 +index 0000000000..21848cba70 +--- /dev/null ++++ b/tests/avocado/machine_aarch64_virt.py +@@ -0,0 +1,51 @@ ++# Functional test that boots a Linux kernel and checks the console ++# ++# Copyright (c) 2022 Linaro Ltd. 
++# ++# Author: ++# Alex Bennée ++# ++# SPDX-License-Identifier: GPL-2.0-or-later ++ ++import time ++ ++from avocado_qemu import QemuSystemTest ++from avocado_qemu import wait_for_console_pattern ++from avocado_qemu import exec_command ++ ++class Aarch64VirtMachine(QemuSystemTest): ++ KERNEL_COMMON_COMMAND_LINE = 'printk.time=0 ' ++ ++ def wait_for_console_pattern(self, success_message, vm=None): ++ wait_for_console_pattern(self, success_message, ++ failure_message='Kernel panic - not syncing', ++ vm=vm) ++ ++ def test_aarch64_virt(self): ++ """ ++ :avocado: tags=arch:aarch64 ++ :avocado: tags=machine:virt ++ :avocado: tags=accel:tcg ++ :avocado: tags=cpu:max ++ """ ++ kernel_url = ('https://fileserver.linaro.org/s/' ++ 'z6B2ARM7DQT3HWN/download') ++ ++ kernel_hash = 'ed11daab50c151dde0e1e9c9cb8b2d9bd3215347' ++ kernel_path = self.fetch_asset(kernel_url, asset_hash=kernel_hash) ++ ++ self.vm.set_console() ++ kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + ++ 'console=ttyAMA0') ++ self.require_accelerator("tcg") ++ self.vm.add_args('-cpu', 'max,pauth-impdef=on', ++ '-accel', 'tcg', ++ '-kernel', kernel_path, ++ '-append', kernel_command_line) ++ self.vm.launch() ++ self.wait_for_console_pattern('Welcome to Buildroot') ++ time.sleep(0.1) ++ exec_command(self, 'root') ++ time.sleep(0.1) ++ exec_command(self, 'cat /proc/self/maps') ++ time.sleep(0.1) +-- +2.35.3 + diff --git a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch new file mode 100644 index 0000000..7b9a8f3 --- /dev/null +++ b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch @@ -0,0 +1,119 @@ +From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 18 Nov 2021 12:57:33 +0100 +Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for + CVE-2021-3507 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) +RH-Bugzilla: 1951522 +RH-Acked-by: Hanna Reitz +RH-Acked-by: Miroslav Rezanina + +Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 + +Without the previous commit, when running 'make check-qtest-i386' +with QEMU configured with '--enable-sanitizers' we get: + + ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0 + READ of size 786432 at 0x619000062a00 thread T0 + #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919) + #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13 + #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14 + #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18 + #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16 + #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5 + #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5 + #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9 + #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13 + #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13 + #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13 + #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9 + #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17 + + 0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00) + allocated by thread T0 here: + #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec) + #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11 + #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27 + #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20 + #4 
0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5 + #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13 + + SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy + Shadow bytes around the buggy address: + 0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + 0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa + 0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd + Shadow byte legend (one shadow byte represents 8 application bytes): + Addressable: 00 + Heap left redzone: fa + Freed heap region: fd + ==4028352==ABORTING + +[ kwolf: Added snapshot=on to prevent write file lock failure ] + +Reported-by: Alexander Bulekov +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Alexander Bulekov +Signed-off-by: Kevin Wolf +(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc) +Signed-off-by: Jon Maloy +--- + tests/qtest/fdc-test.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c +index b0d40012e6..1d4f852128 100644 +--- a/tests/qtest/fdc-test.c ++++ b/tests/qtest/fdc-test.c +@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) + qtest_quit(s); + } + ++static void test_cve_2021_3507(void) ++{ ++ QTestState *s; ++ ++ s = qtest_initf("-nographic -m 32M -nodefaults " ++ "-drive file=%s,format=raw,if=floppy,snapshot=on", ++ test_image); ++ qtest_outl(s, 0x9, 
0x0a0206); ++ qtest_outw(s, 0x3f4, 0x1600); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0200); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_outw(s, 0x3f4, 0x0000); ++ qtest_quit(s); ++} ++ + int main(int argc, char **argv) + { + int fd; +@@ -614,6 +634,7 @@ int main(int argc, char **argv) + qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19); + qtest_add_func("/fdc/fuzz-registers", fuzz_registers); + qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196); ++ qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507); + + ret = g_test_run(); + +-- +2.31.1 + diff --git a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch new file mode 100644 index 0000000..77929a6 --- /dev/null +++ b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch @@ -0,0 +1,385 @@ +From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:23 +0200 +Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the + thread pool size + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98 +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +The thread pool regulates itself: when idle, it kills threads until +empty, when in demand, it creates new threads until full. This behaviour +doesn't play well with latency sensitive workloads where the price of +creating a new thread is too high. For example, when paired with qemu's +'-mlock', or using safety features like SafeStack, creating a new thread +has been measured take multiple milliseconds. 
+ +In order to mitigate this let's introduce a new 'EventLoopBase' +property to set the thread pool size. The threads will be created during +the pool's initialization or upon updating the property's value, remain +available during its lifetime regardless of demand, and destroyed upon +freeing it. A properly characterized workload will then be able to +configure the pool to avoid any latency spikes. + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-4-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c) +--- + event-loop-base.c | 23 +++++++++++++ + include/block/aio.h | 10 ++++++ + include/block/thread-pool.h | 3 ++ + include/sysemu/event-loop-base.h | 4 +++ + iothread.c | 3 ++ + qapi/qom.json | 10 +++++- + util/aio-posix.c | 1 + + util/async.c | 20 ++++++++++++ + util/main-loop.c | 9 ++++++ + util/thread-pool.c | 55 +++++++++++++++++++++++++++++--- + 10 files changed, 133 insertions(+), 5 deletions(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index e7f99a6ec8..d5be4dc6fc 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qom/object_interfaces.h" + #include "qapi/error.h" ++#include "block/thread-pool.h" + #include "sysemu/event-loop-base.h" + + typedef struct { +@@ -21,9 +22,22 @@ typedef struct { + ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */ + } EventLoopBaseParamInfo; + ++static void event_loop_base_instance_init(Object *obj) ++{ ++ EventLoopBase *base = EVENT_LOOP_BASE(obj); ++ ++ base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++} ++ + static EventLoopBaseParamInfo aio_max_batch_info = { + "aio-max-batch", offsetof(EventLoopBase, aio_max_batch), + }; ++static EventLoopBaseParamInfo thread_pool_min_info = { ++ "thread-pool-min", offsetof(EventLoopBase, thread_pool_min), ++}; ++static 
EventLoopBaseParamInfo thread_pool_max_info = { ++ "thread-pool-max", offsetof(EventLoopBase, thread_pool_max), ++}; + + static void event_loop_base_get_param(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp) +@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + event_loop_base_get_param, + event_loop_base_set_param, + NULL, &aio_max_batch_info); ++ object_class_property_add(klass, "thread-pool-min", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_min_info); ++ object_class_property_add(klass, "thread-pool-max", "int", ++ event_loop_base_get_param, ++ event_loop_base_set_param, ++ NULL, &thread_pool_max_info); + } + + static const TypeInfo event_loop_base_info = { + .name = TYPE_EVENT_LOOP_BASE, + .parent = TYPE_OBJECT, + .instance_size = sizeof(EventLoopBase), ++ .instance_init = event_loop_base_instance_init, + .class_size = sizeof(EventLoopBaseClass), + .class_init = event_loop_base_class_init, + .abstract = true, +diff --git a/include/block/aio.h b/include/block/aio.h +index 5634173b12..d128558f1d 100644 +--- a/include/block/aio.h ++++ b/include/block/aio.h +@@ -192,6 +192,8 @@ struct AioContext { + QSLIST_HEAD(, Coroutine) scheduled_coroutines; + QEMUBH *co_schedule_bh; + ++ int thread_pool_min; ++ int thread_pool_max; + /* Thread pool for performing work and receiving completion callbacks. + * Has its own locking. 
+ */ +@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, + void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch, + Error **errp); + ++/** ++ * aio_context_set_thread_pool_params: ++ * @ctx: the aio context ++ * @min: min number of threads to have readily available in the thread pool ++ * @max: max number of threads the thread pool can contain ++ */ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp); + #endif +diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h +index 7dd7d730a0..2020bcc92d 100644 +--- a/include/block/thread-pool.h ++++ b/include/block/thread-pool.h +@@ -20,6 +20,8 @@ + + #include "block/block.h" + ++#define THREAD_POOL_MAX_THREADS_DEFAULT 64 ++ + typedef int ThreadPoolFunc(void *opaque); + + typedef struct ThreadPool ThreadPool; +@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, + int coroutine_fn thread_pool_submit_co(ThreadPool *pool, + ThreadPoolFunc *func, void *arg); + void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); ++void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx); + + #endif +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index fced4c9fea..2748bf6ae1 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -33,5 +33,9 @@ struct EventLoopBase { + + /* AioContext AIO engine parameters */ + int64_t aio_max_batch; ++ ++ /* AioContext thread pool parameters */ ++ int64_t thread_pool_min; ++ int64_t thread_pool_max; + }; + #endif +diff --git a/iothread.c b/iothread.c +index 8fa2f3bfb8..529194a566 100644 +--- a/iothread.c ++++ b/iothread.c +@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp) + aio_context_set_aio_params(iothread->ctx, + iothread->parent_obj.aio_max_batch, + errp); ++ ++ aio_context_set_thread_pool_params(iothread->ctx, 
base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + +diff --git a/qapi/qom.json b/qapi/qom.json +index 7d4a2ac1b9..6a653c6636 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -508,10 +508,18 @@ + # 0 means that the engine will use its default. + # (default: 0) + # ++# @thread-pool-min: minimum number of threads reserved in the thread pool ++# (default:0) ++# ++# @thread-pool-max: maximum number of threads the thread pool can contain ++# (default:64) ++# + # Since: 7.1 + ## + { 'struct': 'EventLoopBaseProperties', +- 'data': { '*aio-max-batch': 'int' } } ++ 'data': { '*aio-max-batch': 'int', ++ '*thread-pool-min': 'int', ++ '*thread-pool-max': 'int' } } + + ## + # @IothreadProperties: +diff --git a/util/aio-posix.c b/util/aio-posix.c +index be0182a3c6..731f3826c0 100644 +--- a/util/aio-posix.c ++++ b/util/aio-posix.c +@@ -15,6 +15,7 @@ + + #include "qemu/osdep.h" + #include "block/block.h" ++#include "block/thread-pool.h" + #include "qemu/main-loop.h" + #include "qemu/rcu.h" + #include "qemu/rcu_queue.h" +diff --git a/util/async.c b/util/async.c +index 2ea1172f3e..554ba70cca 100644 +--- a/util/async.c ++++ b/util/async.c +@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp) + + ctx->aio_max_batch = 0; + ++ ctx->thread_pool_min = 0; ++ ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT; ++ + return ctx; + fail: + g_source_destroy(&ctx->source); +@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx) + assert(!get_my_aiocontext()); + set_my_aiocontext(ctx); + } ++ ++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min, ++ int64_t max, Error **errp) ++{ ++ ++ if (min > max || !max || min > INT_MAX || max > INT_MAX) { ++ error_setg(errp, "bad thread-pool-min/thread-pool-max values"); ++ return; ++ } ++ ++ ctx->thread_pool_min = min; ++ ctx->thread_pool_max = max; ++ ++ if (ctx->thread_pool) { ++ thread_pool_update_params(ctx->thread_pool, ctx); ++ } ++} +diff --git a/util/main-loop.c b/util/main-loop.c +index 
5b13f456fa..a0f48186ab 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -30,6 +30,7 @@ + #include "sysemu/replay.h" + #include "qemu/main-loop.h" + #include "block/aio.h" ++#include "block/thread-pool.h" + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" +@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp) + + static void main_loop_update_params(EventLoopBase *base, Error **errp) + { ++ ERRP_GUARD(); ++ + if (!qemu_aio_context) { + error_setg(errp, "qemu aio context not ready"); + return; + } + + aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++ if (*errp) { ++ return; ++ } ++ ++ aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min, ++ base->thread_pool_max, errp); + } + + MainLoop *mloop; +diff --git a/util/thread-pool.c b/util/thread-pool.c +index d763cea505..196835b4d3 100644 +--- a/util/thread-pool.c ++++ b/util/thread-pool.c +@@ -58,7 +58,6 @@ struct ThreadPool { + QemuMutex lock; + QemuCond worker_stopped; + QemuSemaphore sem; +- int max_threads; + QEMUBH *new_thread_bh; + + /* The following variables are only accessed from one AioContext. */ +@@ -71,8 +70,27 @@ struct ThreadPool { + int new_threads; /* backlog of threads we need to create */ + int pending_threads; /* threads created but not running yet */ + bool stopping; ++ int min_threads; ++ int max_threads; + }; + ++static inline bool back_to_sleep(ThreadPool *pool, int ret) ++{ ++ /* ++ * The semaphore timed out, we should exit the loop except when: ++ * - There is work to do, we raced with the signal. ++ * - The max threads threshold just changed, we raced with the signal. ++ * - The thread pool forces a minimum number of readily available threads. 
++ */ ++ if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) || ++ pool->cur_threads > pool->max_threads || ++ pool->cur_threads <= pool->min_threads)) { ++ return true; ++ } ++ ++ return false; ++} ++ + static void *worker_thread(void *opaque) + { + ThreadPool *pool = opaque; +@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque) + ret = qemu_sem_timedwait(&pool->sem, 10000); + qemu_mutex_lock(&pool->lock); + pool->idle_threads--; +- } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list)); +- if (ret == -1 || pool->stopping) { ++ } while (back_to_sleep(pool, ret)); ++ if (ret == -1 || pool->stopping || ++ pool->cur_threads > pool->max_threads) { + break; + } + +@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg) + thread_pool_submit_aio(pool, func, arg, NULL, NULL); + } + ++void thread_pool_update_params(ThreadPool *pool, AioContext *ctx) ++{ ++ qemu_mutex_lock(&pool->lock); ++ ++ pool->min_threads = ctx->thread_pool_min; ++ pool->max_threads = ctx->thread_pool_max; ++ ++ /* ++ * We either have to: ++ * - Increase the number of available threads until over the min_threads ++ * threshold. ++ * - Decrease the number of available threads until under the max_threads ++ * threshold. ++ * - Do nothing. The current number of threads falls in between the min and ++ * max thresholds. We'll let the pool manage itself. 
++ */ ++ for (int i = pool->cur_threads; i < pool->min_threads; i++) { ++ spawn_thread(pool); ++ } ++ ++ for (int i = pool->cur_threads; i > pool->max_threads; i--) { ++ qemu_sem_post(&pool->sem); ++ } ++ ++ qemu_mutex_unlock(&pool->lock); ++} ++ + static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + { + if (!ctx) { +@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx) + qemu_mutex_init(&pool->lock); + qemu_cond_init(&pool->worker_stopped); + qemu_sem_init(&pool->sem, 0); +- pool->max_threads = 64; + pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool); + + QLIST_INIT(&pool->head); + QTAILQ_INIT(&pool->request_list); ++ ++ thread_pool_update_params(pool, ctx); + } + + ThreadPool *thread_pool_new(AioContext *ctx) +-- +2.31.1 + diff --git a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch new file mode 100644 index 0000000..2104424 --- /dev/null +++ b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch @@ -0,0 +1,233 @@ +From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001 +From: Nicolas Saenz Julienne +Date: Mon, 25 Apr 2022 09:57:22 +0200 +Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM + +RH-Author: Nicolas Saenz Julienne +RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size +RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd +RH-Bugzilla: 2031024 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Stefan Hajnoczi + +'event-loop-base' provides basic property handling for all 'AioContext' +based event loops. So let's define a new 'MainLoopClass' that inherits +from it. This will permit tweaking the main loop's properties through +qapi as well as through the command line using the '-object' keyword[1]. +Only one instance of 'MainLoopClass' might be created at any time. 
+ +'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to +mark 'MainLoop' as non-deletable. + +[1] For example: + -object main-loop,id=main-loop,aio-max-batch= + +Signed-off-by: Nicolas Saenz Julienne +Reviewed-by: Stefan Hajnoczi +Acked-by: Markus Armbruster +Message-id: 20220425075723.20019-3-nsaenzju@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27) +--- + event-loop-base.c | 13 ++++++++ + include/qemu/main-loop.h | 10 ++++++ + include/sysemu/event-loop-base.h | 1 + + meson.build | 3 +- + qapi/qom.json | 13 ++++++++ + util/main-loop.c | 56 ++++++++++++++++++++++++++++++++ + 6 files changed, 95 insertions(+), 1 deletion(-) + +diff --git a/event-loop-base.c b/event-loop-base.c +index a924c73a7c..e7f99a6ec8 100644 +--- a/event-loop-base.c ++++ b/event-loop-base.c +@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp) + } + } + ++static bool event_loop_base_can_be_deleted(UserCreatable *uc) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc); ++ EventLoopBase *backend = EVENT_LOOP_BASE(uc); ++ ++ if (bc->can_be_deleted) { ++ return bc->can_be_deleted(backend); ++ } ++ ++ return true; ++} ++ + static void event_loop_base_class_init(ObjectClass *klass, void *class_data) + { + UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); + ucc->complete = event_loop_base_complete; ++ ucc->can_be_deleted = event_loop_base_can_be_deleted; + + object_class_property_add(klass, "aio-max-batch", "int", + event_loop_base_get_param, +diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h +index d3750c8e76..20c9387654 100644 +--- a/include/qemu/main-loop.h ++++ b/include/qemu/main-loop.h +@@ -26,9 +26,19 @@ + #define QEMU_MAIN_LOOP_H + + #include "block/aio.h" ++#include "qom/object.h" ++#include "sysemu/event-loop-base.h" + + #define SIG_IPI SIGUSR1 + ++#define TYPE_MAIN_LOOP "main-loop" ++OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP) ++ 
++struct MainLoop { ++ EventLoopBase parent_obj; ++}; ++typedef struct MainLoop MainLoop; ++ + /** + * qemu_init_main_loop: Set up the process so that it can run the main loop. + * +diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h +index 8e77d8b69f..fced4c9fea 100644 +--- a/include/sysemu/event-loop-base.h ++++ b/include/sysemu/event-loop-base.h +@@ -25,6 +25,7 @@ struct EventLoopBaseClass { + + void (*init)(EventLoopBase *base, Error **errp); + void (*update_params)(EventLoopBase *base, Error **errp); ++ bool (*can_be_deleted)(EventLoopBase *base); + }; + + struct EventLoopBase { +diff --git a/meson.build b/meson.build +index b9c919a55e..5a7c10e639 100644 +--- a/meson.build ++++ b/meson.build +@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil', + sources: util_ss.sources() + stub_ss.sources() + genh, + dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman]) + qemuutil = declare_dependency(link_with: libqemuutil, +- sources: genh + version_res) ++ sources: genh + version_res, ++ dependencies: [event_loop_base]) + + if have_system or have_user + decodetree = generator(find_program('scripts/decodetree.py'), +diff --git a/qapi/qom.json b/qapi/qom.json +index a2439533c5..7d4a2ac1b9 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -540,6 +540,17 @@ + '*poll-grow': 'int', + '*poll-shrink': 'int' } } + ++## ++# @MainLoopProperties: ++# ++# Properties for the main-loop object. 
++# ++# Since: 7.1 ++## ++{ 'struct': 'MainLoopProperties', ++ 'base': 'EventLoopBaseProperties', ++ 'data': {} } ++ + ## + # @MemoryBackendProperties: + # +@@ -830,6 +841,7 @@ + { 'name': 'input-linux', + 'if': 'CONFIG_LINUX' }, + 'iothread', ++ 'main-loop', + { 'name': 'memory-backend-epc', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file', +@@ -895,6 +907,7 @@ + 'input-linux': { 'type': 'InputLinuxProperties', + 'if': 'CONFIG_LINUX' }, + 'iothread': 'IothreadProperties', ++ 'main-loop': 'MainLoopProperties', + 'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties', + 'if': 'CONFIG_LINUX' }, + 'memory-backend-file': 'MemoryBackendFileProperties', +diff --git a/util/main-loop.c b/util/main-loop.c +index b7b0ce4ca0..5b13f456fa 100644 +--- a/util/main-loop.c ++++ b/util/main-loop.c +@@ -33,6 +33,7 @@ + #include "qemu/error-report.h" + #include "qemu/queue.h" + #include "qemu/compiler.h" ++#include "qom/object.h" + + #ifndef _WIN32 + #include +@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp) + return 0; + } + ++static void main_loop_update_params(EventLoopBase *base, Error **errp) ++{ ++ if (!qemu_aio_context) { ++ error_setg(errp, "qemu aio context not ready"); ++ return; ++ } ++ ++ aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp); ++} ++ ++MainLoop *mloop; ++ ++static void main_loop_init(EventLoopBase *base, Error **errp) ++{ ++ MainLoop *m = MAIN_LOOP(base); ++ ++ if (mloop) { ++ error_setg(errp, "only one main-loop instance allowed"); ++ return; ++ } ++ ++ main_loop_update_params(base, errp); ++ ++ mloop = m; ++ return; ++} ++ ++static bool main_loop_can_be_deleted(EventLoopBase *base) ++{ ++ return false; ++} ++ ++static void main_loop_class_init(ObjectClass *oc, void *class_data) ++{ ++ EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc); ++ ++ bc->init = main_loop_init; ++ bc->update_params = main_loop_update_params; ++ bc->can_be_deleted = main_loop_can_be_deleted; ++} ++ ++static const TypeInfo main_loop_info = { ++ 
.name = TYPE_MAIN_LOOP, ++ .parent = TYPE_EVENT_LOOP_BASE, ++ .class_init = main_loop_class_init, ++ .instance_size = sizeof(MainLoop), ++}; ++ ++static void main_loop_register_types(void) ++{ ++ type_register_static(&main_loop_info); ++} ++ ++type_init(main_loop_register_types) ++ + static int max_priority; + + #ifndef _WIN32 +-- +2.31.1 + diff --git a/kvm-vdpa-Add-device-migration-blocker.patch b/kvm-vdpa-Add-device-migration-blocker.patch new file mode 100644 index 0000000..1b83c98 --- /dev/null +++ b/kvm-vdpa-Add-device-migration-blocker.patch @@ -0,0 +1,106 @@ +From 8e0fdce814af4cfc84dce5e5920da989b1f1a86d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:05 +0200 +Subject: [PATCH 26/32] vdpa: Add device migration blocker +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [26/27] 53d94d45b5e5e88f12b95f9b0f243696cfcbd7ce (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit c156d5bf2b142dcc06808ccee06882144f230aec +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:45 2022 +0200 + + vdpa: Add device migration blocker + + Since the vhost-vdpa device is exposing _F_LOG, adding a migration blocker if + it uses CVQ. + + However, qemu is able to migrate simple devices with no CVQ as long as + they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and + only add to vhost_dev when used. vhost_dev machinery place the migration + blocker if needed. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ + include/hw/virtio/vhost-vdpa.h | 1 + + 2 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 49effe5462..e3e5bce4bb 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -20,6 +20,7 @@ + #include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/vhost-vdpa.h" + #include "exec/address-spaces.h" ++#include "migration/blocker.h" + #include "qemu/main-loop.h" + #include "cpu.h" + #include "trace.h" +@@ -1020,6 +1021,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) + return true; + } + ++ if (v->migration_blocker) { ++ int r = migrate_add_blocker(v->migration_blocker, &err); ++ if (unlikely(r < 0)) { ++ return false; ++ } ++ } ++ + for (i = 0; i < v->shadow_vqs->len; ++i) { + VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); + VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); +@@ -1062,6 +1070,10 @@ err: + vhost_svq_stop(svq); + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } ++ + return false; + } + +@@ -1081,6 +1093,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) + } + } + ++ if (v->migration_blocker) { ++ migrate_del_blocker(v->migration_blocker); ++ } + return true; + } + +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 1111d85643..d10a89303e 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -35,6 +35,7 @@ typedef struct vhost_vdpa { + bool shadow_vqs_enabled; + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; ++ Error *migration_blocker; + GPtrArray *shadow_vqs; + const VhostShadowVirtqueueOps *shadow_vq_ops; + void *shadow_vq_ops_opaque; +-- +2.31.1 + diff --git a/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch new file mode 
100644 index 0000000..8a7b600 --- /dev/null +++ b/kvm-vdpa-Add-x-svq-to-NetdevVhostVDPAOptions.patch @@ -0,0 +1,223 @@ +From 0b27781f9984c67625c49a516c3e38fbf5fa1b1b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:06:16 +0200 +Subject: [PATCH 27/32] vdpa: Add x-svq to NetdevVhostVDPAOptions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [27/27] bd85496c2a8c1ebf34f908fca2be2ab9852fd0e9 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1576dbb5bbc49344c606e969ec749be70c0fd94e +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:46 2022 +0200 + + vdpa: Add x-svq to NetdevVhostVDPAOptions + + Finally offering the possibility to enable SVQ from the command line. + + Signed-off-by: Eugenio Pérez + Acked-by: Markus Armbruster + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-- + qapi/net.json | 9 +++++- + 2 files changed, 77 insertions(+), 4 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 8b76dac966..50672bcd66 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = { + VHOST_INVALID_FEATURE_BIT + }; + ++/** Supported device specific feature bits with SVQ */ ++static const uint64_t vdpa_svq_device_features = ++ BIT_ULL(VIRTIO_NET_F_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | ++ BIT_ULL(VIRTIO_NET_F_MTU) | ++ BIT_ULL(VIRTIO_NET_F_MAC) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | ++ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | ++ BIT_ULL(VIRTIO_NET_F_HOST_ECN) | ++ BIT_ULL(VIRTIO_NET_F_HOST_UFO) | ++ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | ++ BIT_ULL(VIRTIO_NET_F_STATUS) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | ++ BIT_ULL(VIRTIO_F_ANY_LAYOUT) | ++ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | ++ BIT_ULL(VIRTIO_NET_F_RSC_EXT) | ++ BIT_ULL(VIRTIO_NET_F_STANDBY); ++ + VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); +@@ -133,9 +155,13 @@ err_init: + static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); ++ struct vhost_dev *dev = &s->vhost_net->dev; + + qemu_vfree(s->cvq_cmd_out_buffer); + qemu_vfree(s->cvq_cmd_in_buffer); ++ if (dev->vq_index + dev->nvqs == dev->vq_index_end) { ++ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); ++ } + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + int vdpa_device_fd, + int queue_pair_index, + int nvqs, +- bool 
is_datapath) ++ bool is_datapath, ++ bool svq, ++ VhostIOVATree *iova_tree) + { + NetClientState *nc = NULL; + VhostVDPAState *s; +@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ s->vhost_vdpa.shadow_vqs_enabled = svq; ++ s->vhost_vdpa.iova_tree = iova_tree; + if (!is_datapath) { + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, + vhost_vdpa_net_cvq_cmd_page_len()); +@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ error_setg(&s->vhost_vdpa.migration_blocker, ++ "Migration disabled: vhost-vdpa uses CVQ."); + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { +@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + ++static int vhost_vdpa_get_iova_range(int fd, ++ struct vhost_vdpa_iova_range *iova_range) ++{ ++ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); ++ ++ return ret < 0 ? 
-errno : 0; ++} ++ + static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) + { + int ret = ioctl(fd, VHOST_GET_FEATURES, features); +@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; ++ g_autoptr(VhostIOVATree) iova_tree = NULL; + NetClientState *nc; + int queue_pairs, r, i, has_cvq = 0; + +@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return queue_pairs; + } + ++ if (opts->x_svq) { ++ struct vhost_vdpa_iova_range iova_range; ++ ++ uint64_t invalid_dev_features = ++ features & ~vdpa_svq_device_features & ++ /* Transport are all accepted at this point */ ++ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, ++ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); ++ ++ if (invalid_dev_features) { ++ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, ++ invalid_dev_features); ++ goto err_svq; ++ } ++ ++ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); ++ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); ++ } ++ + ncs = g_malloc0(sizeof(*ncs) * queue_pairs); + + for (i = 0; i < queue_pairs; i++) { + ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 2, true); ++ vdpa_device_fd, i, 2, true, opts->x_svq, ++ iova_tree); + if (!ncs[i]) + goto err; + } + + if (has_cvq) { + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, +- vdpa_device_fd, i, 1, false); ++ vdpa_device_fd, i, 1, false, ++ opts->x_svq, iova_tree); + if (!nc) + goto err; + } + ++ /* iova_tree ownership belongs to last NetClientState */ ++ g_steal_pointer(&iova_tree); + return 0; + + err: +@@ -575,6 +639,8 @@ err: + qemu_del_net_client(ncs[i]); + } + } ++ ++err_svq: + qemu_close(vdpa_device_fd); + + return -1; +diff --git a/qapi/net.json b/qapi/net.json +index b92f3f5fb4..92848e4362 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -445,12 +445,19 @@ + # 
@queues: number of queues to be created for multiqueue vhost-vdpa + # (default: 1) + # ++# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) ++# (default: false) ++# ++# Features: ++# @unstable: Member @x-svq is experimental. ++# + # Since: 5.1 + ## + { 'struct': 'NetdevVhostVDPAOptions', + 'data': { + '*vhostdev': 'str', +- '*queues': 'int' } } ++ '*queues': 'int', ++ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } + + ## + # @NetClientDriver: +-- +2.31.1 + diff --git a/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch new file mode 100644 index 0000000..acd45e0 --- /dev/null +++ b/kvm-vdpa-Avoid-compiler-to-squash-reads-to-used-idx.patch @@ -0,0 +1,65 @@ +From df06ce560ddfefde98bef822ec2020382059921f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 10/32] vdpa: Avoid compiler to squash reads to used idx +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [10/27] b28789302d4f64749da26f413763f918161d9b70 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit c381abc37f0aba42ed2e3b41cdace8f8438829e4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:29 2022 +0200 + + vdpa: Avoid compiler to squash reads to used idx + + In the next patch we will allow busypolling of this value. The compiler + have a running path where shadow_used_idx, last_used_idx, and vring used + idx are not modified within the same thread busypolling. + + This was not an issue before since we always cleared device event + notifier before checking it, and that could act as memory barrier. 
+ However, the busypoll needs something similar to kernel READ_ONCE. + + Let's add it here, sepparated from the polling. + + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3fbda1e3d4..9c46c3a8fa 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -327,11 +327,12 @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) + + static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) + { ++ uint16_t *used_idx = &svq->vring.used->idx; + if (svq->last_used_idx != svq->shadow_used_idx) { + return true; + } + +- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); ++ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); + + return svq->last_used_idx != svq->shadow_used_idx; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch new file mode 100644 index 0000000..243aec8 --- /dev/null +++ b/kvm-vdpa-Buffer-CVQ-support-on-shadow-virtqueue.patch @@ -0,0 +1,323 @@ +From 881945094c0e4d33614d40959bfc20e395f5a478 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:40 +0200 +Subject: [PATCH 24/32] vdpa: Buffer CVQ support on shadow virtqueue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [24/27] 5486f80141a3ad968a32e782bdcdead32f417352 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 2df4dd31e194c94da7d28c02e92449f4a989fca9 
+Author: Eugenio Pérez +Date: Wed Jul 20 08:59:43 2022 +0200 + + vdpa: Buffer CVQ support on shadow virtqueue + + Introduce the control virtqueue support for vDPA shadow virtqueue. This + is needed for advanced networking features like rx filtering. + + Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid + TOCTOU with the guest's or device's memory every time there is a device + model change. Otherwise, the guest could change the memory content in + the time between qemu and the device read it. + + To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is + implemented. If the virtio-net driver changes MAC the virtio-net device + model will be updated with the new one, and a rx filtering change event + will be raised. + + More cvq commands could be added here straightforwardly but they have + not been tested. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 213 +++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 205 insertions(+), 8 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 2e3b6b10d8..df42822463 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -33,6 +33,9 @@ typedef struct VhostVDPAState { + NetClientState nc; + struct vhost_vdpa vhost_vdpa; + VHostNetState *vhost_net; ++ ++ /* Control commands shadow buffers */ ++ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; + bool started; + } VhostVDPAState; + +@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc) + { + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + ++ qemu_vfree(s->cvq_cmd_out_buffer); ++ qemu_vfree(s->cvq_cmd_in_buffer); + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); +@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) ++{ ++ 
VhostIOVATree *tree = v->iova_tree; ++ DMAMap needle = { ++ /* ++ * No need to specify size or to look for more translations since ++ * this contiguous chunk was allocated by us. ++ */ ++ .translated_addr = (hwaddr)(uintptr_t)addr, ++ }; ++ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); ++ int r; ++ ++ if (unlikely(!map)) { ++ error_report("Cannot locate expected map"); ++ return; ++ } ++ ++ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); ++ if (unlikely(r != 0)) { ++ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); ++ } ++ ++ vhost_iova_tree_remove(tree, map); ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_len(void) ++{ ++ /* ++ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. ++ * In buffer is always 1 byte, so it should fit here ++ */ ++ return sizeof(struct virtio_net_ctrl_hdr) + ++ 2 * sizeof(struct virtio_net_ctrl_mac) + ++ MAC_TABLE_ENTRIES * ETH_ALEN; ++} ++ ++static size_t vhost_vdpa_net_cvq_cmd_page_len(void) ++{ ++ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size); ++} ++ ++/** Copy and map a guest buffer. */ ++static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, ++ const struct iovec *out_data, ++ size_t out_num, size_t data_len, void *buf, ++ size_t *written, bool write) ++{ ++ DMAMap map = {}; ++ int r; ++ ++ if (unlikely(!data_len)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", ++ __func__, write ? "in" : "out"); ++ return false; ++ } ++ ++ *written = iov_to_buf(out_data, out_num, 0, buf, data_len); ++ map.translated_addr = (hwaddr)(uintptr_t)buf; ++ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; ++ map.perm = write ? 
IOMMU_RW : IOMMU_RO, ++ r = vhost_iova_tree_map_alloc(v->iova_tree, &map); ++ if (unlikely(r != IOVA_OK)) { ++ error_report("Cannot map injected element"); ++ return false; ++ } ++ ++ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, ++ !write); ++ if (unlikely(r < 0)) { ++ goto dma_map_err; ++ } ++ ++ return true; ++ ++dma_map_err: ++ vhost_iova_tree_remove(v->iova_tree, &map); ++ return false; ++} ++ + /** +- * Forward buffer for the moment. ++ * Copy the guest element into a dedicated buffer suitable to be sent to NIC ++ * ++ * @iov: [0] is the out buffer, [1] is the in one ++ */ ++static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, ++ VirtQueueElement *elem, ++ struct iovec *iov) ++{ ++ size_t in_copied; ++ bool ok; ++ ++ iov[0].iov_base = s->cvq_cmd_out_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, ++ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, ++ &iov[0].iov_len, false); ++ if (unlikely(!ok)) { ++ return false; ++ } ++ ++ iov[1].iov_base = s->cvq_cmd_in_buffer; ++ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, ++ sizeof(virtio_net_ctrl_ack), iov[1].iov_base, ++ &in_copied, true); ++ if (unlikely(!ok)) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); ++ return false; ++ } ++ ++ iov[1].iov_len = sizeof(virtio_net_ctrl_ack); ++ return true; ++} ++ ++/** ++ * Do not forward commands not supported by SVQ. Otherwise, the device could ++ * accept it and qemu would not know how to update the device model. 
++ */ ++static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, ++ size_t out_num) ++{ ++ struct virtio_net_ctrl_hdr ctrl; ++ size_t n; ++ ++ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); ++ if (unlikely(n < sizeof(ctrl))) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: invalid legnth of out buffer %zu\n", __func__, n); ++ return false; ++ } ++ ++ switch (ctrl.class) { ++ case VIRTIO_NET_CTRL_MAC: ++ switch (ctrl.cmd) { ++ case VIRTIO_NET_CTRL_MAC_ADDR_SET: ++ return true; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", ++ __func__, ctrl.cmd); ++ }; ++ break; ++ default: ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", ++ __func__, ctrl.class); ++ }; ++ ++ return false; ++} ++ ++/** ++ * Validate and copy control virtqueue commands. ++ * ++ * Following QEMU guidelines, we offer a copy of the buffers to the device to ++ * prevent TOCTOU bugs. + */ + static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + VirtQueueElement *elem, + void *opaque) + { +- unsigned int n = elem->out_num + elem->in_num; +- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ VhostVDPAState *s = opaque; + size_t in_len, dev_written; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- int r; ++ /* out and in buffers sent to the device */ ++ struct iovec dev_buffers[2] = { ++ { .iov_base = s->cvq_cmd_out_buffer }, ++ { .iov_base = s->cvq_cmd_in_buffer }, ++ }; ++ /* in buffer used for device model */ ++ const struct iovec in = { ++ .iov_base = &status, ++ .iov_len = sizeof(status), ++ }; ++ int r = -EINVAL; ++ bool ok; ++ ++ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- memcpy(dev_buffers, elem->out_sg, elem->out_num); +- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); ++ if (unlikely(!ok)) { ++ goto out; ++ } + +- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, 
&dev_buffers[1], +- elem->in_num, elem); ++ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); + if (unlikely(r != 0)) { + if (unlikely(r == -ENOSPC)) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", +@@ -224,6 +396,18 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, + dev_written = vhost_svq_poll(svq); + if (unlikely(dev_written < sizeof(status))) { + error_report("Insufficient written data (%zu)", dev_written); ++ goto out; ++ } ++ ++ memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); ++ if (status != VIRTIO_NET_OK) { ++ goto out; ++ } ++ ++ status = VIRTIO_NET_ERR; ++ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); ++ if (status != VIRTIO_NET_OK) { ++ error_report("Bad CVQ processing in model"); + } + + out: +@@ -234,6 +418,12 @@ out: + } + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); + g_free(elem); ++ if (dev_buffers[0].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); ++ } ++ if (dev_buffers[1].iov_base) { ++ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); ++ } + return r; + } + +@@ -266,6 +456,13 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; + if (!is_datapath) { ++ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size, ++ vhost_vdpa_net_cvq_cmd_page_len()); ++ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); ++ + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; + s->vhost_vdpa.shadow_vq_ops_opaque = s; + } +-- +2.31.1 + diff --git a/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch new file mode 100644 index 0000000..d6e72ac --- 
/dev/null +++ b/kvm-vdpa-Export-vhost_vdpa_dma_map-and-unmap-calls.patch @@ -0,0 +1,84 @@ +From 3a5d325fcb2958318262efac31d5fd25fb062523 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 21/32] vdpa: Export vhost_vdpa_dma_map and unmap calls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [21/27] 97e7a583bbd3c12a0786d53132812ec41702c190 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 463ba1e3b8cf080812895c5f26d95d8d7db2e692 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:40 2022 +0200 + + vdpa: Export vhost_vdpa_dma_map and unmap calls + + Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from + the guest that could set a different state in qemu device model and vdpa + device. + + To do so, it needs to be able to map these new buffers to the device. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 7 +++---- + include/hw/virtio/vhost-vdpa.h | 4 ++++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 28df57b12e..14b02fe079 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -71,8 +71,8 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, + return false; + } + +-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, +- void *vaddr, bool readonly) ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, + return ret; + } + +-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, +- hwaddr size) ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) + { + struct vhost_msg_v2 msg = {}; + int fd = v->device_fd; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index a29dbb3f53..7214eb47dc 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -39,4 +39,8 @@ typedef struct vhost_vdpa { + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; + ++int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, ++ void *vaddr, bool readonly); ++int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); ++ + #endif +-- +2.31.1 + diff --git a/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch new file mode 100644 index 0000000..44e97af --- /dev/null +++ b/kvm-vdpa-Extract-get-features-part-from-vhost_vdpa_get_m.patch @@ -0,0 +1,108 @@ +From 9a290bd74f983f3a65aa9ec5df2da9aa94bfdecd Mon Sep 17 00:00:00 2001 +From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:42 +0200 +Subject: [PATCH 25/32] vdpa: Extract get features part from + vhost_vdpa_get_max_queue_pairs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [25/27] 654ad68e10a4df84cced923c64e72d500721ad67 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 8170ab3f43989680491d00f1017f60b25d346114 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:44 2022 +0200 + + vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs + + To know the device features is needed for CVQ SVQ, so SVQ knows if it + can handle all commands or not. Extract from + vhost_vdpa_get_max_queue_pairs so we can reuse it. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df42822463..8b76dac966 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + return nc; + } + +-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) ++static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) ++{ ++ int ret = ioctl(fd, VHOST_GET_FEATURES, features); ++ if (unlikely(ret < 0)) { ++ error_setg_errno(errp, errno, ++ "Fail to query features from vhost-vDPA device"); ++ } ++ return ret; ++} ++ ++static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, ++ int *has_cvq, Error **errp) + { + unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); + g_autofree struct vhost_vdpa_config *config = NULL; + __virtio16 *max_queue_pairs; +- uint64_t features; + int ret; + +- ret = ioctl(fd, VHOST_GET_FEATURES, &features); +- if (ret) { +- error_setg(errp, "Fail to query features from vhost-vDPA device"); +- return ret; +- } +- + if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { + *has_cvq = 1; + } else { +@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) + { + const NetdevVhostVDPAOptions *opts; ++ uint64_t features; + int vdpa_device_fd; + g_autofree NetClientState **ncs = NULL; + NetClientState *nc; +- int queue_pairs, i, has_cvq = 0; ++ int queue_pairs, r, i, has_cvq = 0; + + assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); + opts = &netdev->u.vhost_vdpa; +@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + return -errno; + } + +- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, ++ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); ++ if 
(unlikely(r < 0)) { ++ return r; ++ } ++ ++ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, + &has_cvq, errp); + if (queue_pairs < 0) { + qemu_close(vdpa_device_fd); +-- +2.31.1 + diff --git a/kvm-vdpa-manual-forward-CVQ-buffers.patch b/kvm-vdpa-manual-forward-CVQ-buffers.patch new file mode 100644 index 0000000..61909ff --- /dev/null +++ b/kvm-vdpa-manual-forward-CVQ-buffers.patch @@ -0,0 +1,166 @@ +From c33bc0b7f2b5cfa330a6d89d60ee94de129c65c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 16:05:38 +0200 +Subject: [PATCH 23/32] vdpa: manual forward CVQ buffers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [23/27] ce128d5152be7eebf87e186eb8b58c2ed95aff6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit bd907ae4b00ebedad5e586af05ea3d6490318d45 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:42 2022 +0200 + + vdpa: manual forward CVQ buffers + + Do a simple forwarding of CVQ buffers, the same work SVQ could do but + through callbacks. No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-vdpa.c | 3 +- + include/hw/virtio/vhost-vdpa.h | 3 ++ + net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++ + 3 files changed, 63 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 14b02fe079..49effe5462 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -417,7 +417,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + for (unsigned n = 0; n < hdev->nvqs; ++n) { + g_autoptr(VhostShadowVirtqueue) svq; + +- svq = vhost_svq_new(v->iova_tree, NULL, NULL); ++ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, ++ v->shadow_vq_ops_opaque); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h +index 7214eb47dc..1111d85643 100644 +--- a/include/hw/virtio/vhost-vdpa.h ++++ b/include/hw/virtio/vhost-vdpa.h +@@ -15,6 +15,7 @@ + #include + + #include "hw/virtio/vhost-iova-tree.h" ++#include "hw/virtio/vhost-shadow-virtqueue.h" + #include "hw/virtio/virtio.h" + #include "standard-headers/linux/vhost_types.h" + +@@ -35,6 +36,8 @@ typedef struct vhost_vdpa { + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; + GPtrArray *shadow_vqs; ++ const VhostShadowVirtqueueOps *shadow_vq_ops; ++ void *shadow_vq_ops_opaque; + struct vhost_dev *dev; + VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + } VhostVDPA; +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index df1e69ee72..2e3b6b10d8 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -11,11 +11,14 @@ + + #include "qemu/osdep.h" + #include "clients.h" ++#include "hw/virtio/virtio-net.h" + #include "net/vhost_net.h" + #include "net/vhost-vdpa.h" + #include "hw/virtio/vhost-vdpa.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" ++#include "qemu/log.h" ++#include "qemu/memalign.h" + 
#include "qemu/option.h" + #include "qapi/error.h" + #include +@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = { + .check_peer_type = vhost_vdpa_check_peer_type, + }; + ++/** ++ * Forward buffer for the moment. ++ */ ++static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *opaque) ++{ ++ unsigned int n = elem->out_num + elem->in_num; ++ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); ++ size_t in_len, dev_written; ++ virtio_net_ctrl_ack status = VIRTIO_NET_ERR; ++ int r; ++ ++ memcpy(dev_buffers, elem->out_sg, elem->out_num); ++ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); ++ ++ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], ++ elem->in_num, elem); ++ if (unlikely(r != 0)) { ++ if (unlikely(r == -ENOSPC)) { ++ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", ++ __func__); ++ } ++ goto out; ++ } ++ ++ /* ++ * We can poll here since we've had BQL from the time we sent the ++ * descriptor. 
Also, we need to take the answer before SVQ pulls by itself, ++ * when BQL is released ++ */ ++ dev_written = vhost_svq_poll(svq); ++ if (unlikely(dev_written < sizeof(status))) { ++ error_report("Insufficient written data (%zu)", dev_written); ++ } ++ ++out: ++ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, ++ sizeof(status)); ++ if (unlikely(in_len < sizeof(status))) { ++ error_report("Bad device CVQ written length"); ++ } ++ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); ++ g_free(elem); ++ return r; ++} ++ ++static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { ++ .avail_handler = vhost_vdpa_net_handle_ctrl_avail, ++}; ++ + static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + const char *device, + const char *name, +@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, + + s->vhost_vdpa.device_fd = vdpa_device_fd; + s->vhost_vdpa.index = queue_pair_index; ++ if (!is_datapath) { ++ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; ++ s->vhost_vdpa.shadow_vq_ops_opaque = s; ++ } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); + if (ret) { + qemu_del_net_client(nc); +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch new file mode 100644 index 0000000..26083c1 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-tpm-crb-cmd-misalignment.patch @@ -0,0 +1,114 @@ +From b90a5878355bd549200ed1eff52ea084325bfc8a Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Fri, 6 May 2022 15:25:10 +0200 +Subject: [PATCH 5/5] vfio/common: remove spurious tpm-crb-cmd misalignment + warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 84: vfio/common: Remove spurious tpm-crb-cmd misalignment warning +RH-Commit: [2/2] 9b73a9aec59cb50d5e3468cc553464bf4a73d0a1 
(eauger1/centos-qemu-kvm) +RH-Bugzilla: 2037612 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Andrew Jones + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2037612 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45166961 +Upstream Status: YES +Tested: With TPM-CRB and VFIO + +The CRB command buffer currently is a RAM MemoryRegion and given +its base address alignment, it causes an error report on +vfio_listener_region_add(). This region could have been a RAM device +region, easing the detection of such safe situation but this option +was not well received. So let's add a helper function that uses the +memory region owner type to detect the situation is safe wrt +the assignment. Other device types can be checked here if such kind +of problem occurs again. + +Conflicts in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Philippe Mathieu-Daudé +Acked-by: Stefan Berger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220506132510.1847942-3-eric.auger@redhat.com +Signed-off-by: Alex Williamson +(cherry picked from commit 851d6d1a0ff29a87ec588205842edf6b86d99b5c) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 27 ++++++++++++++++++++++++++- + hw/vfio/trace-events | 1 + + 2 files changed, 27 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 080046e3f5..0fbe0d47af 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -40,6 +40,7 @@ + #include "trace.h" + #include "qapi/error.h" + #include "migration/migration.h" ++#include "sysemu/tpm.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -861,6 +862,22 @@ static void vfio_unregister_ram_discard_listener(VFIOContainer *container, + g_free(vrdl); + } + ++static bool vfio_known_safe_misalignment(MemoryRegionSection *section) ++{ ++ MemoryRegion *mr = section->mr; ++ ++ if 
(!TPM_IS_CRB(mr->owner)) { ++ return false; ++ } ++ ++ /* this is a known safe misaligned region, just trace for debug purpose */ ++ trace_vfio_known_safe_misalignment(memory_region_name(mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ return true; ++} ++ + static void vfio_listener_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { +@@ -884,7 +901,15 @@ static void vfio_listener_region_add(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 0ef1b5f4a6..582882db91 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,6 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t 
size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch new file mode 100644 index 0000000..7e644c5 --- /dev/null +++ b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch @@ -0,0 +1,78 @@ +From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 24 May 2022 05:14:05 -0400 +Subject: [PATCH 16/16] vfio/common: remove spurious warning on + vfio_listener_region_del + +RH-Author: Eric Auger +RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del +RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2086262 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: Alex Williamson + + Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262 + Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133 + Upstream Status: YES + Tested: With TPM-CRB and VFIO + +851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment +warning") removed the warning on vfio_listener_region_add() path. + +However the same warning also hits on region_del path. Let's remove +it and reword the dynamic trace as this can be called on both +map and unmap path. 
+ +Contextual Conflict in hw/vfio/common.c +We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions") + +Signed-off-by: Eric Auger +Reviewed-by: Cornelia Huck +Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com +Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning") +Signed-off-by: Alex Williamson +(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 10 +++++++++- + hw/vfio/trace-events | 2 +- + 2 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 0fbe0d47af..637981f9a1 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener, + if (unlikely((section->offset_within_address_space & + ~qemu_real_host_page_mask) != + (section->offset_within_region & ~qemu_real_host_page_mask))) { +- error_report("%s received unaligned region", __func__); ++ if (!vfio_known_safe_misalignment(section)) { ++ error_report("%s received unaligned region %s iova=0x%"PRIx64 ++ " offset_within_region=0x%"PRIx64 ++ " qemu_real_host_page_size=0x%"PRIxPTR, ++ __func__, memory_region_name(section->mr), ++ section->offset_within_address_space, ++ section->offset_within_region, ++ qemu_real_host_page_size); ++ } + return; + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 582882db91..73dffe9e00 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add + vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d" + vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]" 
+-vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA" ++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR + vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA" + vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64 + vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64 +-- +2.31.1 + diff --git a/kvm-vhost-Add-SVQDescState.patch b/kvm-vhost-Add-SVQDescState.patch new file mode 100644 index 0000000..b1ea4bb --- /dev/null +++ b/kvm-vhost-Add-SVQDescState.patch @@ -0,0 +1,135 @@ +From 14200f493243f73152ea4a4b97274f0ec4fb36fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 15/32] vhost: Add SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [15/27] 2e2866f22e37cace8598ff44dfcdc07fcc915d6d (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 9e87868fcaf5785c8e1490c290505fa32305ff91 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:34 2022 +0200 + + vhost: Add SVQDescState + + This will allow SVQ to add context to the different queue elements. 
+ + This patch only store the actual element, no functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- + hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- + 2 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3cec03d709..a08e3d4025 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + return -EINVAL; + } + +- svq->ring_id_maps[qemu_head] = elem; ++ svq->desc_state[qemu_head].elem = elem; + vhost_svq_kick(svq); + return 0; + } +@@ -411,21 +411,21 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- if (unlikely(!svq->ring_id_maps[used_elem.id])) { ++ if (unlikely(!svq->desc_state[used_elem.id].elem)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s says index %u is used, but it was not available", + svq->vdev->name, used_elem.id); + return NULL; + } + +- num = svq->ring_id_maps[used_elem.id]->in_num + +- svq->ring_id_maps[used_elem.id]->out_num; ++ num = svq->desc_state[used_elem.id].elem->in_num + ++ svq->desc_state[used_elem.id].elem->out_num; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); ++ return g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + + static void vhost_svq_flush(VhostShadowVirtqueue *svq, +@@ -595,7 +595,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + memset(svq->vring.desc, 0, driver_size); + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + 
memset(svq->vring.used, 0, device_size); +- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); ++ svq->desc_state = g_new0(SVQDescState, svq->vring.num); + svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { + svq->desc_next[i] = cpu_to_le16(i + 1); +@@ -620,7 +620,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + + for (unsigned i = 0; i < svq->vring.num; ++i) { + g_autofree VirtQueueElement *elem = NULL; +- elem = g_steal_pointer(&svq->ring_id_maps[i]); ++ elem = g_steal_pointer(&svq->desc_state[i].elem); + if (elem) { + virtqueue_detach_element(svq->vq, elem, 0); + } +@@ -632,7 +632,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + } + svq->vq = NULL; + g_free(svq->desc_next); +- g_free(svq->ring_id_maps); ++ g_free(svq->desc_state); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); + } +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index c132c994e9..d646c35054 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -15,6 +15,10 @@ + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/vhost-iova-tree.h" + ++typedef struct SVQDescState { ++ VirtQueueElement *elem; ++} SVQDescState; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue { + /* IOVA mapping */ + VhostIOVATree *iova_tree; + +- /* Map for use the guest's descriptors */ +- VirtQueueElement **ring_id_maps; ++ /* SVQ vring descriptors state */ ++ SVQDescState *desc_state; + + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; +-- +2.31.1 + diff --git a/kvm-vhost-Add-svq-avail_handler-callback.patch b/kvm-vhost-Add-svq-avail_handler-callback.patch new file mode 100644 index 0000000..a8b585d --- /dev/null +++ b/kvm-vhost-Add-svq-avail_handler-callback.patch @@ -0,0 
+1,164 @@ +From 433106c286a1961737300ebaece6f10b2747e7d8 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 20/32] vhost: Add svq avail_handler callback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [20/27] d228eb89d204f8be623bc870503bbf0078dfc9ae (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit e966c0b781aebabd2c0f5eef91678f08ce1d068c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:39 2022 +0200 + + vhost: Add svq avail_handler callback + + This allows external handlers to be aware of new buffers that the guest + places in the virtqueue. + + When this callback is defined the ownership of the guest's virtqueue + element is transferred to the callback. This means that if the user + wants to forward the descriptor it needs to manually inject it. The + callback is also free to process the command by itself and use the + element with svq_push. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- + hw/virtio/vhost-shadow-virtqueue.h | 31 +++++++++++++++++++++++++++++- + hw/virtio/vhost-vdpa.c | 3 ++- + 3 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 95d0d7a7ee..e53aac45f6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -306,7 +306,11 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add_element(svq, elem); ++ if (svq->ops) { ++ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); ++ } else { ++ r = vhost_svq_add_element(svq, elem); ++ } + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +@@ -685,12 +689,16 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + * shadow methods and file descriptors. + * + * @iova_tree: Tree to perform descriptors translations ++ * @ops: SVQ owner callbacks ++ * @ops_opaque: ops opaque pointer + * + * Returns the new virtqueue or NULL. + * + * In case of error, reason is reported through error_report. 
+ */ +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque) + { + g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); + int r; +@@ -712,6 +720,8 @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) + event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); + event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); + svq->iova_tree = iova_tree; ++ svq->ops = ops; ++ svq->ops_opaque = ops_opaque; + return g_steal_pointer(&svq); + + err_init_hdev_call: +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index cf442f7dea..d04c34a589 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -25,6 +25,27 @@ typedef struct SVQDescState { + unsigned int ndescs; + } SVQDescState; + ++typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; ++ ++/** ++ * Callback to handle an avail buffer. ++ * ++ * @svq: Shadow virtqueue ++ * @elem: Element placed in the queue by the guest ++ * @vq_callback_opaque: Opaque ++ * ++ * Returns 0 if the vq is running as expected. ++ * ++ * Note that ownership of elem is transferred to the callback. 
++ */ ++typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem, ++ void *vq_callback_opaque); ++ ++typedef struct VhostShadowVirtqueueOps { ++ VirtQueueAvailCallback avail_handler; ++} VhostShadowVirtqueueOps; ++ + /* Shadow virtqueue to relay notifications */ + typedef struct VhostShadowVirtqueue { + /* Shadow vring */ +@@ -69,6 +90,12 @@ typedef struct VhostShadowVirtqueue { + */ + uint16_t *desc_next; + ++ /* Caller callbacks */ ++ const VhostShadowVirtqueueOps *ops; ++ ++ /* Caller callbacks opaque */ ++ void *ops_opaque; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +@@ -102,7 +129,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + VirtQueue *vq); + void vhost_svq_stop(VhostShadowVirtqueue *svq); + +-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); ++VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, ++ const VhostShadowVirtqueueOps *ops, ++ void *ops_opaque); + + void vhost_svq_free(gpointer vq); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 33dcaa135e..28df57b12e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -416,8 +416,9 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, + + shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); + for (unsigned n = 0; n < hdev->nvqs; ++n) { +- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); ++ g_autoptr(VhostShadowVirtqueue) svq; + ++ svq = vhost_svq_new(v->iova_tree, NULL, NULL); + if (unlikely(!svq)) { + error_setg(errp, "Cannot create svq %u", n); + return -1; +-- +2.31.1 + diff --git a/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch new file mode 100644 index 0000000..9b09d42 --- /dev/null +++ b/kvm-vhost-Check-for-queue-full-at-vhost_svq_add.patch @@ -0,0 +1,134 @@ +From 
893dffb820973361bcef33612a6b924554a856c1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 13/32] vhost: Check for queue full at vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [13/27] d4bd8299fb7733a1e190618dfc92b4b53b7bbeb3 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit f20b70eb5a68cfd8fef74a13ccdd494ef1cb0221 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:32 2022 +0200 + + vhost: Check for queue full at vhost_svq_add + + The series need to expose vhost_svq_add with full functionality, + including checking for full queue. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++------------- + 1 file changed, 33 insertions(+), 26 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index e3fc3c2658..1d2bab287b 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -233,21 +233,29 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * Add an element to a SVQ. + * + * The caller must check that there is enough slots for the new element. It +- * takes ownership of the element: In case of failure, it is free and the SVQ +- * is considered broken. ++ * takes ownership of the element: In case of failure not ENOSPC, it is free. 
++ * ++ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; +- bool ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ unsigned ndescs = elem->in_num + elem->out_num; ++ bool ok; ++ ++ if (unlikely(ndescs > vhost_svq_available_slots(svq))) { ++ return -ENOSPC; ++ } ++ ++ ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); +- return false; ++ return -EINVAL; + } + + svq->ring_id_maps[qemu_head] = elem; + vhost_svq_kick(svq); +- return true; ++ return 0; + } + + /** +@@ -274,7 +282,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + + while (true) { + VirtQueueElement *elem; +- bool ok; ++ int r; + + if (svq->next_guest_avail_elem) { + elem = g_steal_pointer(&svq->next_guest_avail_elem); +@@ -286,25 +294,24 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { +- /* +- * This condition is possible since a contiguous buffer in GPA +- * does not imply a contiguous buffer in qemu's VA +- * scatter-gather segments. If that happens, the buffer exposed +- * to the device needs to be a chain of descriptors at this +- * moment. +- * +- * SVQ cannot hold more available buffers if we are here: +- * queue the current guest descriptor and ignore further kicks +- * until some elements are used. +- */ +- svq->next_guest_avail_elem = elem; +- return; +- } +- +- ok = vhost_svq_add(svq, elem); +- if (unlikely(!ok)) { +- /* VQ is broken, just return and ignore any other kicks */ ++ r = vhost_svq_add(svq, elem); ++ if (unlikely(r != 0)) { ++ if (r == -ENOSPC) { ++ /* ++ * This condition is possible since a contiguous buffer in ++ * GPA does not imply a contiguous buffer in qemu's VA ++ * scatter-gather segments. 
If that happens, the buffer ++ * exposed to the device needs to be a chain of descriptors ++ * at this moment. ++ * ++ * SVQ cannot hold more available buffers if we are here: ++ * queue the current guest descriptor and ignore kicks ++ * until some elements are used. ++ */ ++ svq->next_guest_avail_elem = elem; ++ } ++ ++ /* VQ is full or broken, just return and ignore kicks */ + return; + } + } +-- +2.31.1 + diff --git a/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch new file mode 100644 index 0000000..6755aad --- /dev/null +++ b/kvm-vhost-Decouple-vhost_svq_add-from-VirtQueueElement.patch @@ -0,0 +1,138 @@ +From 5c8de23e185a1a1f0b19eac3c9fa03411c9f545c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 14/32] vhost: Decouple vhost_svq_add from VirtQueueElement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [14/27] 463087dd316adc91b9c7a4e6634c6fc1745c1849 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 1f46ae65d85f677b660bda46685dd3e94885a7cb +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:33 2022 +0200 + + vhost: Decouple vhost_svq_add from VirtQueueElement + + VirtQueueElement comes from the guest, but we're heading SVQ to be able + to modify the element presented to the device without the guest's + knowledge. + + To do so, make SVQ accept sg buffers directly, instead of using + VirtQueueElement. + + Add vhost_svq_add_element to maintain element convenience. + + Signed-off-by: Eugenio Pérez + Acked-by: Jason Wang + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++---------- + 1 file changed, 22 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1d2bab287b..3cec03d709 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -172,30 +172,31 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +- VirtQueueElement *elem, unsigned *head) ++ const struct iovec *out_sg, size_t out_num, ++ const struct iovec *in_sg, size_t in_num, ++ unsigned *head) + { + unsigned avail_idx; + vring_avail_t *avail = svq->vring.avail; + bool ok; +- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); ++ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); + + *head = svq->free_head; + + /* We need some descriptors here */ +- if (unlikely(!elem->out_num && !elem->in_num)) { ++ if (unlikely(!out_num && !in_num)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Guest provided element with no descriptors"); + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); ++ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, ++ false); + if (unlikely(!ok)) { + return false; + } + +- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, +- true); ++ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); + if (unlikely(!ok)) { + return false; + } +@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) ++static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, 
const struct iovec *in_sg, ++ size_t in_num, VirtQueueElement *elem) + { + unsigned qemu_head; +- unsigned ndescs = elem->in_num + elem->out_num; ++ unsigned ndescs = in_num + out_num; + bool ok; + + if (unlikely(ndescs > vhost_svq_available_slots(svq))) { + return -ENOSPC; + } + +- ok = vhost_svq_add_split(svq, elem, &qemu_head); ++ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + if (unlikely(!ok)) { + g_free(elem); + return -EINVAL; +@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return 0; + } + ++/* Convenience wrapper to add a guest's element to SVQ */ ++static int vhost_svq_add_element(VhostShadowVirtqueue *svq, ++ VirtQueueElement *elem) ++{ ++ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, ++ elem->in_num, elem); ++} ++ + /** + * Forward available buffers. + * +@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + break; + } + +- r = vhost_svq_add(svq, elem); ++ r = vhost_svq_add_element(svq, elem); + if (unlikely(r != 0)) { + if (r == -ENOSPC) { + /* +-- +2.31.1 + diff --git a/kvm-vhost-Expose-vhost_svq_add.patch b/kvm-vhost-Expose-vhost_svq_add.patch new file mode 100644 index 0000000..70dc774 --- /dev/null +++ b/kvm-vhost-Expose-vhost_svq_add.patch @@ -0,0 +1,73 @@ +From cefd6583a8483c7a80f9cde8f7ad4705983af9e7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 18/32] vhost: Expose vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [18/27] bfb44f597d350336113783bcc9b3c9d9d32ff8c0 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git 
+ +commit d0291f3f284d3bc220cdb13b0d8ac8a44eb5fd4c +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:37 2022 +0200 + + vhost: Expose vhost_svq_add + + This allows external parts of SVQ to forward custom buffers to the + device. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 1ce52d5b4a..cb879e7b88 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) + * + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full + */ +-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, +- size_t out_num, const struct iovec *in_sg, +- size_t in_num, VirtQueueElement *elem) ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem) + { + unsigned qemu_head; + unsigned ndescs = in_num + out_num; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d9fc1f1799..dd78f4bec2 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); + + void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + const VirtQueueElement *elem, uint32_t len); ++int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, ++ size_t out_num, const struct iovec *in_sg, size_t in_num, ++ VirtQueueElement *elem); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git 
a/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch new file mode 100644 index 0000000..f149c05 --- /dev/null +++ b/kvm-vhost-Fix-device-s-used-descriptor-dequeue.patch @@ -0,0 +1,83 @@ +From 793d6d56190397624efdcaf6e0112bd12e39c05d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:25:01 +0200 +Subject: [PATCH 02/32] vhost: Fix device's used descriptor dequeue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [2/27] b92803a0681c94c65d243dd07424522387594760 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 81abfa5724c9a6502d7a1d3a67c55f2a303a1170 +Author: Eugenio Pérez +Date: Thu May 12 19:57:43 2022 +0200 + + vhost: Fix device's used descriptor dequeue + + Only the first one of them were properly enqueued back. + + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-3-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 3155801f50..31fc50907d 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -334,12 +334,22 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); + } + ++static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, ++ uint16_t num, uint16_t i) ++{ ++ for (uint16_t j = 0; j < (num - 1); ++j) { ++ i = le16_to_cpu(svq->desc_next[i]); ++ } ++ ++ return i; ++} ++ + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; +- uint16_t last_used; ++ uint16_t last_used, last_used_chain, num; + + if (!vhost_svq_more_used(svq)) { + return NULL; +@@ -365,7 +375,10 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- svq->desc_next[used_elem.id] = svq->free_head; ++ num = svq->ring_id_maps[used_elem.id]->in_num + ++ svq->ring_id_maps[used_elem.id]->out_num; ++ last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); ++ svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +-- +2.31.1 + diff --git a/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch new file mode 100644 index 0000000..51eb700 --- /dev/null +++ b/kvm-vhost-Fix-element-in-vhost_svq_add-failure.patch @@ -0,0 +1,68 @@ +From aa99cf129923e0203c0caeb3b4e94a0eb973746f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:36:38 +0200 +Subject: [PATCH 04/32] vhost: Fix element in vhost_svq_add failure +MIME-Version: 
1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [4/27] 96689c99a47dd49591c0d126cb1fbb975b2f79b4 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 5181db132b587754dda3a520eec923b87a65bbb7 +Author: Eugenio Pérez +Date: Thu May 12 19:57:47 2022 +0200 + + vhost: Fix element in vhost_svq_add failure + + Coverity rightly reports that the element is not freed in that case. + + Fixes: Coverity CID 1487559 + Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-7-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 31fc50907d..06d0bb39d9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -199,11 +199,19 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++/** ++ * Add an element to a SVQ. ++ * ++ * The caller must check that there is enough slots for the new element. It ++ * takes ownership of the element: In case of failure, it is free and the SVQ ++ * is considered broken. 
++ */ + static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + { + unsigned qemu_head; + bool ok = vhost_svq_add_split(svq, elem, &qemu_head); + if (unlikely(!ok)) { ++ g_free(elem); + return false; + } + +-- +2.31.1 + diff --git a/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch new file mode 100644 index 0000000..513d7b4 --- /dev/null +++ b/kvm-vhost-Move-vhost_svq_kick-call-to-vhost_svq_add.patch @@ -0,0 +1,61 @@ +From 3a944d8cd3d35b2398ff68d9ed8ea51d27dfab3c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 12/32] vhost: Move vhost_svq_kick call to vhost_svq_add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [12/27] 29a7e1fb4992c4beca1e9a3379bb4c8a0f567459 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 98b5adef8493a2bfad6655cfee84299e88bedbf7 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:31 2022 +0200 + + vhost: Move vhost_svq_kick call to vhost_svq_add + + The series needs to expose vhost_svq_add with full functionality, + including kick + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 05cd39d1eb..e3fc3c2658 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -246,6 +246,7 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + } + + svq->ring_id_maps[qemu_head] = elem; ++ vhost_svq_kick(svq); + return true; + } + +@@ -306,7 +307,6 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) + /* VQ is broken, just return and ignore any other kicks */ + return; + } +- vhost_svq_kick(svq); + } + + virtio_queue_set_notification(svq->vq, true); +-- +2.31.1 + diff --git a/kvm-vhost-Reorder-vhost_svq_kick.patch b/kvm-vhost-Reorder-vhost_svq_kick.patch new file mode 100644 index 0000000..f61f3c3 --- /dev/null +++ b/kvm-vhost-Reorder-vhost_svq_kick.patch @@ -0,0 +1,88 @@ +From fdbf66e4c70de16ab36d70ea591322b1b24df591 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 11/32] vhost: Reorder vhost_svq_kick +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [11/27] 1d08b97eb3960a0f85f2dd48c3331b803f7ea205 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit d93a2405ca6efa9dc1c420cee5a34bd8242818d0 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:30 2022 +0200 + + vhost: Reorder vhost_svq_kick + + Future code needs to call it from vhost_svq_add. + + No functional change intended. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 9c46c3a8fa..05cd39d1eb 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return true; + } + ++static void vhost_svq_kick(VhostShadowVirtqueue *svq) ++{ ++ /* ++ * We need to expose the available array entries before checking the used ++ * flags ++ */ ++ smp_mb(); ++ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { ++ return; ++ } ++ ++ event_notifier_set(&svq->hdev_kick); ++} ++ + /** + * Add an element to a SVQ. + * +@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) + return true; + } + +-static void vhost_svq_kick(VhostShadowVirtqueue *svq) +-{ +- /* +- * We need to expose the available array entries before checking the used +- * flags +- */ +- smp_mb(); +- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { +- return; +- } +- +- event_notifier_set(&svq->hdev_kick); +-} +- + /** + * Forward available buffers. 
+ * +-- +2.31.1 + diff --git a/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch new file mode 100644 index 0000000..31bfccc --- /dev/null +++ b/kvm-vhost-Track-descriptor-chain-in-private-at-SVQ.patch @@ -0,0 +1,123 @@ +From 486647551223cc01f4dba87197030bbf4e674f0f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:24:48 +0200 +Subject: [PATCH 01/32] vhost: Track descriptor chain in private at SVQ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [1/27] 26d16dc383e3064ac6e4288d5c52b39fee0ad204 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 495fe3a78749c39c0e772c4e1a55d6cb8a7e5292 +Author: Eugenio Pérez +Date: Thu May 12 19:57:42 2022 +0200 + + vhost: Track descriptor chain in private at SVQ + + The device could have access to modify them, and it definitely has + access when we implement packed vq. Harden SVQ by maintaining a private + copy of the descriptor chain. Other fields like buffer addresses are + already maintained separately. + + Signed-off-by: Eugenio Pérez + Message-Id: <20220512175747.142058-2-eperezma@redhat.com> + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 12 +++++++----- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 13 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index b232803d1b..3155801f50 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -138,6 +138,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + for (n = 0; n < num; n++) { + if (more_descs || (n + 1 < num)) { + descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT); ++ descs[i].next = cpu_to_le16(svq->desc_next[i]); + } else { + descs[i].flags = flags; + } +@@ -145,10 +146,10 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + descs[i].len = cpu_to_le32(iovec[n].iov_len); + + last = i; +- i = cpu_to_le16(descs[i].next); ++ i = cpu_to_le16(svq->desc_next[i]); + } + +- svq->free_head = le16_to_cpu(descs[last].next); ++ svq->free_head = le16_to_cpu(svq->desc_next[last]); + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -336,7 +337,6 @@ static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq) + static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + uint32_t *len) + { +- vring_desc_t *descs = svq->vring.desc; + const vring_used_t *used = svq->vring.used; + vring_used_elem_t used_elem; + uint16_t last_used; +@@ -365,7 +365,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- descs[used_elem.id].next = svq->free_head; ++ svq->desc_next[used_elem.id] = svq->free_head; + svq->free_head = used_elem.id; + + *len = used_elem.len; +@@ -540,8 +540,9 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, + svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); + memset(svq->vring.used, 0, device_size); + svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); 
++ svq->desc_next = g_new0(uint16_t, svq->vring.num); + for (unsigned i = 0; i < svq->vring.num - 1; i++) { +- svq->vring.desc[i].next = cpu_to_le16(i + 1); ++ svq->desc_next[i] = cpu_to_le16(i + 1); + } + } + +@@ -574,6 +575,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) + virtqueue_detach_element(svq->vq, next_avail_elem, 0); + } + svq->vq = NULL; ++ g_free(svq->desc_next); + g_free(svq->ring_id_maps); + qemu_vfree(svq->vring.desc); + qemu_vfree(svq->vring.used); +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index e5e24c536d..c132c994e9 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -53,6 +53,12 @@ typedef struct VhostShadowVirtqueue { + /* Next VirtQueue element that guest made available */ + VirtQueueElement *next_guest_avail_elem; + ++ /* ++ * Backup next field for each descriptor so we can recover securely, not ++ * needing to trust the device access. ++ */ ++ uint16_t *desc_next; ++ + /* Next head to expose to the device */ + uint16_t shadow_avail_idx; + +-- +2.31.1 + diff --git a/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch new file mode 100644 index 0000000..6a2e147 --- /dev/null +++ b/kvm-vhost-Track-number-of-descs-in-SVQDescState.patch @@ -0,0 +1,81 @@ +From 24b8cf88f53f9fc7cb393c9cad908f759980bfee Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 16/32] vhost: Track number of descs in SVQDescState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [16/27] 26f30cb6dd35c1eb1ddabe25113431bed3d744aa (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream 
Status: git://git.qemu.org/qemu.git + +commit ac4cfdc6f39c06732d27554523f9d5f8a53b4ffa +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:35 2022 +0200 + + vhost: Track number of descs in SVQDescState + + A guest's buffer contiguous on GPA may need multiple descriptors on + qemu's VA, so SVQ should track its length separately. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- + hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index a08e3d4025..4d99075e73 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + } + + svq->desc_state[qemu_head].elem = elem; ++ svq->desc_state[qemu_head].ndescs = ndescs; + vhost_svq_kick(svq); + return 0; + } +@@ -418,8 +419,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return NULL; + } + +- num = svq->desc_state[used_elem.id].elem->in_num + +- svq->desc_state[used_elem.id].elem->out_num; ++ num = svq->desc_state[used_elem.id].ndescs; + last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); + svq->desc_next[last_used_chain] = svq->free_head; + svq->free_head = used_elem.id; +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index d646c35054..5c7e7cbab6 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -17,6 +17,12 @@ + + typedef struct SVQDescState { + VirtQueueElement *elem; ++ ++ /* ++ * Number of descriptors exposed to the device. 
May or may not match ++ * guest's ++ */ ++ unsigned int ndescs; + } SVQDescState; + + /* Shadow virtqueue to relay notifications */ +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_poll.patch b/kvm-vhost-add-vhost_svq_poll.patch new file mode 100644 index 0000000..fa27e5e --- /dev/null +++ b/kvm-vhost-add-vhost_svq_poll.patch @@ -0,0 +1,92 @@ +From 0ab3da1092362470d256b433c546bd365d34f930 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 19/32] vhost: add vhost_svq_poll +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [19/27] 6807bb0bb6e5183b46a03b12b4027c7d767e8555 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 3f44d13dda83d390cc9563e56e7d337e4f6223f4 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:38 2022 +0200 + + vhost: add vhost_svq_poll + + It allows the Shadow Control VirtQueue to wait for the device to use the + available buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 1 + + 2 files changed, 28 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index cb879e7b88..95d0d7a7ee 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -485,6 +485,33 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, + } while (!vhost_svq_enable_notification(svq)); + } + ++/** ++ * Poll the SVQ for one device used buffer. 
++ * ++ * This function race with main event loop SVQ polling, so extra ++ * synchronization is needed. ++ * ++ * Return the length written by the device. ++ */ ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq) ++{ ++ int64_t start_us = g_get_monotonic_time(); ++ do { ++ uint32_t len; ++ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); ++ if (elem) { ++ return len; ++ } ++ ++ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { ++ return 0; ++ } ++ ++ /* Make sure we read new used_idx */ ++ smp_rmb(); ++ } while (true); ++} ++ + /** + * Forward used buffers. + * +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index dd78f4bec2..cf442f7dea 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, + int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, + size_t out_num, const struct iovec *in_sg, size_t in_num, + VirtQueueElement *elem); ++size_t vhost_svq_poll(VhostShadowVirtqueue *svq); + + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); +-- +2.31.1 + diff --git a/kvm-vhost-add-vhost_svq_push_elem.patch b/kvm-vhost-add-vhost_svq_push_elem.patch new file mode 100644 index 0000000..2a9ec40 --- /dev/null +++ b/kvm-vhost-add-vhost_svq_push_elem.patch @@ -0,0 +1,83 @@ +From a26eb02b3a49c5d1163685ba5b83b67138c09047 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 17/32] vhost: add vhost_svq_push_elem +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [17/27] d064b40a262f2dfdc9f648d250aa8c8020c40385 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: 
Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 432efd144e990b6e040862de25f8f0b6a6eeb03d +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:36 2022 +0200 + + vhost: add vhost_svq_push_elem + + This function allows external SVQ users to return guest's available + buffers. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ + hw/virtio/vhost-shadow-virtqueue.h | 3 +++ + 2 files changed, 19 insertions(+) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 4d99075e73..1ce52d5b4a 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -428,6 +428,22 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, + return g_steal_pointer(&svq->desc_state[used_elem.id].elem); + } + ++/** ++ * Push an element to SVQ, returning it to the guest. ++ */ ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len) ++{ ++ virtqueue_push(svq->vq, elem, len); ++ if (svq->next_guest_avail_elem) { ++ /* ++ * Avail ring was full when vhost_svq_flush was called, so it's a ++ * good moment to make more descriptors available if possible. 
++ */ ++ vhost_handle_guest_kick(svq); ++ } ++} ++ + static void vhost_svq_flush(VhostShadowVirtqueue *svq, + bool check_for_avail_queue) + { +diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h +index 5c7e7cbab6..d9fc1f1799 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.h ++++ b/hw/virtio/vhost-shadow-virtqueue.h +@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue { + + bool vhost_svq_valid_features(uint64_t features, Error **errp); + ++void vhost_svq_push_elem(VhostShadowVirtqueue *svq, ++ const VirtQueueElement *elem, uint32_t len); ++ + void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); + void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); + void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, +-- +2.31.1 + diff --git a/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch new file mode 100644 index 0000000..08bcaf2 --- /dev/null +++ b/kvm-vhost-move-descriptor-translation-to-vhost_svq_vring.patch @@ -0,0 +1,120 @@ +From 2bdea90bfbce3b8d5bfa86178a942a470b85b835 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 07/32] vhost: move descriptor translation to + vhost_svq_vring_write_descs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [7/27] 5533c72065e4ebf8ea7db966c976a3b29bdafb82 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 009c2549bb9dc7f7061009eb87f2a53d4b364983 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:26 2022 +0200 + + vhost: move descriptor translation to vhost_svq_vring_write_descs + + 
It's done for both in and out descriptors so it's better placed here. + + Acked-by: Jason Wang + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++--------- + 1 file changed, 27 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c +index 06d0bb39d9..3fbda1e3d4 100644 +--- a/hw/virtio/vhost-shadow-virtqueue.c ++++ b/hw/virtio/vhost-shadow-virtqueue.c +@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, + return true; + } + +-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, +- const struct iovec *iovec, size_t num, +- bool more_descs, bool write) ++/** ++ * Write descriptors to SVQ vring ++ * ++ * @svq: The shadow virtqueue ++ * @sg: Cache for hwaddr ++ * @iovec: The iovec from the guest ++ * @num: iovec length ++ * @more_descs: True if more descriptors come in the chain ++ * @write: True if they are writeable descriptors ++ * ++ * Return true if success, false otherwise and print error. ++ */ ++static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, ++ const struct iovec *iovec, size_t num, ++ bool more_descs, bool write) + { + uint16_t i = svq->free_head, last = svq->free_head; + unsigned n; + uint16_t flags = write ? 
cpu_to_le16(VRING_DESC_F_WRITE) : 0; + vring_desc_t *descs = svq->vring.desc; ++ bool ok; + + if (num == 0) { +- return; ++ return true; ++ } ++ ++ ok = vhost_svq_translate_addr(svq, sg, iovec, num); ++ if (unlikely(!ok)) { ++ return false; + } + + for (n = 0; n < num; n++) { +@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, + } + + svq->free_head = le16_to_cpu(svq->desc_next[last]); ++ return true; + } + + static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, +@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, + return false; + } + +- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, ++ elem->in_num > 0, false); + if (unlikely(!ok)) { + return false; + } +- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, +- elem->in_num > 0, false); +- + +- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); ++ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, ++ true); + if (unlikely(!ok)) { + return false; + } + +- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); +- + /* + * Put the entry in the available array (but don't update avail->idx until + * they do sync). 
+-- +2.31.1 + diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch new file mode 100644 index 0000000..70e8f59 --- /dev/null +++ b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -0,0 +1,56 @@ +From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:15 -0700 +Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +vhost_net_start() missed a corresponding stop_one() upon error from +vhost_set_vring_enable(). While at it, make the error handling for +err_start more robust. No real issue was found due to this though. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418) +Signed-off-by: Jason Wang +--- + hw/net/vhost_net.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 30379d2ca4..d6d7c51f62 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + r = vhost_set_vring_enable(peer, peer->vring_enable); + + if (r < 0) { ++ vhost_net_stop_one(get_vhost_net(peer), dev); + goto err_start; + } + } +@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, + + err_start: + while (--i >= 0) { +- peer = qemu_get_peer(ncs , i); ++ peer = qemu_get_peer(ncs, i < data_queue_pairs ? ++ i : n->max_queue_pairs); + vhost_net_stop_one(get_vhost_net(peer), dev); + } + e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); +-- +2.31.1 + diff --git a/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch new file mode 100644 index 0000000..31677fd --- /dev/null +++ b/kvm-vhost-net-vdpa-add-stubs-for-when-no-virtio-net-devi.patch @@ -0,0 +1,87 @@ +From a9095850da8dd4ea3fdb725cb7f79118144e22fa Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:39:27 +0200 +Subject: [PATCH 22/32] vhost-net-vdpa: add stubs for when no virtio-net device + is present +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [22/27] a2b25a805bb06094a5fab27ce8f82bee12a9fcb5 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 94c643732dc110d04bbdf0eb43c41bce23b3593e +Author: Eugenio 
Pérez +Date: Wed Jul 20 08:59:41 2022 +0200 + + vhost-net-vdpa: add stubs for when no virtio-net device is present + + net/vhost-vdpa.c will need functions that are declared in + vhost-shadow-virtqueue.c, that needs functions of virtio-net.c. + + Copy the vhost-vdpa-stub.c code so + only the constructor net_init_vhost_vdpa needs to be defined. + + Signed-off-by: Eugenio Pérez + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + net/meson.build | 3 ++- + net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ + 2 files changed, 23 insertions(+), 1 deletion(-) + create mode 100644 net/vhost-vdpa-stub.c + +diff --git a/net/meson.build b/net/meson.build +index c965e83b26..116a9e7cbb 100644 +--- a/net/meson.build ++++ b/net/meson.build +@@ -41,7 +41,8 @@ endif + softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) + softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) + if have_vhost_net_vdpa +- softmmu_ss.add(files('vhost-vdpa.c')) ++ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) ++ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) + endif + + subdir('can') +diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c +new file mode 100644 +index 0000000000..1732ed2443 +--- /dev/null ++++ b/net/vhost-vdpa-stub.c +@@ -0,0 +1,21 @@ ++/* ++ * vhost-vdpa-stub.c ++ * ++ * Copyright (c) 2022 Red Hat, Inc. ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "clients.h" ++#include "net/vhost-vdpa.h" ++#include "qapi/error.h" ++ ++int net_init_vhost_vdpa(const Netdev *netdev, const char *name, ++ NetClientState *peer, Error **errp) ++{ ++ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); ++ return -1; ++} +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch new file mode 100644 index 0000000..747bf5f --- /dev/null +++ b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -0,0 +1,58 @@ +From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:16 -0700 +Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The vhost_vdpa_one_time_request() branch in +vhost_vdpa_set_backend_cap() incorrectly sends down +ioctls on vhost_dev with non-zero index. This may +end up with multiple VHOST_SET_BACKEND_FEATURES +ioctl calls sent down on the vhost-vdpa fd that is +shared between all these vhost_dev's. + +To fix it, send down ioctl only once via the first +vhost_dev with index 0. Toggle the polarity of the +vhost_vdpa_one_time_request() test should do the +trick. + +Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request") +Signed-off-by: Si-Wei Liu +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Acked-by: Eugenio Pérez +Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 8adf7c0b92..6e3dbd9e89 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_one_time_request(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch new file mode 100644 index 0000000..2466557 --- /dev/null +++ b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -0,0 +1,123 @@ +From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:17 -0700 +Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for + vhost_vdpa_one_time_request() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +The name vhost_vdpa_one_time_request() was confusing. No +matter whatever it returns, its typical occurrence had +always been at requests that only need to be applied once. +And the name didn't suggest what it actually checks for. +Change it to vhost_vdpa_first_dev() with polarity flipped +for better readibility of code. That way it is able to +reflect what the check is really about. 
+ +This call is applicable to request which performs operation +only once, before queues are set up, and usually at the beginning +of the caller function. Document the requirement for it in place. + +Signed-off-by: Si-Wei Liu +Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466) +Signed-off-by: Jason Wang +--- + hw/virtio/vhost-vdpa.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 6e3dbd9e89..33dcaa135e 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) + v->iova_range.last); + } + +-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev) ++/* ++ * The use of this function is for requests that only need to be ++ * applied once. Typically such request occurs at the beginning ++ * of operation, and before setting up queues. It should not be ++ * used for request that performs operation until all queues are ++ * set, which would need to check dev->vq_index_end instead. 
++ */ ++static bool vhost_vdpa_first_dev(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; + +- return v->index != 0; ++ return v->index == 0; + } + + static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, +@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + + vhost_vdpa_get_iova_range(v); + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) + static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + struct vhost_vdpa *v = dev->opaque; + int ret; + +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + + features &= f; + +- if (!vhost_vdpa_one_time_request(dev)) { ++ if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; +@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) + { + struct vhost_vdpa *v = dev->opaque; +- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { + return 0; + } + +@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, + + static int vhost_vdpa_set_owner(struct vhost_dev *dev) + { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { + return 0; + } + +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch new file mode 100644 index 0000000..7716cbf --- 
/dev/null +++ b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -0,0 +1,48 @@ +From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:14 -0700 +Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +... such that no memory leaks on dangling net clients in case of +error. + +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f) +Signed-off-by: Jason Wang +--- + net/vhost-vdpa.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c +index 1e9fe47c03..df1e69ee72 100644 +--- a/net/vhost-vdpa.c ++++ b/net/vhost-vdpa.c +@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, + + err: + if (i) { +- qemu_del_net_client(ncs[0]); ++ for (i--; i >= 0; i--) { ++ qemu_del_net_client(ncs[i]); ++ } + } + qemu_close(vdpa_device_fd); + +-- +2.31.1 + diff --git a/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch b/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch new file mode 100644 index 0000000..50013c9 --- /dev/null +++ b/kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch @@ -0,0 +1,76 @@ +From ff4e95d8652dadfed09913c7968514a2a7f36591 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 10:38:26 -0400 +Subject: [PATCH 2/2] vhost-vsock: detach the virqueue element in case of error + 
+RH-Author: Jon Maloy +RH-MergeRequest: 153: vhost-vsock: detach the virqueue element in case of error +RH-Commit: [1/1] 024dbc9073fddbe89a8ae8eb201f5bc674bffb64 (jmaloy/qemu-kvm) +RH-Bugzilla: 2063262 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Stefano Garzarella + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063262 +Upstream: Merged +CVE: CVE-2022-26354 + +commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf +Author: Stefano Garzarella +Date: Mon Feb 28 10:50:58 2022 +0100 + + vhost-vsock: detach the virqueue element in case of error + + In vhost_vsock_common_send_transport_reset(), if an element popped from + the virtqueue is invalid, we should call virtqueue_detach_element() to + detach it from the virtqueue before freeing its memory. + + Fixes: fc0b9b0e1c ("vhost-vsock: add virtio sockets device") + Fixes: CVE-2022-26354 + Cc: qemu-stable@nongnu.org + Reported-by: VictorV + Signed-off-by: Stefano Garzarella + Message-Id: <20220228095058.27899-1-sgarzare@redhat.com> + Reviewed-by: Stefan Hajnoczi + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Michael S. 
Tsirkin + +(cherry picked from commit 8d1b247f3748ac4078524130c6d7ae42b6140aaf) +Signed-off-by: Jon Maloy +--- + hw/virtio/vhost-vsock-common.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index 3f3771274e..ed706681ac 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -153,19 +153,23 @@ static void vhost_vsock_common_send_transport_reset(VHostVSockCommon *vvc) + if (elem->out_num) { + error_report("invalid vhost-vsock event virtqueue element with " + "out buffers"); +- goto out; ++ goto err; + } + + if (iov_from_buf(elem->in_sg, elem->in_num, 0, + &event, sizeof(event)) != sizeof(event)) { + error_report("vhost-vsock event virtqueue element is too short"); +- goto out; ++ goto err; + } + + virtqueue_push(vq, elem, sizeof(event)); + virtio_notify(VIRTIO_DEVICE(vvc), vq); + +-out: ++ g_free(elem); ++ return; ++ ++err: ++ virtqueue_detach_element(vq, elem, 0); + g_free(elem); + } + +-- +2.27.0 + diff --git a/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch new file mode 100644 index 0000000..2a72cc7 --- /dev/null +++ b/kvm-virtio-iommu-Add-an-assert-check-in-translate-routin.patch @@ -0,0 +1,46 @@ +From 643d9c28ff8b15c333cc748c5e712659ad2a257c Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:10 +0800 +Subject: [PATCH 03/17] virtio-iommu: Add an assert check in translate routine + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [3/5] 19f309fd0beda40d65f51c454e37936658ac9f38 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +With address space switch supported, dma access translation only +happen after endpoint is attached 
to a non-bypass domain. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-4-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 23b5f0ff6d923d3bca11cf44eed3daf7a0a836a8) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 440a1c28a7..e970d4d5a6 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -866,6 +866,10 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ ++ if (bypass_allowed) ++ assert(ep && ep->domain && !ep->domain->bypass); ++ + if (!ep) { + if (!bypass_allowed) { + error_report_once("%s sid=%d is not known!!", __func__, sid); +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch new file mode 100644 index 0000000..3352666 --- /dev/null +++ b/kvm-virtio-iommu-Add-bypass-mode-support-to-assigned-dev.patch @@ -0,0 +1,250 @@ +From d60774ee3168eefb21a4120a38107cd36ae17e07 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:08 +0800 +Subject: [PATCH 01/17] virtio-iommu: Add bypass mode support to assigned + device + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [1/5] 4777815533b31c7f4f09af8902e378fd3fc1186a (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +Currently assigned devices can not work in virtio-iommu bypass mode. +Guest driver fails to probe the device due to DMA failure. And the +reason is because of lacking GPA -> HPA mappings when VM is created. 
+ +Add a root container memory region to hold both bypass memory region +and iommu memory region, so the switch between them is supported +just like the implementation in virtual VT-d. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-2-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 90519b90539b16258d1d52b908b199f44877dc18) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 115 ++++++++++++++++++++++++++++++- + include/hw/virtio/virtio-iommu.h | 2 + + 3 files changed, 116 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index a5102eac9e..2ab5881b88 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -114,6 +114,7 @@ virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uin + virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" ++virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 6d5ea0bdf1..5e99e6c62b 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -70,6 +70,77 @@ static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); + } + ++static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) ++{ ++ uint32_t sid; ++ bool bypassed; ++ VirtIOIOMMU *s = sdev->viommu; ++ VirtIOIOMMUEndpoint *ep; ++ ++ sid = virtio_iommu_get_bdf(sdev); ++ ++ qemu_mutex_lock(&s->mutex); ++ /* need to check bypass before system 
reset */ ++ if (!s->endpoints) { ++ bypassed = s->config.bypass; ++ goto unlock; ++ } ++ ++ ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); ++ if (!ep || !ep->domain) { ++ bypassed = s->config.bypass; ++ } else { ++ bypassed = ep->domain->bypass; ++ } ++ ++unlock: ++ qemu_mutex_unlock(&s->mutex); ++ return bypassed; ++} ++ ++/* Return whether the device is using IOMMU translation. */ ++static bool virtio_iommu_switch_address_space(IOMMUDevice *sdev) ++{ ++ bool use_remapping; ++ ++ assert(sdev); ++ ++ use_remapping = !virtio_iommu_device_bypassed(sdev); ++ ++ trace_virtio_iommu_switch_address_space(pci_bus_num(sdev->bus), ++ PCI_SLOT(sdev->devfn), ++ PCI_FUNC(sdev->devfn), ++ use_remapping); ++ ++ /* Turn off first then on the other */ ++ if (use_remapping) { ++ memory_region_set_enabled(&sdev->bypass_mr, false); ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), true); ++ } else { ++ memory_region_set_enabled(MEMORY_REGION(&sdev->iommu_mr), false); ++ memory_region_set_enabled(&sdev->bypass_mr, true); ++ } ++ ++ return use_remapping; ++} ++ ++static void virtio_iommu_switch_address_space_all(VirtIOIOMMU *s) ++{ ++ GHashTableIter iter; ++ IOMMUPciBus *iommu_pci_bus; ++ int i; ++ ++ g_hash_table_iter_init(&iter, s->as_by_busptr); ++ while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { ++ for (i = 0; i < PCI_DEVFN_MAX; i++) { ++ if (!iommu_pci_bus->pbdev[i]) { ++ continue; ++ } ++ virtio_iommu_switch_address_space(iommu_pci_bus->pbdev[i]); ++ } ++ } ++} ++ + /** + * The bus number is used for lookup when SID based operations occur. 
+ * In that case we lazily populate the IOMMUPciBus array from the bus hash +@@ -214,6 +285,7 @@ static gboolean virtio_iommu_notify_map_cb(gpointer key, gpointer value, + static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + { + VirtIOIOMMUDomain *domain = ep->domain; ++ IOMMUDevice *sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); + + if (!ep->domain) { + return; +@@ -222,6 +294,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) + ep->iommu_mr); + QLIST_REMOVE(ep, next); + ep->domain = NULL; ++ virtio_iommu_switch_address_space(sdev); + } + + static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, +@@ -324,12 +397,39 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + + trace_virtio_iommu_init_iommu_mr(name); + ++ memory_region_init(&sdev->root, OBJECT(s), name, UINT64_MAX); ++ address_space_init(&sdev->as, &sdev->root, TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Build the IOMMU disabled container with aliases to the ++ * shared MRs. Note that aliasing to a shared memory region ++ * could help the memory API to detect same FlatViews so we ++ * can have devices to share the same FlatView when in bypass ++ * mode. (either by not configuring virtio-iommu driver or with ++ * "iommu=pt"). It will greatly reduce the total number of ++ * FlatViews of the system hence VM runs faster. 
++ */ ++ memory_region_init_alias(&sdev->bypass_mr, OBJECT(s), ++ "system", get_system_memory(), 0, ++ memory_region_size(get_system_memory())); ++ + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); +- address_space_init(&sdev->as, +- MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); ++ ++ /* ++ * Hook both the containers under the root container, we ++ * switch between iommu & bypass MRs by enable/disable ++ * corresponding sub-containers ++ */ ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ MEMORY_REGION(&sdev->iommu_mr), ++ 0); ++ memory_region_add_subregion_overlap(&sdev->root, 0, ++ &sdev->bypass_mr, 0); ++ ++ virtio_iommu_switch_address_space(sdev); + g_free(name); + } + return &sdev->as; +@@ -343,6 +443,7 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + uint32_t flags = le32_to_cpu(req->flags); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; ++ IOMMUDevice *sdev; + + trace_virtio_iommu_attach(domain_id, ep_id); + +@@ -376,6 +477,8 @@ static int virtio_iommu_attach(VirtIOIOMMU *s, + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); + + ep->domain = domain; ++ sdev = container_of(ep->iommu_mr, IOMMUDevice, iommu_mr); ++ virtio_iommu_switch_address_space(sdev); + + /* Replay domain mappings on the associated memory region */ + g_tree_foreach(domain->mappings, virtio_iommu_notify_map_cb, +@@ -888,6 +991,7 @@ static void virtio_iommu_set_config(VirtIODevice *vdev, + return; + } + dev_config->bypass = in_config->bypass; ++ virtio_iommu_switch_address_space_all(dev); + } + + trace_virtio_iommu_set_config(in_config->bypass); +@@ -1027,6 +1131,8 @@ static void virtio_iommu_system_reset(void *opaque) + * system reset + */ + s->config.bypass = s->boot_bypass; ++ virtio_iommu_switch_address_space_all(s); ++ + } + + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) +@@ -1043,6 +1149,11 @@ static void virtio_iommu_device_realize(DeviceState 
*dev, Error **errp) + virtio_iommu_handle_command); + s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); + ++ /* ++ * config.bypass is needed to get initial address space early, such as ++ * in vfio realize ++ */ ++ s->config.bypass = s->boot_bypass; + s->config.page_size_mask = TARGET_PAGE_MASK; + s->config.input_range.end = UINT64_MAX; + s->config.domain_range.end = UINT32_MAX; +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 84391f8448..102eeefa73 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -37,6 +37,8 @@ typedef struct IOMMUDevice { + int devfn; + IOMMUMemoryRegion iommu_mr; + AddressSpace as; ++ MemoryRegion root; /* The root container of the device */ ++ MemoryRegion bypass_mr; /* The alias of shared memory MR */ + } IOMMUDevice; + + typedef struct IOMMUPciBus { +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-migration-regression.patch b/kvm-virtio-iommu-Fix-migration-regression.patch new file mode 100644 index 0000000..f5ae4d6 --- /dev/null +++ b/kvm-virtio-iommu-Fix-migration-regression.patch @@ -0,0 +1,54 @@ +From 8d45902b4884315ec090e607e9f03606b21001cf Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Fri, 24 Jun 2022 17:37:40 +0800 +Subject: [PATCH 05/17] virtio-iommu: Fix migration regression + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [5/5] 9652c4aaaf88e24083fab1fbc3d1423260c93ca6 (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +We also need to switch to the right address space on dest side +after loading the device status. DMA to wrong address space is +destructive. 
+ +Fixes: 3facd774962fd ("virtio-iommu: Add bypass mode support to assigned device") +Suggested-by: Eric Auger +Signed-off-by: Zhenzhong Duan +Message-Id: <20220624093740.3525267-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Eric Auger +(cherry picked from commit d355566bd958e24e7e384da6ea89a9fc88d7bfed) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 44a041dec9..2012835554 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1324,6 +1324,14 @@ static int iommu_post_load(void *opaque, int version_id) + VirtIOIOMMU *s = opaque; + + g_tree_foreach(s->domains, reconstruct_endpoints, s); ++ ++ /* ++ * Memory regions are dynamically turned on/off depending on ++ * 'config.bypass' and attached domain type if there is. After ++ * migration, we need to make sure the memory regions are ++ * still correct. 
++ */ ++ virtio_iommu_switch_address_space_all(s); + return 0; + } + +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch new file mode 100644 index 0000000..7747bfe --- /dev/null +++ b/kvm-virtio-iommu-Fix-the-partial-copy-of-probe-request.patch @@ -0,0 +1,67 @@ +From b681247c29b59af40c86f8f0ae5709138ae9bf1a Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Thu, 23 Jun 2022 10:31:52 +0800 +Subject: [PATCH 04/17] virtio-iommu: Fix the partial copy of probe request + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [4/5] c402164414a8e69bbb6df20af3c2b6d2589d6f3e (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +The structure of probe request doesn't include the tail, this leads +to a few field missed to be copied. Currently this isn't an issue as +those missed field belong to reserved field, just in case reserved +field will be used in the future. + +Changed 4th parameter of virtio_iommu_iov_to_req() to receive size +of device-readable part. + +Fixes: 1733eebb9e75b ("virtio-iommu: Implement RESV_MEM probe request") +Signed-off-by: Zhenzhong Duan +Message-Id: <20220623023152.3473231-1-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Reviewed-by: Eric Auger +(cherry picked from commit 45461aace83d961e933b27519b81d17b4c690514) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index e970d4d5a6..44a041dec9 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -676,11 +676,10 @@ static int virtio_iommu_probe(VirtIOIOMMU *s, + + static int virtio_iommu_iov_to_req(struct iovec *iov, + unsigned int iov_cnt, +- void *req, size_t req_sz) ++ void *req, size_t payload_sz) + { +- size_t sz, payload_sz = req_sz - sizeof(struct virtio_iommu_req_tail); ++ size_t sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + +- sz = iov_to_buf(iov, iov_cnt, 0, req, payload_sz); + if (unlikely(sz != payload_sz)) { + return VIRTIO_IOMMU_S_INVAL; + } +@@ -693,7 +692,8 @@ static int virtio_iommu_handle_ ## __req(VirtIOIOMMU *s, \ + unsigned int iov_cnt) \ + { \ + struct virtio_iommu_req_ ## __req req; \ +- int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, sizeof(req)); \ ++ int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, \ ++ sizeof(req) - sizeof(struct virtio_iommu_req_tail));\ + \ + return ret ? 
ret : virtio_iommu_ ## __req(s, &req); \ + } +-- +2.31.1 + diff --git a/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch new file mode 100644 index 0000000..df961b0 --- /dev/null +++ b/kvm-virtio-iommu-Use-recursive-lock-to-avoid-deadlock.patch @@ -0,0 +1,141 @@ +From 881c999e302e7ee1212b47c523a2cf442c549417 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Mon, 13 Jun 2022 14:10:09 +0800 +Subject: [PATCH 02/17] virtio-iommu: Use recursive lock to avoid deadlock + +RH-Author: Eric Auger +RH-MergeRequest: 105: virtio-iommu: Fix bypass mode for assigned devices +RH-Commit: [2/5] 67dce1eecb49555f728f119f8efac00417ff65bf (eauger1/centos-qemu-kvm) +RH-Bugzilla: 2100106 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Peter Xu +RH-Acked-by: Cornelia Huck + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2100106 + +When switching address space with mutex lock hold, mapping will be +replayed for assigned device. This will trigger relock deadlock. + +Also release the mutex resource in unrealize routine. + +Signed-off-by: Zhenzhong Duan +Message-Id: <20220613061010.2674054-3-zhenzhong.duan@intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 08f2030a2e46f1e93d186b3a683e5caef1df562b) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 20 +++++++++++--------- + include/hw/virtio/virtio-iommu.h | 2 +- + 2 files changed, 12 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 5e99e6c62b..440a1c28a7 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -79,7 +79,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + /* need to check bypass before system reset */ + if (!s->endpoints) { + bypassed = s->config.bypass; +@@ -94,7 +94,7 @@ static bool virtio_iommu_device_bypassed(IOMMUDevice *sdev) + } + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return bypassed; + } + +@@ -746,7 +746,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + tail.status = VIRTIO_IOMMU_S_DEVERR; + goto out; + } +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + switch (head.type) { + case VIRTIO_IOMMU_T_ATTACH: + tail.status = virtio_iommu_handle_attach(s, iov, iov_cnt); +@@ -775,7 +775,7 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) + default: + tail.status = VIRTIO_IOMMU_S_UNSUPP; + } +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + + out: + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, +@@ -863,7 +863,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + sid = virtio_iommu_get_bdf(sdev); + + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid)); + if (!ep) { +@@ -947,7 +947,7 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + 
trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + return entry; + } + +@@ -1036,7 +1036,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + + sid = virtio_iommu_get_bdf(sdev); + +- qemu_mutex_lock(&s->mutex); ++ qemu_rec_mutex_lock(&s->mutex); + + if (!s->endpoints) { + goto unlock; +@@ -1050,7 +1050,7 @@ static void virtio_iommu_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n) + g_tree_foreach(ep->domain->mappings, virtio_iommu_remap, mr); + + unlock: +- qemu_mutex_unlock(&s->mutex); ++ qemu_rec_mutex_unlock(&s->mutex); + } + + static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr, +@@ -1169,7 +1169,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS_CONFIG); + +- qemu_mutex_init(&s->mutex); ++ qemu_rec_mutex_init(&s->mutex); + + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + +@@ -1197,6 +1197,8 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + g_tree_destroy(s->endpoints); + } + ++ qemu_rec_mutex_destroy(&s->mutex); ++ + virtio_delete_queue(s->req_vq); + virtio_delete_queue(s->event_vq); + virtio_cleanup(vdev); +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 102eeefa73..2ad5ee320b 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -58,7 +58,7 @@ struct VirtIOIOMMU { + ReservedRegion *reserved_regions; + uint32_t nb_reserved_regions; + GTree *domains; +- QemuMutex mutex; ++ QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; + }; +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch new file mode 100644 index 0000000..4ae4cc4 --- /dev/null +++ b/kvm-virtio-net-Expose-MAC_TABLE_ENTRIES.patch @@ 
-0,0 +1,69 @@ +From dffe24d5c1f5a4676e9d2a5bc032effd420b008f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 08/32] virtio-net: Expose MAC_TABLE_ENTRIES +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [8/27] 5c3b96215ddf853cafc594da47f57d7e157db4ee (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 6758c01f054c2a842d41d927d628b09f649d3254 +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:27 2022 +0200 + + virtio-net: Expose MAC_TABLE_ENTRIES + + vhost-vdpa control virtqueue needs to know the maximum entries supported + by the virtio-net device, so we know if it is possible to apply the + filter. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 1 - + include/hw/virtio/virtio-net.h | 3 +++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 633de61513..2a127f0a3b 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -49,7 +49,6 @@ + + #define VIRTIO_NET_VM_VERSION 11 + +-#define MAC_TABLE_ENTRIES 64 + #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ + + /* previously fixed value */ +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index eb87032627..cce1c554f7 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) + * and latency. */ + #define TX_BURST 256 + ++/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. 
*/ ++#define MAC_TABLE_ENTRIES 64 ++ + typedef struct virtio_net_conf + { + uint32_t txtimer; +-- +2.31.1 + diff --git a/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch new file mode 100644 index 0000000..b4b9012 --- /dev/null +++ b/kvm-virtio-net-Expose-ctrl-virtqueue-logic.patch @@ -0,0 +1,169 @@ +From 49e91b34b62f5da147fa2fb80d203dd675c48f64 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= +Date: Thu, 21 Jul 2022 15:38:55 +0200 +Subject: [PATCH 09/32] virtio-net: Expose ctrl virtqueue logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eugenio Pérez +RH-MergeRequest: 108: Net Control Virtqueue shadow Support +RH-Commit: [9/27] c4ab1e35f4ca728df82a687763c662369282c513 (eperezmartin/qemu-kvm) +RH-Bugzilla: 1939363 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/1939363 + +Upstream Status: git://git.qemu.org/qemu.git + +commit 640b8a1c588b56349b3307d88459ea1cd86181fb +Author: Eugenio Pérez +Date: Wed Jul 20 08:59:28 2022 +0200 + + virtio-net: Expose ctrl virtqueue logic + + This allows external vhost-net devices to modify the state of the + VirtIO device model once the vhost-vdpa device has acknowledged the + control commands. + + Signed-off-by: Eugenio Pérez + Reviewed-by: Michael S. 
Tsirkin + Signed-off-by: Jason Wang + +Signed-off-by: Eugenio Pérez +--- + hw/net/virtio-net.c | 84 ++++++++++++++++++++-------------- + include/hw/virtio/virtio-net.h | 4 ++ + 2 files changed, 53 insertions(+), 35 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 2a127f0a3b..59bedba681 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_OK; + } + +-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num) + { + VirtIONet *n = VIRTIO_NET(vdev); + struct virtio_net_ctrl_hdr ctrl; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; +- VirtQueueElement *elem; + size_t s; + struct iovec *iov, *iov2; +- unsigned int iov_cnt; ++ ++ if (iov_size(in_sg, in_num) < sizeof(status) || ++ iov_size(out_sg, out_num) < sizeof(ctrl)) { ++ virtio_error(vdev, "virtio-net ctrl missing headers"); ++ return 0; ++ } ++ ++ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); ++ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); ++ iov_discard_front(&iov, &out_num, sizeof(ctrl)); ++ if (s != sizeof(ctrl)) { ++ status = VIRTIO_NET_ERR; ++ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { ++ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { ++ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { ++ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { ++ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { ++ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); ++ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { ++ status 
= virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); ++ } ++ ++ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); ++ assert(s == sizeof(status)); ++ ++ g_free(iov2); ++ return sizeof(status); ++} ++ ++static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) ++{ ++ VirtQueueElement *elem; + + for (;;) { ++ size_t written; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!elem) { + break; + } +- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || +- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { +- virtio_error(vdev, "virtio-net ctrl missing headers"); ++ ++ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, ++ elem->out_sg, elem->out_num); ++ if (written > 0) { ++ virtqueue_push(vq, elem, written); ++ virtio_notify(vdev, vq); ++ g_free(elem); ++ } else { + virtqueue_detach_element(vq, elem, 0); + g_free(elem); + break; + } +- +- iov_cnt = elem->out_num; +- iov2 = iov = g_memdup2(elem->out_sg, +- sizeof(struct iovec) * elem->out_num); +- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); +- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); +- if (s != sizeof(ctrl)) { +- status = VIRTIO_NET_ERR; +- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { +- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { +- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { +- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { +- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { +- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); +- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { +- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); +- } +- +- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); +- assert(s == 
sizeof(status)); +- +- virtqueue_push(vq, elem, sizeof(status)); +- virtio_notify(vdev, vq); +- g_free(iov2); +- g_free(elem); + } + } + +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index cce1c554f7..ef234ffe7e 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -221,6 +221,10 @@ struct VirtIONet { + struct EBPFRSSContext ebpf_rss; + }; + ++size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, ++ const struct iovec *in_sg, unsigned in_num, ++ const struct iovec *out_sg, ++ unsigned out_num); + void virtio_net_set_netclient_name(VirtIONet *n, const char *name, + const char *type); + +-- +2.31.1 + diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch new file mode 100644 index 0000000..9da7ea7 --- /dev/null +++ b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -0,0 +1,143 @@ +From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:13 -0700 +Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for + vhost_vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +With MQ enabled vdpa device and non-MQ supporting guest e.g. +booting vdpa with mq=on over OVMF of single vqp, below assert +failure is seen: + +../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed. 
+ +0 0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6 +1 0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6 +2 0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6 +3 0x00007f8ce3fec252 in () at /lib64/libc.so.6 +4 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:563 +5 0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=, idx=) at ../hw/virtio/vhost-vdpa.c:558 +6 0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=) at ../hw/virtio/vhost.c:1557 +7 0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false) + at ../hw/virtio/virtio-pci.c:974 +8 0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019 +9 0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:361 +10 0x0000558f52d4e5e7 in virtio_net_set_status (status=, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289 +11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370 +12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945 +13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=, val=, size=) at ../hw/virtio/virtio-pci.c:1292 +14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=, size=1, shift=, mask=, attrs=...) + at ../softmmu/memory.c:492 +15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=, access_size_max=, access_fn=0x558f52d15cf0 , mr=0x558f568f19d0, attrs=...) 
at ../softmmu/memory.c:554 +16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=, op=, attrs=attrs@entry=...) + at ../softmmu/memory.c:1504 +17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=, l=, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165 +18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822 +19 0x0000558f52d0b36b in address_space_write (as=, addr=, attrs=..., buf=buf@entry=0x7f8ce6300028, len=) + at ../softmmu/physmem.c:2914 +20 0x0000558f52d0b3da in address_space_rw (as=, addr=, attrs=..., + attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=, is_write=) at ../softmmu/physmem.c:2924 +21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903 +22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49 +23 0x0000558f52f9f04a in qemu_thread_start (args=) at ../util/qemu-thread-posix.c:556 +24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0 +25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6 + +The cause for the assert failure is due to that the vhost_dev index +for the ctrl vq was not aligned with actual one in use by the guest. +Upon multiqueue feature negotiation in virtio_net_set_multiqueue(), +if guest doesn't support multiqueue, the guest vq layout would shrink +to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl). +This results in ctrl_vq taking a different vhost_dev group index than +the default. We can map vq to the correct vhost_dev group by checking +if MQ is supported by guest and successfully negotiated. 
Since the +MQ feature is only present along with CTRL_VQ, we ensure the index +2 is only meant for the control vq while MQ is not supported by guest. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Suggested-by: Jason Wang +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++-- + 1 file changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index ffb3475201..f0bb29c741 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qemu/atomic.h" + #include "qemu/iov.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/module.h" + #include "hw/virtio/virtio.h" +@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { + static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. 
++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return false; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + bool mask) + { + VirtIONet *n = VIRTIO_NET(vdev); +- NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ NetClientState *nc; + assert(n->vhost_started); ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) { ++ /* Must guard against invalid features and bogus queue index ++ * from being set by malicious guest, or penetrated through ++ * buggy migration stream. ++ */ ++ if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "%s: bogus vq index ignored\n", __func__); ++ return; ++ } ++ nc = qemu_get_subqueue(n->nic, n->max_queue_pairs); ++ } else { ++ nc = qemu_get_subqueue(n->nic, vq2q(idx)); ++ } + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), + vdev, idx, mask); + } +-- +2.31.1 + diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch new file mode 100644 index 0000000..3930cc2 --- /dev/null +++ b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -0,0 +1,109 @@ +From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:18 -0700 +Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace + handler for vhost-vdpa +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs) 
+RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +virtio_queue_host_notifier_read() tends to read pending event +left behind on ioeventfd in the vhost_net_stop() path, and +attempts to handle outstanding kicks from userspace vq handler. +However, in the ctrl_vq handler, virtio_net_handle_mq() has a +recursive call into virtio_net_set_status(), which may lead to +segmentation fault as shown in below stack trace: + +0 0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376 +1 0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331 +2 0x000055f800d70d7f in vhost_memory_unmap (dev=) at ../hw/virtio/vhost.c:318 +3 0x000055f800d70d7f in vhost_memory_unmap (dev=, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336 +4 0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241 +5 0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839 +6 0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315 +7 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +8 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +9 0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=, iov=, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408 +11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452 +12 0x000055f800d69f37 in 
virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331 +13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575 +14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312 +15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) + at ../../../include/hw/virtio/virtio-bus.h:35 +16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316 +17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) + at ../hw/net/vhost_net.c:423 +18 0x000055f800d4e628 in virtio_net_set_status (status=, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296 +19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370 +20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=) at ../hw/virtio/virtio.c:1945 +21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333 +22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262 +23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280 +24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812 +25 0x000055f800ad5b13 in main (argc=, argv=, envp=) at ../softmmu/main.c:51 + +For now, temporarily disable handling MQ request from the ctrl_vq +userspace handler to avoid the recursive virtio_net_set_status() +call. Some rework is needed to allow changing the number of +queues without going through a full virtio_net_set_status cycle, +particularly for vhost-vdpa backend. 
+ +This patch will need to be reverted as soon as future patches of +having the change of #queues handled in userspace is merged. + +Fixes: 402378407db ("vhost-vdpa: multiqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f0bb29c741..099e65036d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); + uint16_t queue_pairs; ++ NetClientState *nc = qemu_get_queue(n->nic); + + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { +@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, + return VIRTIO_NET_ERR; + } + ++ /* Avoid changing the number of queue_pairs for vdpa device in ++ * userspace handler. A future fix is needed to handle the mq ++ * change in userspace handler with vhost-vdpa. Let's disable ++ * the mq handling from userspace for now and only allow get ++ * done through the kernel. Ripples may be seen when falling ++ * back to userspace, but without doing it qemu process would ++ * crash on a recursive entry to virtio_net_set_status(). 
++ */ ++ if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) { ++ return VIRTIO_NET_ERR; ++ } ++ + n->curr_queue_pairs = queue_pairs; + /* stop the backend before changing the number of queue_pairs to avoid handling a + * disabled queue */ +-- +2.31.1 + diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch new file mode 100644 index 0000000..f6072d2 --- /dev/null +++ b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -0,0 +1,52 @@ +From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 +From: Si-Wei Liu +Date: Fri, 6 May 2022 19:28:12 -0700 +Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only + when feature is negotiated +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jason Wang +RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) +RH-Bugzilla: 2070804 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Laurent Vivier +RH-Acked-by: Cindy Lu + +When the control virtqueue feature is absent or not negotiated, +vhost_net_start() still tries to set up vhost_dev and install +vhost notifiers for the control virtqueue, which results in +erroneous ioctl calls with incorrect queue index sending down +to driver. Do that only when needed. + +Fixes: 22288fe ("virtio-net: vhost control virtqueue support") +Signed-off-by: Si-Wei Liu +Acked-by: Jason Wang +Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b) +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 1067e72b39..ffb3475201 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) + VirtIODevice *vdev = VIRTIO_DEVICE(n); + NetClientState *nc = qemu_get_queue(n->nic); + int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; +- int cvq = n->max_ncs - n->max_queue_pairs; ++ int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ? ++ n->max_ncs - n->max_queue_pairs : 0; + + if (!get_vhost_net(nc->peer)) { + return; +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch new file mode 100644 index 0000000..897e04c --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_cmd_vq.patch @@ -0,0 +1,77 @@ +From 975af1b9f1811e113e1babd928ae70f8e4ebefb5 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:19 +0100 +Subject: [PATCH 13/16] virtio-scsi: clean up virtio_scsi_handle_cmd_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [5/6] 27b0225783fa9bbb8fe5ee692bd3f0a888d49d07 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_cmd_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-6-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit ad482b57ef841b2d4883c5079d20ba44ff5e4b3e) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index a47033d91d..df5ff8bab7 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -685,12 +685,11 @@ static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) + scsi_req_unref(sreq); + } + +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req, *next; + int ret = 0; + bool suppress_notifications = virtio_queue_get_notification(vq); +- bool progress = false; + + QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); + +@@ -700,7 +699,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + } + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + ret = virtio_scsi_handle_cmd_req_prepare(s, req); + if (!ret) { + QTAILQ_INSERT_TAIL(&reqs, req, next); +@@ -725,7 +723,6 @@ bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq) + QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { + virtio_scsi_handle_cmd_req_submit(s, req); + } +- return progress; + } + + static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 44dc3b81ec..2497530064 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + void 
virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch new file mode 100644 index 0000000..30f012f --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_ctrl_vq.patch @@ -0,0 +1,65 @@ +From c6e16a7a5a18ec2bc4f8a6f5cc1c887e18b16cdf Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:12 +0100 +Subject: [PATCH 12/16] virtio-scsi: clean up virtio_scsi_handle_ctrl_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [4/6] ca3751b7bfad5163c5b1c81b8525936a848d42ea (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_ctrl_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. 
+ +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-5-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 73b3b49f1880f236b4d0ffd7efb00280c05a5fab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 5 +---- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index dd2185b943..a47033d91d 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -460,16 +460,13 @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req) + } + } + +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + { + VirtIOSCSIReq *req; +- bool progress = false; + + while ((req = virtio_scsi_pop_req(s, vq))) { +- progress = true; + virtio_scsi_handle_ctrl_req(s, req); + } +- return progress; + } + + /* +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 5957597825..44dc3b81ec 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -152,7 +152,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + + void virtio_scsi_common_unrealize(DeviceState *dev); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); +-bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); + void virtio_scsi_free_req(VirtIOSCSIReq *req); + void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +-- +2.31.1 + diff --git a/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch new file mode 100644 index 0000000..bfdd39b --- /dev/null +++ b/kvm-virtio-scsi-clean-up-virtio_scsi_handle_event_vq.patch @@ -0,0 +1,62 @@ +From 019d5a0ca5d13f837a59b9e2815e2fd7ac120807 Mon Sep 17 00:00:00 2001 +From: Stefan 
Hajnoczi +Date: Tue, 17 May 2022 09:28:06 +0100 +Subject: [PATCH 11/16] virtio-scsi: clean up virtio_scsi_handle_event_vq() + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [3/6] f8dbc4c1991c61e4cf8dea50942c3cd509c9c4bd (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +virtio_scsi_handle_event_vq() is only called from hw/scsi/virtio-scsi.c +now and its return value is no longer used. Remove the function +prototype from virtio-scsi.h and drop the return value. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-4-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 37ce2de95169dacab3fb53d11bd4509b9c2e3a4c) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 4 +--- + include/hw/virtio/virtio-scsi.h | 1 - + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7b69eeed64..dd2185b943 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -856,13 +856,11 @@ void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, + virtio_scsi_complete_req(req); + } + +-bool virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) ++static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq) + { + if (s->events_dropped) { + virtio_scsi_push_event(s, NULL, VIRTIO_SCSI_T_NO_EVENT, 0); +- return true; + } +- return false; + } + + static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 543681bc18..5957597825 100644 +--- a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -151,7 +151,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-bool 
virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq); + bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq); + void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch new file mode 100644 index 0000000..5ba11a2 --- /dev/null +++ b/kvm-virtio-scsi-don-t-waste-CPU-polling-the-event-virtqu.patch @@ -0,0 +1,103 @@ +From 1b609b2af303fb6498b2ef94ac4f2e900dc8c1b2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:27:45 +0100 +Subject: [PATCH 10/16] virtio-scsi: don't waste CPU polling the event + virtqueue + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [2/6] 7e613d9b9fa8ceb668c78cb3ce7ebe1d73a004b5 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +The virtio-scsi event virtqueue is not emptied by its handler function. +This is typical for rx virtqueues where the device uses buffers when +some event occurs (e.g. a packet is received, an error condition +happens, etc). + +Polling non-empty virtqueues wastes CPU cycles. We are not waiting for +new buffers to become available, we are waiting for an event to occur, +so it's a misuse of CPU resources to poll for buffers. + +Introduce the new virtio_queue_aio_attach_host_notifier_no_poll() API, +which is identical to virtio_queue_aio_attach_host_notifier() except +that it does not poll the virtqueue. 
+ +Before this patch the following command-line consumed 100% CPU in the +IOThread polling and calling virtio_scsi_handle_event(): + + $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \ + --object iothread,id=iothread0 \ + --device virtio-scsi-pci,iothread=iothread0 \ + --blockdev file,filename=test.img,aio=native,cache.direct=on,node-name=drive0 \ + --device scsi-hd,drive=drive0 + +After this patch CPU is no longer wasted. + +Reported-by: Nir Soffer +Signed-off-by: Stefan Hajnoczi +Tested-by: Nir Soffer +Message-id: 20220427143541.119567-3-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 38738f7dbbda90fbc161757b7f4be35b52205552) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi-dataplane.c | 2 +- + hw/virtio/virtio.c | 13 +++++++++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c +index 29575cbaf6..8bb6e6acfc 100644 +--- a/hw/scsi/virtio-scsi-dataplane.c ++++ b/hw/scsi/virtio-scsi-dataplane.c +@@ -138,7 +138,7 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev) + + aio_context_acquire(s->ctx); + virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx); +- virtio_queue_aio_attach_host_notifier(vs->event_vq, s->ctx); ++ virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx); + + for (i = 0; i < vs->conf.num_queues; i++) { + virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx); +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9d637e043e..67a873f54a 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3534,6 +3534,19 @@ void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx) + virtio_queue_host_notifier_aio_poll_end); + } + ++/* ++ * Same as virtio_queue_aio_attach_host_notifier() but without polling. Use ++ * this for rx virtqueues and similar cases where the virtqueue handler ++ * function does not pop all elements. 
When the virtqueue is left non-empty ++ * polling consumes CPU cycles and should not be used. ++ */ ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx) ++{ ++ aio_set_event_notifier(ctx, &vq->host_notifier, true, ++ virtio_queue_host_notifier_read, ++ NULL, NULL); ++} ++ + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx) + { + aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL, NULL); +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index b31c4507f5..b62a35fdca 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -317,6 +317,7 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled); + void virtio_queue_host_notifier_read(EventNotifier *n); + void virtio_queue_aio_attach_host_notifier(VirtQueue *vq, AioContext *ctx); ++void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ctx); + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch new file mode 100644 index 0000000..1f22ba0 --- /dev/null +++ b/kvm-virtio-scsi-fix-ctrl-and-event-handler-functions-in-.patch @@ -0,0 +1,119 @@ +From 5aaf33dbbbc89d58a52337985641723b9ee13541 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Wed, 27 Apr 2022 15:35:36 +0100 +Subject: [PATCH 09/16] virtio-scsi: fix ctrl and event handler functions in + dataplane mode + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [1/6] 3087889041b960f14a6b3893243f78523a78f637 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav 
Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +Commit f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare +virtio_scsi_handle_cmd for dataplane") prepared the virtio-scsi cmd +virtqueue handler function to be used in both the dataplane and +non-dataplane code paths. + +It failed to convert the ctrl and event virtqueue handler functions, +which are not designed to be called from the dataplane code path but +will be since the ioeventfd is set up for those virtqueues when +dataplane starts. + +Convert the ctrl and event virtqueue handler functions now so they +operate correctly when called from the dataplane code path. Avoid code +duplication by extracting this code into a helper function. + +Fixes: f34e8d8b8d48d73f36a67b6d5e492ef9784b5012 ("virtio-scsi: prepare virtio_scsi_handle_cmd for dataplane") +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-2-stefanha@redhat.com +[Fixed s/by used/be used/ typo pointed out by Michael Tokarev +. +--Stefan] +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 2f743ef6366c2df4ef51ef3ae318138cdc0125ab) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 42 +++++++++++++++++++++++++++--------------- + 1 file changed, 27 insertions(+), 15 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 7f6da33a8a..7b69eeed64 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -472,16 +472,32 @@ bool virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq) + return progress; + } + ++/* ++ * If dataplane is configured but not yet started, do so now and return true on ++ * success. ++ * ++ * Dataplane is started by the core virtio code but virtqueue handler functions ++ * can also be invoked when a guest kicks before DRIVER_OK, so this helper ++ * function helps us deal with manually starting ioeventfd in that case. 
++ */ ++static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s) ++{ ++ if (!s->ctx || s->dataplane_started) { ++ return false; ++ } ++ ++ virtio_device_start_ioeventfd(&s->parent_obj.parent_obj); ++ return !s->dataplane_fenced; ++} ++ + static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_ctrl_vq(s, vq); + virtio_scsi_release(s); +@@ -720,12 +736,10 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq) + /* use non-QOM casts in the data path */ + VirtIOSCSI *s = (VirtIOSCSI *)vdev; + +- if (s->ctx && !s->dataplane_started) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_cmd_vq(s, vq); + virtio_scsi_release(s); +@@ -855,12 +869,10 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq) + { + VirtIOSCSI *s = VIRTIO_SCSI(vdev); + +- if (s->ctx) { +- virtio_device_start_ioeventfd(vdev); +- if (!s->dataplane_fenced) { +- return; +- } ++ if (virtio_scsi_defer_to_dataplane(s)) { ++ return; + } ++ + virtio_scsi_acquire(s); + virtio_scsi_handle_event_vq(s, vq); + virtio_scsi_release(s); +-- +2.31.1 + diff --git a/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch new file mode 100644 index 0000000..8487f5c --- /dev/null +++ b/kvm-virtio-scsi-move-request-related-items-from-.h-to-.c.patch @@ -0,0 +1,168 @@ +From 6603f216dbc07a1d221b1665409cfec6cc9960e2 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 17 May 2022 09:28:26 +0100 +Subject: [PATCH 14/16] virtio-scsi: move request-related items from .h to .c + +RH-Author: Stefan Hajnoczi 
+RH-MergeRequest: 88: virtio-scsi: fix 100% CPU consumption with IOThreads +RH-Commit: [6/6] ecdf5289abd04062c85c5ed8e577a5249684a3b0 (stefanha/centos-stream-qemu-kvm) +RH-Bugzilla: 2079347 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Kevin Wolf +RH-Acked-by: Stefano Garzarella + +There is no longer a need to expose the request and related APIs in +virtio-scsi.h since there are no callers outside virtio-scsi.c. + +Note the block comment in VirtIOSCSIReq has been adjusted to meet the +coding style. + +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Paolo Bonzini +Message-id: 20220427143541.119567-7-stefanha@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 3dc584abeef0e1277c2de8c1c1974cb49444eb0a) +Signed-off-by: Stefan Hajnoczi +--- + hw/scsi/virtio-scsi.c | 45 ++++++++++++++++++++++++++++++--- + include/hw/virtio/virtio-scsi.h | 40 ----------------------------- + 2 files changed, 41 insertions(+), 44 deletions(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index df5ff8bab7..2450c9438c 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -29,6 +29,43 @@ + #include "hw/virtio/virtio-access.h" + #include "trace.h" + ++typedef struct VirtIOSCSIReq { ++ /* ++ * Note: ++ * - fields up to resp_iov are initialized by virtio_scsi_init_req; ++ * - fields starting at vring are zeroed by virtio_scsi_init_req. 
++ */ ++ VirtQueueElement elem; ++ ++ VirtIOSCSI *dev; ++ VirtQueue *vq; ++ QEMUSGList qsgl; ++ QEMUIOVector resp_iov; ++ ++ union { ++ /* Used for two-stage request submission */ ++ QTAILQ_ENTRY(VirtIOSCSIReq) next; ++ ++ /* Used for cancellation of request during TMFs */ ++ int remaining; ++ }; ++ ++ SCSIRequest *sreq; ++ size_t resp_size; ++ enum SCSIXferMode mode; ++ union { ++ VirtIOSCSICmdResp cmd; ++ VirtIOSCSICtrlTMFResp tmf; ++ VirtIOSCSICtrlANResp an; ++ VirtIOSCSIEvent event; ++ } resp; ++ union { ++ VirtIOSCSICmdReq cmd; ++ VirtIOSCSICtrlTMFReq tmf; ++ VirtIOSCSICtrlANReq an; ++ } req; ++} VirtIOSCSIReq; ++ + static inline int virtio_scsi_get_lun(uint8_t *lun) + { + return ((lun[2] << 8) | lun[3]) & 0x3FFF; +@@ -45,7 +82,7 @@ static inline SCSIDevice *virtio_scsi_device_get(VirtIOSCSI *s, uint8_t *lun) + return scsi_device_get(&s->bus, 0, lun[1], virtio_scsi_get_lun(lun)); + } + +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) ++static void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + { + VirtIODevice *vdev = VIRTIO_DEVICE(s); + const size_t zero_skip = +@@ -58,7 +95,7 @@ void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req) + memset((uint8_t *)req + zero_skip, 0, sizeof(*req) - zero_skip); + } + +-void virtio_scsi_free_req(VirtIOSCSIReq *req) ++static void virtio_scsi_free_req(VirtIOSCSIReq *req) + { + qemu_iovec_destroy(&req->resp_iov); + qemu_sglist_destroy(&req->qsgl); +@@ -801,8 +838,8 @@ static void virtio_scsi_reset(VirtIODevice *vdev) + s->events_dropped = false; + } + +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason) ++static void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, ++ uint32_t event, uint32_t reason) + { + VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s); + VirtIOSCSIReq *req; +diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h +index 2497530064..abdda2cbd0 100644 +--- 
a/include/hw/virtio/virtio-scsi.h ++++ b/include/hw/virtio/virtio-scsi.h +@@ -94,42 +94,6 @@ struct VirtIOSCSI { + uint32_t host_features; + }; + +-typedef struct VirtIOSCSIReq { +- /* Note: +- * - fields up to resp_iov are initialized by virtio_scsi_init_req; +- * - fields starting at vring are zeroed by virtio_scsi_init_req. +- * */ +- VirtQueueElement elem; +- +- VirtIOSCSI *dev; +- VirtQueue *vq; +- QEMUSGList qsgl; +- QEMUIOVector resp_iov; +- +- union { +- /* Used for two-stage request submission */ +- QTAILQ_ENTRY(VirtIOSCSIReq) next; +- +- /* Used for cancellation of request during TMFs */ +- int remaining; +- }; +- +- SCSIRequest *sreq; +- size_t resp_size; +- enum SCSIXferMode mode; +- union { +- VirtIOSCSICmdResp cmd; +- VirtIOSCSICtrlTMFResp tmf; +- VirtIOSCSICtrlANResp an; +- VirtIOSCSIEvent event; +- } resp; +- union { +- VirtIOSCSICmdReq cmd; +- VirtIOSCSICtrlTMFReq tmf; +- VirtIOSCSICtrlANReq an; +- } req; +-} VirtIOSCSIReq; +- + static inline void virtio_scsi_acquire(VirtIOSCSI *s) + { + if (s->ctx) { +@@ -151,10 +115,6 @@ void virtio_scsi_common_realize(DeviceState *dev, + Error **errp); + + void virtio_scsi_common_unrealize(DeviceState *dev); +-void virtio_scsi_init_req(VirtIOSCSI *s, VirtQueue *vq, VirtIOSCSIReq *req); +-void virtio_scsi_free_req(VirtIOSCSIReq *req); +-void virtio_scsi_push_event(VirtIOSCSI *s, SCSIDevice *dev, +- uint32_t event, uint32_t reason); + + void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp); + int virtio_scsi_dataplane_start(VirtIODevice *s); +-- +2.31.1 + diff --git a/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch new file mode 100644 index 0000000..face8e6 --- /dev/null +++ b/kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch @@ -0,0 +1,110 @@ +From 2754dc2c7def01d7dd1bb39f3e86ef444652d397 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 25 Jan 2022 13:51:14 -0500 +Subject: [PATCH 1/6] virtiofsd: Drop 
membership of all supplementary groups + (CVE-2022-0358) + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 102: virtiofsd: Drop membership of all supplementary groups (CVE-2022-0358) +RH-Commit: [1/1] 93e56c88277fec8e42559a899d32b80fac4a923f +RH-Bugzilla: 2046198 +RH-Acked-by: Greg Kurz +RH-Acked-by: Sergio Lopez +RH-Acked-by: Laszlo Ersek + +At the start, drop membership of all supplementary groups. This is +not required. + +If we have membership of "root" supplementary group and when we switch +uid/gid using setresuid/setsgid, we still retain membership of existing +supplementary groups. And that can allow some operations which are not +normally allowed. + +For example, if root in guest creates a dir as follows. + +$ mkdir -m 03777 test_dir + +This sets SGID on dir as well as allows unprivileged users to write into +this dir. + +And now as unprivileged user open file as follows. + +$ su test +$ fd = open("test_dir/priviledge_id", O_RDWR|O_CREAT|O_EXCL, 02755); + +This will create SGID set executable in test_dir/. + +And that's a problem because now an unprivileged user can execute it, +get egid=0 and get access to resources owned by "root" group. This is +privilege escalation. + +Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2044863 +Fixes: CVE-2022-0358 +Reported-by: JIETAO XIAO +Suggested-by: Miklos Szeredi +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: +Signed-off-by: Dr. 
David Alan Gilbert + dgilbert: Fixed missing {}'s style nit +(cherry picked from commit 449e8171f96a6a944d1f3b7d3627ae059eae21ca) +--- + tools/virtiofsd/passthrough_ll.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index 64b5b4fbb1..b3d0674f6d 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -54,6 +54,7 @@ + #include + #include + #include ++#include + + #include "qemu/cutils.h" + #include "passthrough_helpers.h" +@@ -1161,6 +1162,30 @@ static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) + #define OURSYS_setresuid SYS_setresuid + #endif + ++static void drop_supplementary_groups(void) ++{ ++ int ret; ++ ++ ret = getgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++ ++ if (!ret) { ++ return; ++ } ++ ++ /* Drop all supplementary groups. We should not need it */ ++ ret = setgroups(0, NULL); ++ if (ret == -1) { ++ fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", ++ errno, strerror(errno)); ++ exit(1); ++ } ++} ++ + /* + * Change to uid/gid of caller so that file is created with + * ownership of caller. 
+@@ -3926,6 +3951,8 @@ int main(int argc, char *argv[]) + + qemu_init_exec_dir(argv[0]); + ++ drop_supplementary_groups(); ++ + pthread_mutex_init(&lo.mutex, NULL); + lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); + lo.root.fd = -1; +-- +2.27.0 + diff --git a/kvm-vmxcap-Add-5-level-EPT-bit.patch b/kvm-vmxcap-Add-5-level-EPT-bit.patch new file mode 100644 index 0000000..8cdb980 --- /dev/null +++ b/kvm-vmxcap-Add-5-level-EPT-bit.patch @@ -0,0 +1,48 @@ +From f0f87dcea3fe14b20b8599cda9b1151ca2490d0c Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 30 Mar 2022 14:52:34 -0400 +Subject: [PATCH 07/18] vmxcap: Add 5-level EPT bit + +RH-Author: Jon Maloy +RH-MergeRequest: 139: vmxcap: Add 5-level EPT bit +RH-Commit: [1/2] 4c098f551f1ed8e2a5582f466afda35b28d97055 (jmaloy/qemu-kvm) +RH-Bugzilla: 2065207 +RH-Acked-by: Paolo Bonzini + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2065207 +UPSTREAM: Merged + +commit d312378e59658473aa91aa15c67ec6200d92e5ff +Author: Vitaly Kuznetsov +Date: Mon Feb 21 15:53:16 2022 +0100 + + vmxcap: Add 5-level EPT bit + + 5-level EPT is present in Icelake Server CPUs and is supported by QEMU + ('vmx-page-walk-5'). 
+ + Signed-off-by: Vitaly Kuznetsov + Message-Id: <20220221145316.576138-2-vkuznets@redhat.com> + Signed-off-by: Paolo Bonzini + +(cherry picked from commit d312378e59658473aa91aa15c67ec6200d92e5ff) +Signed-off-by: Jon Maloy +--- + scripts/kvm/vmxcap | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap +index 6fe66d5f57..f140040104 100755 +--- a/scripts/kvm/vmxcap ++++ b/scripts/kvm/vmxcap +@@ -249,6 +249,7 @@ controls = [ + bits = { + 0: 'Execute-only EPT translations', + 6: 'Page-walk length 4', ++ 7: 'Page-walk length 5', + 8: 'Paging-structure memory type UC', + 14: 'Paging-structure memory type WB', + 16: '2MB EPT pages', +-- +2.27.0 + diff --git a/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch new file mode 100644 index 0000000..56ecea7 --- /dev/null +++ b/kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch @@ -0,0 +1,68 @@ +From 31530bf621dc28689142ffa83d025ec4a4f110c1 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Tue, 11 Jan 2022 18:29:31 +0000 +Subject: [PATCH 2/2] x86: Add q35 RHEL 8.6.0 machine type +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 99: x86: Add q35 RHEL 8.6.0 machine type +RH-Commit: [1/1] a694724b6fa972e312bb76b5569bc979d6c596ef +RH-Bugzilla: 2031035 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Cornelia Huck + +Add the new 8.6.0 machine type; note that while the -AV +notation has gone in the product naming, just keep the smbios +definitions the same for consistency. + +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/i386/pc_q35.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index f6e77bca0e..5559261d9e 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -646,6 +646,24 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + compat_props_add(m->compat_props, pc_rhel_compat, pc_rhel_compat_len); + } + ++static void pc_q35_init_rhel860(MachineState *machine) ++{ ++ pc_q35_init(machine); ++} ++ ++static void pc_q35_machine_rhel860_options(MachineClass *m) ++{ ++ PCMachineClass *pcmc = PC_MACHINE_CLASS(m); ++ pc_q35_machine_rhel_options(m); ++ m->desc = "RHEL-8.6.0 PC (Q35 + ICH9, 2009)"; ++ pcmc->smbios_stream_product = "RHEL-AV"; ++ pcmc->smbios_stream_version = "8.6.0"; ++} ++ ++DEFINE_PC_MACHINE(q35_rhel860, "pc-q35-rhel8.6.0", pc_q35_init_rhel860, ++ pc_q35_machine_rhel860_options); ++ ++ + static void pc_q35_init_rhel850(MachineState *machine) + { + pc_q35_init(machine); +@@ -654,8 +672,9 @@ static void pc_q35_init_rhel850(MachineState *machine) + static void pc_q35_machine_rhel850_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); +- pc_q35_machine_rhel_options(m); ++ pc_q35_machine_rhel860_options(m); + m->desc = "RHEL-8.5.0 PC (Q35 + ICH9, 2009)"; ++ m->alias = NULL; + pcmc->smbios_stream_product = "RHEL-AV"; + pcmc->smbios_stream_version = "8.5.0"; + compat_props_add(m->compat_props, hw_compat_rhel_8_5, +-- +2.27.0 + diff --git a/kvm-x86.conf b/kvm-x86.conf new file mode 100644 index 0000000..3f7842a --- /dev/null +++ b/kvm-x86.conf @@ -0,0 +1,12 @@ +# Setting modprobe kvm_intel/kvm_amd nested = 1 +# only enables Nested Virtualization until the next reboot or +# module reload. Uncomment the option applicable +# to your system below to enable the feature permanently. +# +# User changes in this file are preserved across upgrades. 
+# +# For Intel +#options kvm_intel nested=1 +# +# For AMD +#options kvm_amd nested=1 diff --git a/kvm.conf b/kvm.conf new file mode 100644 index 0000000..24e60e9 --- /dev/null +++ b/kvm.conf @@ -0,0 +1,3 @@ +# +# User changes in this file are preserved across upgrades. +# diff --git a/modules-load.conf b/modules-load.conf new file mode 100644 index 0000000..45b477d --- /dev/null +++ b/modules-load.conf @@ -0,0 +1,4 @@ +# When using SELinux in libvirt, automatic loading of the kvm.ko kernel +# module might not work when qemu-kvm tries to access /dev/kvm - thus we +# simply always load this module during the boot process already. +kvm diff --git a/qemu-ga.sysconfig b/qemu-ga.sysconfig new file mode 100644 index 0000000..67bad0c --- /dev/null +++ b/qemu-ga.sysconfig @@ -0,0 +1,19 @@ +# This is a systemd environment file, not a shell script. +# It provides settings for "/lib/systemd/system/qemu-guest-agent.service". + +# Comma-separated blacklist of RPCs to disable, or empty list to enable all. +# +# You can get the list of RPC commands using "qemu-ga --blacklist='?'". +# There should be no spaces between commas and commands in the blacklist. +BLACKLIST_RPC=guest-file-open,guest-file-close,guest-file-read,guest-file-write,guest-file-seek,guest-file-flush,guest-exec,guest-exec-status + +# Fsfreeze hook script specification. +# +# FSFREEZE_HOOK_PATHNAME=/dev/null : disables the feature. +# +# FSFREEZE_HOOK_PATHNAME=/path/to/executable : enables the feature with the +# specified binary or shell script. +# +# FSFREEZE_HOOK_PATHNAME= : enables the feature with the +# default value (invoke "qemu-ga --help" to interrogate). 
+FSFREEZE_HOOK_PATHNAME=/etc/qemu-ga/fsfreeze-hook diff --git a/qemu-guest-agent.service b/qemu-guest-agent.service new file mode 100644 index 0000000..b33e951 --- /dev/null +++ b/qemu-guest-agent.service @@ -0,0 +1,20 @@ +[Unit] +Description=QEMU Guest Agent +BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device +After=dev-virtio\x2dports-org.qemu.guest_agent.0.device +IgnoreOnIsolate=True + +[Service] +UMask=0077 +EnvironmentFile=/etc/sysconfig/qemu-ga +ExecStart=/usr/bin/qemu-ga \ + --method=virtio-serial \ + --path=/dev/virtio-ports/org.qemu.guest_agent.0 \ + --blacklist=${BLACKLIST_RPC} \ + -F${FSFREEZE_HOOK_PATHNAME} +StandardError=syslog +Restart=always +RestartSec=0 + +[Install] +WantedBy=dev-virtio\x2dports-org.qemu.guest_agent.0.device diff --git a/qemu-kvm.spec b/qemu-kvm.spec new file mode 100644 index 0000000..66f14a2 --- /dev/null +++ b/qemu-kvm.spec @@ -0,0 +1,4174 @@ +%global SLOF_gittagdate 20191022 + +%global SLOF_gittagcommit 899d9883 + +%global have_usbredir 1 +%global have_spice 1 +%global have_opengl 1 +%global have_fdt 1 +%global have_gluster 1 +%global have_kvm_setup 0 +%global have_memlock_limits 0 + + + +# Release candidate version tracking +# global rcver rc4 +%if 0%{?rcver:1} +%global rcrel .%{rcver} +%global rcstr -%{rcver} +%endif + +%ifnarch %{ix86} x86_64 + %global have_usbredir 0 +%endif + +%ifnarch s390x + %global have_librdma 1 +%else + %global have_librdma 0 +%endif + +%ifarch %{ix86} + %global kvm_target i386 +%endif +%ifarch x86_64 + %global kvm_target x86_64 +%else + %global have_spice 0 + %global have_opengl 0 + %global have_gluster 0 +%endif +%ifarch %{power64} + %global kvm_target ppc64 + %global have_kvm_setup 1 + %global have_memlock_limits 1 +%endif +%ifarch s390x + %global kvm_target s390x + %global have_kvm_setup 1 +%endif +%ifarch ppc + %global kvm_target ppc +%endif +%ifarch aarch64 + %global kvm_target aarch64 +%endif + +#Versions of various parts: + +%global requires_all_modules \ +%if %{have_spice} \ 
+Requires: %{name}-ui-spice = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_opengl} \ +Requires: %{name}-ui-opengl = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-curl = %{epoch}:%{version}-%{release} \ +%if %{have_gluster} \ +Requires: %{name}-block-gluster = %{epoch}:%{version}-%{release} \ +%endif \ +%if %{have_usbredir} \ +Requires: %{name}-hw-usbredir = %{epoch}:%{version}-%{release} \ +%endif \ +Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \ +Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release} + +# Macro to properly setup RHEL/RHEV conflict handling +%define rhev_ma_conflicts() \ +Obsoletes: %1-ma <= %{epoch}:%{version}-%{release} \ +Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} + +Summary: QEMU is a machine emulator and virtualizer +Name: qemu-kvm +Version: 6.2.0 +Release: 12%{?rcrel}%{?dist} +# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped +Epoch: 15 +License: GPLv2 and GPLv2+ and CC-BY +Group: Development/Tools +URL: http://www.qemu.org/ +ExclusiveArch: x86_64 %{power64} aarch64 s390x + + +Source0: http://wiki.qemu.org/download/qemu-6.2.0.tar.xz + +# KSM control scripts +Source4: ksm.service +Source5: ksm.sysconfig +Source6: ksmctl.c +Source7: ksmtuned.service +Source8: ksmtuned +Source9: ksmtuned.conf +Source10: qemu-guest-agent.service +Source11: 99-qemu-guest-agent.rules +Source12: bridge.conf +Source13: qemu-ga.sysconfig +Source21: kvm-setup +Source22: kvm-setup.service +Source23: 85-kvm.preset +Source26: vhost.conf +Source27: kvm.conf +Source28: 95-kvm-memlock.conf +Source30: kvm-s390x.conf +Source31: kvm-x86.conf +Source32: qemu-pr-helper.service +Source33: qemu-pr-helper.socket +Source34: 81-kvm-rhel.rules +Source35: udev-kvm-check.c +Source36: README.tests +Source37: tests_data_acpi_pc_SSDT.dimmpxm +Source38: tests_data_acpi_q35_FACP.slic +Source39: tests_data_acpi_q35_SSDT.dimmpxm +Source40: 
tests_data_acpi_virt_SSDT.memhp + +Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch +Patch0005: 0005-Initial-redhat-build.patch +Patch0006: 0006-Enable-disable-devices-for-RHEL.patch +Patch0007: 0007-Machine-type-related-general-changes.patch +Patch0008: 0008-Add-aarch64-machine-types.patch +Patch0009: 0009-Add-ppc64-machine-types.patch +Patch0010: 0010-Add-s390x-machine-types.patch +Patch0011: 0011-Add-x86_64-machine-types.patch +Patch0012: 0012-Enable-make-check.patch +Patch0013: 0013-vfio-cap-number-of-devices-that-can-be-assigned.patch +Patch0014: 0014-Add-support-statement-to-help-output.patch +Patch0015: 0015-globally-limit-the-maximum-number-of-CPUs.patch +Patch0016: 0016-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch +Patch0017: 0017-virtio-scsi-Reject-scsi-cd-if-data-plane-enabled-RHE.patch +Patch0018: 0018-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch +Patch0019: 0019-compat-Update-hw_compat_rhel_8_5.patch +Patch0020: 0020-redhat-Update-pseries-rhel8.5.0-machine-type.patch +Patch0021: 0021-redhat-virt-rhel8.5.0-Update-machine-type-compatibil.patch +Patch0022: 0022-Fix-virtio-net-pci-vectors-compat.patch +Patch0023: 0023-x86-rhel-machine-types-Add-pc_rhel_8_5_compat.patch +Patch0024: 0024-x86-rhel-machine-types-Wire-compat-into-q35-and-i440.patch +Patch0025: 0025-redhat-Add-s390x-machine-type-compatibility-handling.patch +# For bz#2005325 - Fix CPU Model for new IBM Z Hardware - qemu part +Patch26: kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch +# For bz#2031041 - Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le] +Patch27: kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch28: kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch29: kvm-hw-arm-virt-Register-its-as-a-class-property.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 
[aarch64] +Patch30: kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch31: kvm-hw-arm-virt-Add-8.6-machine-type.patch +# For bz#2031039 - Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64] +Patch32: kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch +# For bz#2029582 - [8.6] machine types: 6.2: Fix prefer_sockets +Patch33: kvm-rhel-machine-types-x86-set-prefer_sockets.patch +# For bz#2036580 - CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8] +Patch34: kvm-acpi-validate-hotplug-selector-on-access.patch +# For bz#2031035 - Add rhel-8.6.0 machine types for RHEL 8.6 [x86] +Patch35: kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch +# For bz#2046198 - CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6] +Patch36: kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch +# For bz#2033279 - [wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back +Patch37: kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch38: kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch +# For bz#2021778 - Qemu core dump when do full backup during system reset +# For bz#2036178 - Qemu core dumped when do block-stream to a snapshot node on non-enough space storage +Patch39: kvm-iotests-stream-error-on-reset-New-test.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch40: kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch +# For bz#2037135 - Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD +Patch41: kvm-block-rbd-workaround-for-ceph-issue-53784.patch +# For bz#1518984 - [Intel 
8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch42: kvm-numa-Enable-numa-for-SGX-EPC-sections.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch43: kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch44: kvm-doc-Add-the-SGX-numa-description.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch45: kvm-Enable-SGX-RH-Only.patch +# For bz#1518984 - [Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support +Patch46: kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch47: kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch +# For bz#2041480 - [incremental_backup] Inconsistent block status reply in qemu-nbd +Patch48: kvm-iotests-block-status-cache-New-test.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch49: kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch50: kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch51: kvm-iotests.py-Add-QemuStorageDaemon-class.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch52: kvm-iotests-281-Test-lingering-timers.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch53: kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch +# For bz#2035185 - Qemu core dump when start guest with nbd node or do block jobs to nbd node +Patch54: kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch +# For 
bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch55: kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch56: kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch +# For bz#2062613 - Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0] +Patch57: kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch +# For bz#2060843 - [virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0] +Patch58: kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch59: kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch +# For bz#2062610 - Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0] +Patch60: kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch61: kvm-vmxcap-Add-5-level-EPT-bit.patch +# For bz#2065207 - Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0] +Patch62: kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch63: kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch64: kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch65: kvm-tests-acpi-add-SLIC-table-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch66: kvm-tests-acpi-SLIC-update-expected-blobs.patch +# For bz#2062611 - Guest can 
not start with SLIC acpi table [rhel-8.7.0] +Patch67: kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch68: kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch69: kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch70: kvm-tests-acpi-update-expected-blobs.patch +# For bz#2062611 - Guest can not start with SLIC acpi table [rhel-8.7.0] +Patch71: kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch +# For bz#2068202 - RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0] +Patch72: kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch73: kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch +# For bz#2067118 - qemu crash after execute blockdev-reopen with iothread +Patch74: kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch +# For bz#2071070 - s390x/css: fix PMCW invalid mask +Patch75: kvm-s390x-css-fix-PMCW-invalid-mask.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch76: kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch77: kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch +# For bz#1999236 - CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8] +Patch78: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch +# For bz#2040738 - CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8] +Patch79: 
kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch +# For bz#2063262 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] +Patch80: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch + +BuildRequires: wget +BuildRequires: rpm-build +BuildRequires: ninja-build +#BuildRequires: meson >= 0.58.2 +BuildRequires: zlib-devel +BuildRequires: glib2-devel +BuildRequires: which +BuildRequires: gnutls-devel +BuildRequires: cyrus-sasl-devel +BuildRequires: libtool +BuildRequires: libaio-devel +BuildRequires: rsync +BuildRequires: python3-devel +BuildRequires: pciutils-devel +BuildRequires: libiscsi-devel +BuildRequires: ncurses-devel +BuildRequires: libattr-devel +BuildRequires: libusbx-devel >= 1.0.23 +%if %{have_usbredir} +BuildRequires: usbredir-devel >= 0.7.1 +%endif +BuildRequires: texinfo +BuildRequires: python3-sphinx +%if %{have_spice} +BuildRequires: spice-protocol >= 0.12.12 +BuildRequires: spice-server-devel >= 0.12.8 +BuildRequires: libcacard-devel +# For smartcard NSS support +BuildRequires: nss-devel +%endif +BuildRequires: libseccomp-devel >= 2.4.0 +# For network block driver +BuildRequires: libcurl-devel +BuildRequires: libssh-devel +BuildRequires: librados-devel +BuildRequires: librbd-devel +%if %{have_gluster} +# For gluster block driver +BuildRequires: glusterfs-api-devel +BuildRequires: glusterfs-devel +%endif +# We need both because the 'stap' binary is probed for by configure +BuildRequires: systemtap +BuildRequires: systemtap-sdt-devel +# For VNC PNG support +BuildRequires: libpng-devel +# For uuid generation +BuildRequires: libuuid-devel +# For Braille device support +BuildRequires: brlapi-devel +# For test suite +BuildRequires: check-devel +# For virtiofs +BuildRequires: libcap-ng-devel +# Hard requirement for version >= 1.3 +BuildRequires: pixman-devel +# Documentation requirement +BuildRequires: perl-podlators +BuildRequires: texinfo +BuildRequires: 
python3-sphinx +# For rdma +%if 0%{?have_librdma} +BuildRequires: rdma-core-devel +%endif +%if %{have_fdt} +BuildRequires: libfdt-devel >= 1.6.0 +%endif +# iasl and cpp for acpi generation (not a hard requirement as we can use +# pre-compiled files, but it's better to use this) +%ifarch %{ix86} x86_64 +BuildRequires: iasl +BuildRequires: cpp +%endif +# For compressed guest memory dumps +BuildRequires: lzo-devel snappy-devel +# For NUMA memory binding +%ifnarch s390x +BuildRequires: numactl-devel +%endif +BuildRequires: libgcrypt-devel +# qemu-pr-helper multipath support (requires libudev too) +BuildRequires: device-mapper-multipath-devel +BuildRequires: systemd-devel +# used by qemu-bridge-helper and qemu-pr-helper +BuildRequires: libcap-ng-devel + +BuildRequires: diffutils +%ifarch x86_64 +BuildRequires: libpmem-devel +Requires: libpmem +%endif + +# qemu-keymap +BuildRequires: pkgconfig(xkbcommon) + +# For s390-pgste flag +%ifarch s390x +BuildRequires: binutils >= 2.27-16 +%endif + +%if %{have_opengl} +BuildRequires: pkgconfig(epoxy) +BuildRequires: pkgconfig(libdrm) +BuildRequires: pkgconfig(gbm) +%endif + +BuildRequires: perl-Test-Harness + +Requires: qemu-kvm-core = %{epoch}:%{version}-%{release} +Requires: qemu-kvm-docs = %{epoch}:%{version}-%{release} +%rhev_ma_conflicts qemu-kvm + +%{requires_all_modules} + +%define qemudocdir %{_docdir}/%{name} + +%description +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. 
+ + +%package -n qemu-kvm-core +Summary: qemu-kvm core components +Requires: %{name}-common = %{epoch}:%{version}-%{release} +Requires: qemu-img = %{epoch}:%{version}-%{release} +%ifarch %{ix86} x86_64 +Requires: edk2-ovmf +%endif +%ifarch aarch64 +Requires: edk2-aarch64 +%endif + +%ifarch %{power64} +Requires: SLOF >= %{SLOF_gittagdate}-1.git%{SLOF_gittagcommit} +%endif +Requires: libseccomp >= 2.4.0 +# For compressed guest memory dumps +Requires: lzo snappy +%if %{have_kvm_setup} +Requires(post): systemd-units +Requires(preun): systemd-units + %ifarch %{power64} +Requires: powerpc-utils + %endif +%endif +Requires: libusbx >= 1.0.23 +%if %{have_fdt} +Requires: libfdt >= 1.6.0 +%endif + +%rhev_ma_conflicts qemu-kvm + +%description -n qemu-kvm-core +qemu-kvm is an open source virtualizer that provides hardware +emulation for the KVM hypervisor. qemu-kvm acts as a virtual +machine monitor together with the KVM kernel modules, and emulates the +hardware for a full system such as a PC and its associated peripherals. + +%package -n qemu-kvm-docs +Summary: qemu-kvm documentation + +%description -n qemu-kvm-docs +qemu-kvm-docs provides documentation files regarding qemu-kvm. + +%package -n qemu-img +Summary: QEMU command line tool for manipulating disk images +Group: Development/Tools + +%rhev_ma_conflicts qemu-img + +%description -n qemu-img +This package provides a command line tool for manipulating disk images. 
+ +%package -n qemu-kvm-common +Summary: QEMU common files needed by all QEMU targets +Group: Development/Tools +Requires(post): /usr/bin/getent +Requires(post): /usr/sbin/groupadd +Requires(post): /usr/sbin/useradd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +%ifarch %{ix86} x86_64 +Requires: seabios-bin >= 1.10.2-1 +Requires: sgabios-bin +%endif +%ifnarch aarch64 s390x +Requires: seavgabios-bin >= 1.12.0-3 +Requires: ipxe-roms-qemu >= 20170123-1 +%endif + +%rhev_ma_conflicts qemu-kvm-common + +%description -n qemu-kvm-common +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides documentation and auxiliary programs used with qemu-kvm. + + +%package -n qemu-guest-agent +Summary: QEMU guest agent +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units + +%description -n qemu-guest-agent +qemu-kvm is an open source virtualizer that provides hardware emulation for +the KVM hypervisor. + +This package provides an agent to run inside guests, which communicates +with the host over a virtio-serial channel named "org.qemu.guest_agent.0" + +This package does not need to be installed on the host OS. + +%package tests +Summary: tests for the qemu-kvm package +Requires: %{name} = %{epoch}:%{version}-%{release} + +%define testsdir %{_libdir}/%{name}/tests-src + +%description tests +The qemu-kvm-tests rpm contains tests that can be used to verify +the functionality of the installed qemu-kvm package + +Install this package if you want access to the avocado_qemu +tests, or qemu-iotests. + +%package block-curl +Summary: QEMU CURL block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-curl +This package provides the additional CURL block driver for QEMU. 
+ +Install this package if you want to access remote disks over +http, https, ftp and other transports provided by the CURL library. + + +%if %{have_gluster} +%package block-gluster +Summary: QEMU Gluster block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%description block-gluster +This package provides the additional Gluster block driver for QEMU. + +Install this package if you want to access remote Gluster storage. +%endif + + +%package block-iscsi +Summary: QEMU iSCSI block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-iscsi +This package provides the additional iSCSI block driver for QEMU. + +Install this package if you want to access iSCSI volumes. + + +%package block-rbd +Summary: QEMU Ceph/RBD block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-rbd +This package provides the additional Ceph/RBD block driver for QEMU. + +Install this package if you want to access remote Ceph volumes +using the rbd protocol. + + +%package block-ssh +Summary: QEMU SSH block driver +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} + +%description block-ssh +This package provides the additional SSH block driver for QEMU. + +Install this package if you want to access remote disks using +the Secure Shell (SSH) protocol. + + +%if %{have_spice} +%package ui-spice +Summary: QEMU spice support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +%if %{have_opengl} +Requires: %{name}-ui-opengl%{?_isa} = %{epoch}:%{version}-%{release} +%endif + +%description ui-spice +This package provides spice support. +%endif + + +%if %{have_opengl} +%package ui-opengl +Summary: QEMU opengl support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: mesa-libGL +Requires: mesa-libEGL +Requires: mesa-dri-drivers + +%description ui-opengl +This package provides opengl support. 
+%endif + +%if %{have_usbredir} +%package hw-usbredir +Summary: QEMU usbredir support +Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release} +Requires: usbredir >= 0.7.1 + +%description hw-usbredir +This package provides usbredir support. +%endif + + +%prep +%setup -q -n qemu-%{version}%{?rcstr} +# Remove slirp content in scratchbuilds because it's being applyed as a patch +rm -fr slirp +mkdir slirp +%autopatch -p1 + +%global qemu_kvm_build qemu_kvm_build +mkdir -p %{qemu_kvm_build} + +cp -f %{SOURCE37} tests/data/acpi/pc/SSDT.dimmpxm +cp -f %{SOURCE38} tests/data/acpi/q35/FACP.slic +cp -f %{SOURCE39} tests/data/acpi/q35/SSDT.dimmpxm +cp -f %{SOURCE40} tests/data/acpi/virt/SSDT.memhp + +%build +%global buildarch %{kvm_target}-softmmu + +# --build-id option is used for giving info to the debug packages. +buildldflags="VL_LDFLAGS=-Wl,--build-id" + +%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle + +%if 0%{have_gluster} + %global block_drivers_list %{block_drivers_list},gluster +%endif + + +%define disable_everything \\\ + --disable-alsa \\\ + --disable-attr \\\ + --disable-auth-pam \\\ + --disable-avx2 \\\ + --disable-avx512f \\\ + --disable-bochs \\\ + --disable-bpf \\\ + --disable-brlapi \\\ + --disable-bsd-user \\\ + --disable-bzip2 \\\ + --disable-cap-ng \\\ + --disable-capstone \\\ + --disable-cfi \\\ + --disable-cfi-debug \\\ + --disable-cloop \\\ + --disable-cocoa \\\ + --disable-coreaudio \\\ + --disable-coroutine-pool \\\ + --disable-crypto-afalg \\\ + --disable-curl \\\ + --disable-curses \\\ + --disable-debug-info \\\ + --disable-debug-mutex \\\ + --disable-debug-tcg \\\ + --disable-dmg \\\ + --disable-docs \\\ + --disable-dsound \\\ + --disable-fdt \\\ + --disable-fuse \\\ + --disable-fuse-lseek \\\ + --disable-gcrypt \\\ + --disable-gettext \\\ + --disable-gio \\\ + --disable-glusterfs \\\ + --disable-gnutls \\\ + --disable-gtk \\\ + --disable-guest-agent \\\ + 
--disable-guest-agent-msi \\\ + --disable-hax \\\ + --disable-hvf \\\ + --disable-iconv \\\ + --disable-jack \\\ + --disable-kvm \\\ + --disable-l2tpv3 \\\ + --disable-libdaxctl \\\ + --disable-libiscsi \\\ + --disable-libnfs \\\ + --disable-libpmem \\\ + --disable-libssh \\\ + --disable-libudev \\\ + --disable-libusb \\\ + --disable-libxml2 \\\ + --disable-linux-aio \\\ + --disable-linux-io-uring \\\ + --disable-linux-user \\\ + --disable-live-block-migration \\\ + --disable-lto \\\ + --disable-lzfse \\\ + --disable-lzo \\\ + --disable-malloc-trim \\\ + --disable-membarrier \\\ + --disable-modules \\\ + --disable-module-upgrades \\\ + --disable-mpath \\\ + --disable-multiprocess \\\ + --disable-netmap \\\ + --disable-nettle \\\ + --disable-numa \\\ + --disable-nvmm \\\ + --disable-opengl \\\ + --disable-oss \\\ + --disable-pa \\\ + --disable-parallels \\\ + --disable-pie \\\ + --disable-pvrdma \\\ + --disable-qcow1 \\\ + --disable-qed \\\ + --disable-qom-cast-debug \\\ + --disable-rbd \\\ + --disable-rdma \\\ + --disable-replication \\\ + --disable-rng-none \\\ + --disable-safe-stack \\\ + --disable-sanitizers \\\ + --disable-sdl \\\ + --disable-sdl-image \\\ + --disable-seccomp \\\ + --disable-selinux \\\ + --disable-slirp-smbd \\\ + --disable-smartcard \\\ + --disable-snappy \\\ + --disable-sparse \\\ + --disable-spice \\\ + --disable-spice-protocol \\\ + --disable-strip \\\ + --disable-system \\\ + --disable-tcg \\\ + --disable-tools \\\ + --disable-tpm \\\ + --disable-u2f \\\ + --disable-usb-redir \\\ + --disable-user \\\ + --disable-vde \\\ + --disable-vdi \\\ + --disable-vhost-crypto \\\ + --disable-vhost-kernel \\\ + --disable-vhost-net \\\ + --disable-vhost-scsi \\\ + --disable-vhost-user \\\ + --disable-vhost-user-blk-server \\\ + --disable-vhost-vdpa \\\ + --disable-vhost-vsock \\\ + --disable-virglrenderer \\\ + --disable-virtfs \\\ + --disable-virtiofsd \\\ + --disable-vnc \\\ + --disable-vnc-jpeg \\\ + --disable-vnc-png \\\ + --disable-vnc-sasl \\\ + 
--disable-vte \\\ + --disable-vvfat \\\ + --disable-werror \\\ + --disable-whpx \\\ + --disable-xen \\\ + --disable-xen-pci-passthrough \\\ + --disable-xfsctl \\\ + --disable-xkbcommon \\\ + --disable-zstd \\\ + --with-git-submodules=ignore + +pushd %{qemu_kvm_build} +../configure \ + --prefix="%{_prefix}" \ + --libdir="%{_libdir}" \ + --datadir="%{_datadir}" \ + --sysconfdir="%{_sysconfdir}" \ + --interp-prefix=%{_prefix}/qemu-%M \ + --localstatedir="%{_localstatedir}" \ + --docdir="%{_docdir}" \ + --libexecdir="%{_libexecdir}" \ + --extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \ + --extra-cflags="%{optflags}" \ + --with-pkgversion="%{name}-%{version}-%{release}" \ + --with-suffix="%{name}" \ + --firmwarepath=%{_prefix}/share/qemu-firmware \ + --meson="git" \ + --target-list="%{buildarch}" \ + --block-drv-rw-whitelist=%{block_drivers_list} \ + --audio-drv-list= \ + --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \ + --with-coroutine=ucontext \ + --with-git=git \ + --tls-priority=@QEMU,SYSTEM \ + %{disable_everything} \ + --enable-attr \ +%ifarch %{ix86} x86_64 + --enable-avx2 \ +%endif + --enable-cap-ng \ + --enable-capstone=internal \ + --enable-coroutine-pool \ + --enable-curl \ + --enable-debug-info \ + --enable-docs \ +%if 0%{have_fdt} + --enable-fdt=system \ +%endif + --enable-gcrypt \ +%if 0%{have_gluster} + --enable-glusterfs \ +%endif + --enable-gnutls \ + --enable-guest-agent \ + --enable-iconv \ + --enable-kvm \ + --enable-libiscsi \ +%ifarch x86_64 + --enable-libpmem \ +%endif + --enable-libssh \ + --enable-libusb \ + --enable-libudev \ + --enable-linux-aio \ + --enable-lzo \ + --enable-malloc-trim \ + --enable-modules \ + --enable-mpath \ +%ifnarch s390x + --enable-numa \ +%endif +%if 0%{have_opengl} + --enable-opengl \ +%endif + --enable-pie \ + --enable-rbd \ +%if 0%{have_librdma} + --enable-rdma \ +%endif + --enable-seccomp \ + --enable-snappy \ +%if 0%{have_spice} + --enable-smartcard \ + --enable-spice \ + --enable-spice-protocol \ 
+%endif + --enable-system \ + --enable-tcg \ + --enable-tools \ + --enable-tpm \ + --enable-trace-backend=dtrace \ +%if 0%{have_usbredir} + --enable-usb-redir \ +%endif + --enable-virtiofsd \ + --enable-vhost-kernel \ + --enable-vhost-net \ + --enable-vhost-user \ + --enable-vhost-user-blk-server \ + --enable-vhost-vdpa \ + --enable-vhost-vsock \ + --enable-vnc \ + --enable-vnc-png \ + --enable-vnc-sasl \ + --enable-werror \ + --enable-xkbcommon \ + --without-default-devices \ + --with-devices-%{kvm_target}=%{kvm_target}-rh-devices + + +echo "qemu-kvm config-host.mak contents:" +echo "===" +cat config-host.mak +echo "===" + +make V=1 %{?_smp_mflags} $buildldflags + +# Setup back compat qemu-kvm binary +%{__python3} scripts/tracetool.py --backend dtrace --format stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm.stp + +%{__python3} scripts/tracetool.py --backends=dtrace --format=log-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-log.stp + +%{__python3} scripts/tracetool.py --backend dtrace --format simpletrace-stap \ + --group=all --binary %{_libexecdir}/qemu-kvm --probe-prefix qemu.kvm \ + trace/trace-events-all qemu-kvm-simpletrace.stp + +cp -a %{kvm_target}-softmmu/qemu-system-%{kvm_target} qemu-kvm + +gcc %{SOURCE6} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o ksmctl +gcc %{SOURCE35} $RPM_OPT_FLAGS $RPM_LD_FLAGS -o udev-kvm-check + +%ifarch s390x + # Copy the built new images into place for "make check": + cp pc-bios/s390-ccw/s390-ccw.img pc-bios/s390-ccw/s390-netboot.img pc-bios/ +%endif + +popd + +%install +pushd %{qemu_kvm_build} +%define _udevdir %(pkg-config --variable=udevdir udev) +%define _udevrulesdir %{_udevdir}/rules.d + +install -D -p -m 0644 %{SOURCE4} $RPM_BUILD_ROOT%{_unitdir}/ksm.service +install -D -p -m 0644 %{SOURCE5} $RPM_BUILD_ROOT%{_sysconfdir}/sysconfig/ksm +install -D -p -m 0755 ksmctl $RPM_BUILD_ROOT%{_libexecdir}/ksmctl + 
+install -D -p -m 0644 %{SOURCE7} $RPM_BUILD_ROOT%{_unitdir}/ksmtuned.service +install -D -p -m 0755 %{SOURCE8} $RPM_BUILD_ROOT%{_sbindir}/ksmtuned +install -D -p -m 0644 %{SOURCE9} $RPM_BUILD_ROOT%{_sysconfdir}/ksmtuned.conf +install -D -p -m 0644 %{SOURCE26} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/vhost.conf +%ifarch s390x + install -D -p -m 0644 %{SOURCE30} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else +%ifarch %{ix86} x86_64 + install -D -p -m 0644 %{SOURCE31} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%else + install -D -p -m 0644 %{SOURCE27} $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/kvm.conf +%endif +%endif + +mkdir -p $RPM_BUILD_ROOT%{_bindir}/ +mkdir -p $RPM_BUILD_ROOT%{_udevrulesdir}/ +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Create new directories and put them all under tests-src +mkdir -p $RPM_BUILD_ROOT%{testsdir}/python +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/avocado +mkdir -p $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests +mkdir -p $RPM_BUILD_ROOT%{testsdir}/scripts/qmp + +install -p -m 0755 udev-kvm-check $RPM_BUILD_ROOT%{_udevdir} +install -p -m 0644 %{SOURCE34} $RPM_BUILD_ROOT%{_udevrulesdir} + +install -m 0644 scripts/dump-guest-memory.py \ + $RPM_BUILD_ROOT%{_datadir}/%{name} + +# Install avocado_qemu tests +cp -R tests/avocado/* $RPM_BUILD_ROOT%{testsdir}/tests/avocado/ + +# Install qemu.py and qmp/ scripts required to run avocado_qemu tests +cp -R python/qemu $RPM_BUILD_ROOT%{testsdir}/python +cp -R scripts/qmp/* $RPM_BUILD_ROOT%{testsdir}/scripts/qmp +install -p -m 0644 ../tests/Makefile.include $RPM_BUILD_ROOT%{testsdir}/tests/ + +# Install qemu-iotests +cp -R ../tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +cp -ur tests/qemu-iotests/* $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/ +# Avoid ambiguous 'python' interpreter name +find $RPM_BUILD_ROOT%{testsdir}/tests/qemu-iotests/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env 
\(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/env \(python\|python3\)+%{__python3}+' {} \; +find $RPM_BUILD_ROOT%{testsdir}/scripts/qmp/* -maxdepth 1 -type f -exec sed -i -e '1 s+/usr/bin/\(python\|python3\)+%{__python3}+' {} \; + +install -p -m 0644 %{SOURCE36} $RPM_BUILD_ROOT%{testsdir}/README + +make DESTDIR=$RPM_BUILD_ROOT \ + sharedir="%{_datadir}/%{name}" \ + datadir="%{_datadir}/%{name}" \ + install + +mkdir -p $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset + +# Move vhost-user JSON files to the standard "qemu" directory +mkdir -p $RPM_BUILD_ROOT%{_datadir}/qemu +mv $RPM_BUILD_ROOT%{_datadir}/%{name}/vhost-user $RPM_BUILD_ROOT%{_datadir}/qemu/ + +# Install qemu-guest-agent service and udev rules +install -m 0644 %{_sourcedir}/qemu-guest-agent.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-ga.sysconfig %{buildroot}%{_sysconfdir}/sysconfig/qemu-ga +install -m 0644 %{_sourcedir}/99-qemu-guest-agent.rules %{buildroot}%{_udevrulesdir} + +# - the fsfreeze hook script: +install -D --preserve-timestamps \ + scripts/qemu-guest-agent/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook +# Workaround for the missing /etc/qemu-kvm/fsfreeze-hook +# Please, do not carry this over to RHEL-9 +mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/ +ln -s %{_sysconfdir}/qemu-ga/fsfreeze-hook \ + $RPM_BUILD_ROOT%{_sysconfdir}/qemu-kvm/fsfreeze-hook + +# - the directory for user scripts: +mkdir $RPM_BUILD_ROOT%{_sysconfdir}/qemu-ga/fsfreeze-hook.d + +# - and the fsfreeze script samples: +mkdir --parents $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ +install --preserve-timestamps --mode=0644 \ + scripts/qemu-guest-agent/fsfreeze-hook.d/*.sample \ + $RPM_BUILD_ROOT%{_datadir}/%{name}/qemu-ga/fsfreeze-hook.d/ + +# - Install dedicated log directory: +mkdir -p -v $RPM_BUILD_ROOT%{_localstatedir}/log/qemu-ga/ + +mkdir -p $RPM_BUILD_ROOT%{_bindir} 
+install -c -m 0755 qga/qemu-ga ${RPM_BUILD_ROOT}%{_bindir}/qemu-ga + +mkdir -p $RPM_BUILD_ROOT%{_mandir}/man8 + +install -m 0755 %{kvm_target}-softmmu/qemu-system-%{kvm_target} $RPM_BUILD_ROOT%{_libexecdir}/qemu-kvm +install -m 0644 qemu-kvm.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-log.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -m 0644 qemu-kvm-simpletrace.stp $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/ +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d" +install -c -m 0644 scripts/systemtap/script.d/qemu_kvm.stp "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/script.d/" +install -d -m 0755 "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d" +install -c -m 0644 scripts/systemtap/conf.d/qemu_kvm.conf "$RPM_BUILD_ROOT%{_datadir}/%{name}/systemtap/conf.d/" + + +rm $RPM_BUILD_ROOT/%{_datadir}/applications/qemu.desktop +rm $RPM_BUILD_ROOT%{_bindir}/qemu-system-%{kvm_target} +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-simpletrace.stp +rm $RPM_BUILD_ROOT%{_datadir}/systemtap/tapset/qemu-system-%{kvm_target}-log.stp +rm $RPM_BUILD_ROOT%{_bindir}/elf2dmp + +# Install simpletrace +install -m 0755 scripts/simpletrace.py $RPM_BUILD_ROOT%{_datadir}/%{name}/simpletrace.py +# Avoid ambiguous 'python' interpreter name +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool scripts/tracetool/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/backend scripts/tracetool/backend/*.py +mkdir -p $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format +install -m 0644 -t $RPM_BUILD_ROOT%{_datadir}/%{name}/tracetool/format scripts/tracetool/format/*.py + +mkdir -p $RPM_BUILD_ROOT%{qemudocdir} +install -p -m 0644 -t ${RPM_BUILD_ROOT}%{qemudocdir} ../README.rst 
../README.systemtap ../COPYING ../COPYING.LIB ../LICENSE ../docs/interop/qmp-spec.txt + +# Rename man page +pushd ${RPM_BUILD_ROOT}%{_mandir}/man1/ +for fn in qemu.1*; do + mv $fn "qemu-kvm${fn#qemu}" +done +popd +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man1/* +chmod -x ${RPM_BUILD_ROOT}%{_mandir}/man8/* + +install -D -p -m 0644 ../qemu.sasl $RPM_BUILD_ROOT%{_sysconfdir}/sasl2/%{name}.conf + +# Install keymaps +pushd pc-bios/keymaps +for kmp in *; do + install -m 0644 $kmp ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/ +done +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/keymaps/*.stamp +popd + +# Provided by package openbios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-ppc +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc32 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/openbios-sparc64 +# Provided by package SLOF +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/slof.bin + +# Remove unpackaged files. +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/palcode-clipper +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/petalogix*.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bamboo.dtb +rm -f ${RPM_BUILD_ROOT}%{_datadir}/%{name}/ppc_rom.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-zipl.rom +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot.e500 +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu_vga.ndrv +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/skiboot.lid +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qboot.rom + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-ccw.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/s390-netboot.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/hppa-firmware.img +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/canyonlands.dtb +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/u-boot-sam460-20100605.bin + +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/firmware +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-*.fd +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/edk2-licenses.txt + +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv32-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-sifive_u-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-virt-fw_jump.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/opensbi-riscv64-generic-fw_dynamic.* +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/qemu-nsis.bmp +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/npcm7xx_bootrom.bin + +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/ui-spice-app.so + +# Remove virtfs-proxy-helper files +rm -rf ${RPM_BUILD_ROOT}%{_libexecdir}/virtfs-proxy-helper +rm -rf ${RPM_BUILD_ROOT}%{_mandir}/man1/virtfs-proxy-helper* + +%ifarch s390x + # Use the s390-*.imgs that we've just built, not the pre-built ones + install -m 0644 pc-bios/s390-ccw/s390-ccw.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ + install -m 0644 pc-bios/s390-ccw/s390-netboot.img $RPM_BUILD_ROOT%{_datadir}/%{name}/ +%else + rm -rf ${RPM_BUILD_ROOT}%{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%endif + +%ifnarch x86_64 + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/kvmvapic.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/linuxboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/multiboot_dma.bin + rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pvh.bin +%endif + +# Remove sparc files +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,tcx.bin +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/QEMU,cgthree.bin + +# Remove ivshmem example programs +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-client +rm -rf ${RPM_BUILD_ROOT}%{_bindir}/ivshmem-server + +# Remove efi roms +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/efi*.rom + +# Provided by package ipxe +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/pxe*rom +# Provided by package vgabios +rm -rf 
${RPM_BUILD_ROOT}%{_datadir}/%{name}/vgabios*bin +# Provided by package seabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/bios*.bin +# Provided by package sgabios +rm -rf ${RPM_BUILD_ROOT}%{_datadir}/%{name}/sgabios.bin + +# the pxe gpxe images will be symlinks to the images on +# /usr/share/ipxe, as QEMU doesn't know how to look +# for other paths, yet. +pxe_link() { + ln -s ../ipxe.efi/$2.rom %{buildroot}%{_datadir}/%{name}/efi-$1.rom +} + +%ifnarch aarch64 s390x +pxe_link e1000 8086100e +pxe_link ne2k_pci 10ec8029 +pxe_link pcnet 10222000 +pxe_link rtl8139 10ec8139 +pxe_link virtio 1af41000 +pxe_link e1000e 808610d3 +%endif + +rom_link() { + ln -s $1 %{buildroot}%{_datadir}/%{name}/$2 +} + +%ifnarch aarch64 s390x + rom_link ../seavgabios/vgabios-isavga.bin vgabios.bin + rom_link ../seavgabios/vgabios-cirrus.bin vgabios-cirrus.bin + rom_link ../seavgabios/vgabios-qxl.bin vgabios-qxl.bin + rom_link ../seavgabios/vgabios-stdvga.bin vgabios-stdvga.bin + rom_link ../seavgabios/vgabios-vmware.bin vgabios-vmware.bin + rom_link ../seavgabios/vgabios-virtio.bin vgabios-virtio.bin + rom_link ../seavgabios/vgabios-ramfb.bin vgabios-ramfb.bin + rom_link ../seavgabios/vgabios-bochs-display.bin vgabios-bochs-display.bin +%endif +%ifarch x86_64 + rom_link ../seabios/bios.bin bios.bin + rom_link ../seabios/bios-256k.bin bios-256k.bin + rom_link ../sgabios/sgabios.bin sgabios.bin +%endif + +%if 0%{have_kvm_setup} + install -D -p -m 755 %{SOURCE21} $RPM_BUILD_ROOT%{_prefix}/lib/systemd/kvm-setup + install -D -p -m 644 %{SOURCE22} $RPM_BUILD_ROOT%{_unitdir}/kvm-setup.service + install -D -p -m 644 %{SOURCE23} $RPM_BUILD_ROOT%{_presetdir}/85-kvm.preset +%endif + +%if 0%{have_memlock_limits} + install -D -p -m 644 %{SOURCE28} $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif + +# Install rules to use the bridge helper with libvirt's virbr0 +install -D -m 0644 %{SOURCE12} $RPM_BUILD_ROOT%{_sysconfdir}/%{name}/bridge.conf + +# Install qemu-pr-helper 
service +install -m 0644 %{_sourcedir}/qemu-pr-helper.service %{buildroot}%{_unitdir} +install -m 0644 %{_sourcedir}/qemu-pr-helper.socket %{buildroot}%{_unitdir} + +find $RPM_BUILD_ROOT -name '*.la' -or -name '*.a' | xargs rm -f + +# We need to make the block device modules and other qemu SO files executable +# otherwise RPM won't pick up their dependencies. +chmod +x $RPM_BUILD_ROOT%{_libdir}/qemu-kvm/*.so + +# Remove buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/interop/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/system/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/tools/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/user/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/devel/.buildinfo +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/.buildinfo + +# Remove spec +rm -rf $RPM_BUILD_ROOT%{qemudocdir}/specs + +popd + +%check +pushd %{qemu_kvm_build} +echo "Testing qemu-kvm-build" +export DIFF=diff; make check V=1 +popd + +%post -n qemu-kvm-common +%systemd_post ksm.service +%systemd_post ksmtuned.service + +getent group kvm >/dev/null || groupadd -g 36 -r kvm +getent group qemu >/dev/null || groupadd -g 107 -r qemu +getent passwd qemu >/dev/null || \ +useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ + -c "qemu user" qemu + +# load kvm modules now, so we can make sure no reboot is needed. 
+# If there's already a kvm module installed, we don't mess with it +%udev_rules_update +sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : + udevadm trigger --subsystem-match=misc --sysname-match=kvm --action=add || : +# The 0-prefixed test stays valid even when have_kvm_setup is undefined, +# matching the guards used for the same flag in the install and files sections. +%if 0%{have_kvm_setup} + systemctl daemon-reload # Make sure it sees the new presets and unitfile + %systemd_post kvm-setup.service + if systemctl is-enabled kvm-setup.service > /dev/null; then + # Best effort, like the steps above: a failed start must not fail the scriptlet + systemctl start kvm-setup.service || : + fi +%endif + +%preun -n qemu-kvm-common +%systemd_preun ksm.service +%systemd_preun ksmtuned.service +# Same 0-prefixed guard as in the post scriptlet above +%if 0%{have_kvm_setup} +%systemd_preun kvm-setup.service +%endif + +%postun -n qemu-kvm-common +%systemd_postun_with_restart ksm.service +%systemd_postun_with_restart ksmtuned.service + +%post -n qemu-guest-agent +%systemd_post qemu-guest-agent.service +%preun -n qemu-guest-agent +%systemd_preun qemu-guest-agent.service +%postun -n qemu-guest-agent +%systemd_postun_with_restart qemu-guest-agent.service + +%files +# Deliberately empty + +%files -n qemu-kvm-docs +%defattr(-,root,root) +%dir %{qemudocdir} +%doc %{qemudocdir}/genindex.html +%doc %{qemudocdir}/search.html +%doc %{qemudocdir}/objects.inv +%doc %{qemudocdir}/searchindex.js +%doc %{qemudocdir}/README.rst +%doc %{qemudocdir}/COPYING +%doc %{qemudocdir}/COPYING.LIB +%doc %{qemudocdir}/LICENSE +%doc %{qemudocdir}/README.systemtap +%doc %{qemudocdir}/qmp-spec.txt +%doc %{qemudocdir}/interop/* +%doc %{qemudocdir}/index.html +%doc %{qemudocdir}/about/* +%doc %{qemudocdir}/system/* +%doc %{qemudocdir}/tools/* +%doc %{qemudocdir}/user/* +%doc %{qemudocdir}/devel/* +%doc %{qemudocdir}/_static/* + +%files -n qemu-kvm-common +%defattr(-,root,root) +%{_mandir}/man7/qemu-qmp-ref.7* +%{_mandir}/man7/qemu-cpu-models.7* +%{_bindir}/qemu-keymap +%{_bindir}/qemu-pr-helper +%{_bindir}/qemu-edid +%{_bindir}/qemu-trace-stap +%{_unitdir}/qemu-pr-helper.service +%{_unitdir}/qemu-pr-helper.socket +%{_mandir}/man7/qemu-ga-ref.7* +%{_mandir}/man8/qemu-pr-helper.8* 
+%{_mandir}/man1/virtiofsd.1* + +%dir %{_datadir}/%{name}/ +%{_datadir}/%{name}/keymaps/ +%{_mandir}/man1/%{name}.1* +%{_mandir}/man1/qemu-trace-stap.1* +%{_mandir}/man7/qemu-block-drivers.7* +%attr(4755, -, -) %{_libexecdir}/qemu-bridge-helper +%config(noreplace) %{_sysconfdir}/sasl2/%{name}.conf +%{_unitdir}/ksm.service +%{_libexecdir}/ksmctl +%config(noreplace) %{_sysconfdir}/sysconfig/ksm +%{_unitdir}/ksmtuned.service +%{_sbindir}/ksmtuned +%{_udevdir}/udev-kvm-check +%{_udevrulesdir}/81-kvm-rhel.rules +%ghost %{_sysconfdir}/kvm +%config(noreplace) %{_sysconfdir}/ksmtuned.conf +%dir %{_sysconfdir}/%{name} +%config(noreplace) %{_sysconfdir}/%{name}/bridge.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/vhost.conf +%config(noreplace) %{_sysconfdir}/modprobe.d/kvm.conf +%{_datadir}/%{name}/simpletrace.py* +%{_datadir}/%{name}/tracetool/*.py* +%{_datadir}/%{name}/tracetool/backend/*.py* +%{_datadir}/%{name}/tracetool/format/*.py* + +%ifarch x86_64 + %{_datadir}/%{name}/bios.bin + %{_datadir}/%{name}/bios-256k.bin + %{_datadir}/%{name}/linuxboot.bin + %{_datadir}/%{name}/multiboot.bin + %{_datadir}/%{name}/multiboot_dma.bin + %{_datadir}/%{name}/kvmvapic.bin + %{_datadir}/%{name}/sgabios.bin + %{_datadir}/%{name}/pvh.bin +%endif +%ifarch s390x + %{_datadir}/%{name}/s390-ccw.img + %{_datadir}/%{name}/s390-netboot.img +%endif +%ifnarch aarch64 s390x + %{_datadir}/%{name}/vgabios.bin + %{_datadir}/%{name}/vgabios-cirrus.bin + %{_datadir}/%{name}/vgabios-qxl.bin + %{_datadir}/%{name}/vgabios-stdvga.bin + %{_datadir}/%{name}/vgabios-vmware.bin + %{_datadir}/%{name}/vgabios-virtio.bin + %{_datadir}/%{name}/vgabios-ramfb.bin + %{_datadir}/%{name}/vgabios-bochs-display.bin + %{_datadir}/%{name}/efi-e1000.rom + %{_datadir}/%{name}/efi-e1000e.rom + %{_datadir}/%{name}/efi-virtio.rom + %{_datadir}/%{name}/efi-pcnet.rom + %{_datadir}/%{name}/efi-rtl8139.rom + %{_datadir}/%{name}/efi-ne2k_pci.rom + %{_libdir}/qemu-kvm/hw-display-virtio-vga.so +%endif + 
%{_libdir}/%{name}/hw-display-virtio-gpu-gl.so +%ifnarch s390x + %{_libdir}/%{name}/hw-display-virtio-gpu-pci-gl.so +%endif +%ifarch x86_64 %{power64} + %{_libdir}/%{name}/hw-display-virtio-vga-gl.so +%endif + %{_libdir}/%{name}/accel-qtest-%{kvm_target}.so +%ifarch x86_64 + %{_libdir}/%{name}/accel-tcg-%{kvm_target}.so +%endif +%{_libdir}/%{name}/hw-usb-host.so +%{_datadir}/icons/* +%{_datadir}/%{name}/linuxboot_dma.bin +%{_datadir}/%{name}/dump-guest-memory.py* +%{_datadir}/%{name}/trace-events-all +%if 0%{have_kvm_setup} + %{_prefix}/lib/systemd/kvm-setup + %{_unitdir}/kvm-setup.service + %{_presetdir}/85-kvm.preset +%endif +%if 0%{have_memlock_limits} + # Marked as config like every other /etc file in this list, so local + # edits to the memlock limits are kept across package upgrades. + %config(noreplace) %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf +%endif +%{_libexecdir}/virtiofsd + +# This is the standard location for vhost-user JSON files defined in the +# vhost-user specification for interoperability with other software. Unlike +# most other paths we use it's "qemu" instead of "qemu-kvm". +%{_datadir}/qemu/vhost-user/50-qemu-virtiofsd.json + +%files -n qemu-kvm-core +%defattr(-,root,root) +%{_libexecdir}/qemu-kvm +%{_datadir}/systemtap/tapset/qemu-kvm.stp +%{_datadir}/systemtap/tapset/qemu-kvm-log.stp +%{_datadir}/systemtap/tapset/qemu-kvm-simpletrace.stp +%{_datadir}/%{name}/systemtap/script.d/qemu_kvm.stp +%{_datadir}/%{name}/systemtap/conf.d/qemu_kvm.conf + +%{_libdir}/qemu-kvm/hw-display-virtio-gpu.so +%ifarch s390x + %{_libdir}/qemu-kvm/hw-s390x-virtio-gpu-ccw.so +%else + %{_libdir}/qemu-kvm/hw-display-virtio-gpu-pci.so +%endif + +%files -n qemu-img +%defattr(-,root,root) +%{_bindir}/qemu-img +%{_bindir}/qemu-io +%{_bindir}/qemu-nbd +%{_bindir}/qemu-storage-daemon +%{_mandir}/man1/qemu-img.1* +%{_mandir}/man8/qemu-nbd.8* +%{_mandir}/man1/qemu-storage-daemon.1* +%{_mandir}/man7/qemu-storage-daemon-qmp-ref.7* + +%files -n qemu-guest-agent +%defattr(-,root,root,-) +%doc COPYING README.rst +%{_bindir}/qemu-ga +%{_mandir}/man8/qemu-ga.8* +%{_unitdir}/qemu-guest-agent.service 
+%{_udevrulesdir}/99-qemu-guest-agent.rules +%config(noreplace) %{_sysconfdir}/sysconfig/qemu-ga +%{_sysconfdir}/qemu-ga +%{_sysconfdir}/qemu-kvm/fsfreeze-hook +%{_datadir}/%{name}/qemu-ga +%dir %{_localstatedir}/log/qemu-ga + +%files tests +%{testsdir} + +%files block-curl +%{_libdir}/qemu-kvm/block-curl.so + +# Optional subpackage; 0-prefixed guard like ui-spice and ui-opengl below, +# so the test stays valid even when the flag is undefined. +%if 0%{have_gluster} +%files block-gluster +%{_libdir}/qemu-kvm/block-gluster.so +%endif + +%files block-iscsi +%{_libdir}/qemu-kvm/block-iscsi.so + +%files block-rbd +%{_libdir}/qemu-kvm/block-rbd.so + +%files block-ssh +%{_libdir}/qemu-kvm/block-ssh.so + +%if 0%{have_spice} +%files ui-spice + %{_libdir}/qemu-kvm/hw-usb-smartcard.so + %{_libdir}/qemu-kvm/audio-spice.so + %{_libdir}/qemu-kvm/ui-spice-core.so + %{_libdir}/qemu-kvm/chardev-spice.so +%ifarch x86_64 + %{_libdir}/qemu-kvm/hw-display-qxl.so +%endif +%endif + +%if 0%{have_opengl} +%files ui-opengl + %{_libdir}/qemu-kvm/ui-egl-headless.so + %{_libdir}/qemu-kvm/ui-opengl.so +%endif + +# Optional subpackage; same 0-prefixed guard style as the two above +%if 0%{have_usbredir} +%files hw-usbredir + %{_libdir}/qemu-kvm/hw-usb-redirect.so +%endif + + +%changelog +* Thu Apr 21 2022 Jon Maloy - 6.2.0-12 +- kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2040738] +- kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2063262] +- Resolves: bz#2040738 + (CVE-2021-4207 virt:rhel/qemu-kvm: QEMU: QXL: double fetch in qxl_cursor() can lead to heap buffer overflow [rhel-8]) +- Resolves: bz#2063262 + (CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8]) + +* Thu Apr 21 2022 Jon Maloy - 6.2.0-11 +- kvm-hw-intc-arm_gicv3-Check-for-MEMTX_OK-instead-of-MEMT.patch [bz#1999236] +- kvm-softmmu-physmem-Simplify-flatview_write-and-address_.patch [bz#1999236] +- kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch [bz#1999236] +- Resolves: bz#1999236 + (CVE-2021-3750 virt:rhel/qemu-kvm: QEMU: hcd-ehci: DMA reentrancy issue leads to use-after-free [rhel-8]) + +* Thu Apr 21 2022 Jon 
Maloy - 6.2.0-10 +- kvm-RHEL-disable-seqpacket-for-vhost-vsock-device-in-rhe.patch [bz#2068202] +- kvm-block-Lock-AioContext-for-drain_end-in-blockdev-reop.patch [bz#2067118] +- kvm-iotests-Test-blockdev-reopen-with-iothreads-and-thro.patch [bz#2067118] +- kvm-s390x-css-fix-PMCW-invalid-mask.patch [bz#2071070] +- kvm-Set-permission-on-installing-files.patch [bz#2072377] +- Resolves: bz#2068202 + (RHEL 9.0 guest with vsock device migration failed from RHEL 9.0 > RHEL 8.6 [rhel-8.7.0]) +- Resolves: bz#2067118 + (qemu crash after execute blockdev-reopen with iothread) +- Resolves: bz#2071070 + (s390x/css: fix PMCW invalid mask) +- Resolves: bz#2072377 + (Fix build warnings that occur when installing the keymap files) + +* Wed Apr 06 2022 Jon Maloy - 6.2.0-9 +- kvm-Revert-redhat-Add-hw_compat_4_2_extra-and-apply-to-u.patch [bz#2062613] +- kvm-Revert-redhat-Enable-FDC-device-for-upstream-machine.patch [bz#2062613] +- kvm-Revert-redhat-Expose-upstream-machines-pc-4.2-and-pc.patch [bz#2062613] +- kvm-hw-virtio-vdpa-Fix-leak-of-host-notifier-memory-regi.patch [bz#2060843] +- kvm-pci-expose-TYPE_XIO3130_DOWNSTREAM-name.patch [bz#2062610] +- kvm-acpi-pcihp-pcie-set-power-on-cap-on-parent-slot.patch [bz#2062610] +- kvm-vmxcap-Add-5-level-EPT-bit.patch [bz#2065207] +- kvm-i386-Add-Icelake-Server-v6-CPU-model-with-5-level-EP.patch [bz#2065207] +- kvm-acpi-fix-QEMU-crash-when-started-with-SLIC-table.patch [bz#2062611] +- kvm-tests-acpi-whitelist-expected-blobs-before-changing-.patch [bz#2062611] +- kvm-tests-acpi-add-SLIC-table-test.patch [bz#2062611] +- kvm-tests-acpi-SLIC-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-manually-pad-OEM_ID-OEM_TABLE_ID-for-test.patch [bz#2062611] +- kvm-tests-acpi-whitelist-nvdimm-s-SSDT-and-FACP.slic-exp.patch [bz#2062611] +- kvm-acpi-fix-OEM-ID-OEM-Table-ID-padding.patch [bz#2062611] +- kvm-tests-acpi-update-expected-blobs.patch [bz#2062611] +- kvm-tests-acpi-test-short-OEM_ID-OEM_TABLE_ID-values-in-.patch [bz#2062611] +- 
kvm-rhel-workaround-for-lack-of-binary-patches-in-SRPM.patch [bz#2062611] +- Resolves: bz#2062613 + (Revert IBM-specific Ubuntu-compatibility machine type for 8.6-AV GA [rhel-8.7.0]) +- Resolves: bz#2060843 + ([virtual network][vDPA] qemu crash after hot unplug vdpa device [rhel-8.7.0]) +- Resolves: bz#2062610 + (Do operation to disk will hang in the guest of target host after hotplugging and migrating [rhel-8.7.0]) +- Resolves: bz#2065207 + (Win11 (q35+edk2) guest broke after install wsl2 through 'wsl --install -d Ubuntu-20.04' [rhel-8.7.0]) +- Resolves: bz#2062611 + (Guest can not start with SLIC acpi table [rhel-8.7.0]) + +* Tue Feb 22 2022 Jon Maloy - 6.2.0-8 +- kvm-block-nbd-Delete-reconnect-delay-timer-when-done.patch [bz#2035185] +- kvm-block-nbd-Assert-there-are-no-timers-when-closed.patch [bz#2035185] +- kvm-iotests.py-Add-QemuStorageDaemon-class.patch [bz#2035185] +- kvm-iotests-281-Test-lingering-timers.patch [bz#2035185] +- kvm-block-nbd-Move-s-ioc-on-AioContext-change.patch [bz#2035185] +- kvm-iotests-281-Let-NBD-connection-yield-in-iothread.patch [bz#2035185] +- Resolves: bz#2035185 + (Qemu core dump when start guest with nbd node or do block jobs to nbd node) + +* Tue Feb 15 2022 Jon Maloy - 6.2.0-7 +- kvm-numa-Enable-numa-for-SGX-EPC-sections.patch [bz#1518984] +- kvm-numa-Support-SGX-numa-in-the-monitor-and-Libvirt-int.patch [bz#1518984] +- kvm-doc-Add-the-SGX-numa-description.patch [bz#1518984] +- kvm-Enable-SGX-RH-Only.patch [bz#1518984] +- kvm-qapi-Cleanup-SGX-related-comments-and-restore-sectio.patch [bz#1518984] +- kvm-block-io-Update-BSC-only-if-want_zero-is-true.patch [bz#2041480] +- kvm-iotests-block-status-cache-New-test.patch [bz#2041480] +- Resolves: bz#1518984 + ([Intel 8.6 Feat] qemu-kvm: SGX 1.5 (SGX1 + Flexible Launch Control) support) +- Resolves: bz#2041480 + ([incremental_backup] Inconsistent block status reply in qemu-nbd) + +* Tue Feb 08 2022 Jon Maloy - 6.2.0-6 +- kvm-virtiofsd-Drop-membership-of-all-supplementary-group.patch 
[bz#2046198] +- kvm-softmmu-fix-device-deletion-events-with-device-JSON-.patch [bz#2033279] +- kvm-block-backend-prevent-dangling-BDS-pointers-across-a.patch [bz#2021778 bz#2036178] +- kvm-iotests-stream-error-on-reset-New-test.patch [bz#2021778 bz#2036178] +- kvm-block-rbd-fix-handling-of-holes-in-.bdrv_co_block_st.patch [bz#2037135] +- kvm-block-rbd-workaround-for-ceph-issue-53784.patch [bz#2037135] +- Resolves: bz#2046198 + (CVE-2022-0358 virt:av/qemu-kvm: QEMU: virtiofsd: potential privilege escalation via CVE-2018-13405 [rhel-8.6]) +- Resolves: bz#2033279 + ([wrb][qemu-kvm 6.2] The hot-unplugged device can not be hot-plugged back) +- Resolves: bz#2021778 + (Qemu core dump when do full backup during system reset) +- Resolves: bz#2036178 + (Qemu core dumped when do block-stream to a snapshot node on non-enough space storage) +- Resolves: bz#2037135 + (Booting from Local Snapshot Core Dumped Whose Backing File Is Based on RBD) + +* Tue Jan 25 2022 Jon Maloy - 6.2.0-5 +- kvm-acpi-validate-hotplug-selector-on-access.patch [bz#2036580] +- kvm-x86-Add-q35-RHEL-8.6.0-machine-type.patch [bz#2031035] +- Resolves: bz#2036580 + (CVE-2021-4158 virt:rhel/qemu-kvm: QEMU: NULL pointer dereference in pci_write() in hw/acpi/pcihp.c [rhel-8]) +- Resolves: bz#2031035 + (Add rhel-8.6.0 machine types for RHEL 8.6 [x86]) + +* Mon Jan 17 2022 Jon Maloy - 6.2.0-4 +- kvm-hw-arm-virt-Register-iommu-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Register-its-as-a-class-property.patch [bz#2031039] +- kvm-hw-arm-virt-Rename-default_bus_bypass_iommu.patch [bz#2031039] +- kvm-hw-arm-virt-Add-8.6-machine-type.patch [bz#2031039] +- kvm-hw-arm-virt-Check-no_tcg_its-and-minor-style-changes.patch [bz#2031039] +- kvm-rhel-machine-types-x86-set-prefer_sockets.patch [bz#2029582] +- Resolves: bz#2031039 + (Add rhel-8.6.0 machine types for RHEL 8.6 [aarch64]) +- Resolves: bz#2029582 + ([8.6] machine types: 6.2: Fix prefer_sockets) + +* Mon Jan 03 2022 Jon Maloy - 6.2.0-2 +- 
kvm-redhat-Add-rhel8.6.0-machine-type-for-s390x.patch [bz#2005325] +- kvm-redhat-Define-pseries-rhel8.6.0-machine-type.patch [bz#2031041] +- Resolves: bz#2005325 + (Fix CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#2031041 + (Add rhel-8.6.0 machine types for RHEL 8.6 [ppc64le]) + +* Thu Dec 16 2021 Jon Maloy - 6.2.0-1.el8 +- Rebase to qemu-kvm 6.2.0 +- Resolves bz#2027716 + +* Mon Nov 22 2021 Jon Maloy - 6.1.0-5 +- kvm-e1000-fix-tx-re-entrancy-problem.patch [bz#1930092] +- kvm-hw-scsi-scsi-disk-MODE_PAGE_ALLS-not-allowed-in-MODE.patch [bz#2020720] +- Resolves: bz#1930092 + (CVE-2021-20257 virt:rhel/qemu-kvm: QEMU: net: e1000: infinite loop while processing transmit descriptors [rhel-8.5.0]) +- Resolves: bz#2020720 + (CVE-2021-3930 virt:rhel/qemu-kvm: QEMU: off-by-one error in mode_sense_page() in hw/scsi/scsi-disk.c [rhel-8]) + +* Thu Oct 21 2021 Jon Maloy - 6.1.0-4 +- kvm-spec-Remove-qemu-kiwi-build.patch [bz#2002694] +- kvm-hw-arm-virt-Add-hw_compat_rhel_8_5-to-8.5-machine-ty.patch [bz#1998947] +- Resolves: bz#2002694 + (remove qemu-kiwi rpm from qemu-kvm sources in rhel-8.6) +- Resolves: bz#1998947 + (Add machine type compatibility update for 6.1 rebase [aarch64]) + +* Tue Oct 12 2021 Jon Maloy - 6.1.0-3 +- kvm-virtio-net-fix-use-after-unmap-free-for-sg.patch [bz#1999221] +- Resolves: bz#1999221 + (CVE-2021-3748 virt:rhel/qemu-kvm: QEMU: virtio-net: heap use-after-free in virtio_net_receive_rcu [rhel-8]) + +* Fri Oct 01 2021 Jon Maloy - 6.1.0-2 +- kvm-qxl-fix-pre-save-logic.patch [bz#2002907] +- kvm-redhat-Define-hw_compat_rhel_8_5.patch [bz#1998949] +- kvm-redhat-Update-pseries-rhel8.5.0.patch [bz#1998949] +- kvm-redhat-Add-s390x-machine-type-compatibility-update-f.patch [bz#1998950] +- Resolves: bz#2002907 + (Unexpectedly failed when managedsave the guest which has qxl video device) +- Resolves: bz#1998949 + (Add machine type compatibility update for 6.1 rebase [ppc64le]) +- Resolves: bz#1998950 + (Add machine type compatibility update for 6.1 
rebase [s390x]) + +* Wed Aug 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-29.el8 +- kvm-file-posix-Cap-max_iov-at-IOV_MAX.patch [bz#1994494] +- kvm-migration-Move-yank-outside-qemu_start_incoming_migr.patch [bz#1974366] +- Resolves: bz#1994494 + (VM remains in paused state when trying to write on a resized disk resides on iscsi) +- Resolves: bz#1974366 + (Fail to set migrate incoming for 2nd time after the first time failed) + +* Wed Aug 18 2021 Danilo Cesar Lemes de Paula - 6.0.0-28.el8 +- kvm-iotests-Improve-and-rename-test-291-to-qemu-img-bitm.patch [bz#1946084] +- kvm-qemu-img-Fail-fast-on-convert-bitmaps-with-inconsist.patch [bz#1946084] +- kvm-qemu-img-Add-skip-broken-bitmaps-for-convert-bitmaps.patch [bz#1946084] +- kvm-audio-Never-send-migration-section.patch [bz#1991671] +- Resolves: bz#1946084 + (qemu-img convert --bitmaps fail if a bitmap is inconsistent) +- Resolves: bz#1991671 + (vmstate differs between -audiodev and QEMU_AUDIO_DRV when no sound frontends devs present.) + +* Wed Aug 04 2021 Miroslav Rezanina - 6.0.0-27 +- kvm-migration-move-wait-unplug-loop-to-its-own-function.patch [bz#1976852] +- kvm-migration-failover-continue-to-wait-card-unplug-on-e.patch [bz#1976852] +- kvm-aarch64-Add-USB-storage-devices.patch [bz#1974579] +- Resolves: bz#1976852 + ([failover vf migration] The failover vf will be unregistered if canceling the migration whose status is "wait-unplug") +- Resolves: bz#1974579 + (It's not possible to start installation from a virtual USB device on aarch64) + +* Thu Jul 29 2021 Miroslav Rezanina - 6.0.0-26 +- kvm-acpi-pc-revert-back-to-v5.2-PCI-slot-enumeration.patch [bz#1977798] +- kvm-migration-failover-reset-partially_hotplugged.patch [bz#1787194] +- kvm-hmp-Fix-loadvm-to-resume-the-VM-on-success-instead-o.patch [bz#1959676] +- kvm-migration-Move-bitmap_mutex-out-of-migration_bitmap_.patch [bz#1959729] +- kvm-i386-cpu-Expose-AVX_VNNI-instruction-to-guest.patch [bz#1924822] +- kvm-ratelimit-protect-with-a-mutex.patch [bz#1838221] +- 
kvm-Update-Linux-headers-to-5.13-rc4.patch [bz#1838221] +- kvm-i386-Add-ratelimit-for-bus-locks-acquired-in-guest.patch [bz#1838221] +- kvm-iothread-generalize-iothread_set_param-iothread_get_.patch [bz#1930286] +- kvm-iothread-add-aio-max-batch-parameter.patch [bz#1930286] +- kvm-linux-aio-limit-the-batch-size-using-aio-max-batch-p.patch [bz#1930286] +- kvm-block-nvme-Fix-VFIO_MAP_DMA-failed-No-space-left-on-.patch [bz#1848881] +- Resolves: bz#1977798 + (RHEL8.5 guest network interface name changed after upgrade to qemu-6.0) +- Resolves: bz#1787194 + (After canceling the migration of a vm with VF which enables failover, using "migrate -d tcp:invalid uri" to re-migrating the vm will cause the VF in vm to be hot-unplug.) +- Resolves: bz#1959676 + (guest status is paused after loadvm on rhel8.5.0) +- Resolves: bz#1959729 + (SAP/3TB VM migration slowness [idle db]) +- Resolves: bz#1924822 + ([Intel 8.5 FEAT] qemu-kvm AVX2 VNNI - Fast Train) +- Resolves: bz#1838221 + ([Intel 8.5 FEAT] qemu-kvm Bus Lock VM Exit - Fast Train) +- Resolves: bz#1930286 + (randread and randrw regression with virtio-blk multi-queue) +- Resolves: bz#1848881 + (nvme:// block driver can exhaust IOMMU DMAs, hanging the VM, possible data loss) + +* Tue Jul 20 2021 Danilo Cesar Lemes de Paula - 6.0.0-25.el8 +- kvm-s390x-cpumodel-add-3931-and-3932.patch [bz#1976171] +- kvm-file-posix-fix-max_iov-for-dev-sg-devices.patch [bz#1943653] +- kvm-scsi-generic-pass-max_segments-via-max_iov-field-in-.patch [bz#1943653] +- kvm-osdep-provide-ROUND_DOWN-macro.patch [bz#1943653] +- kvm-block-backend-align-max_transfer-to-request-alignmen.patch [bz#1943653] +- kvm-block-add-max_hw_transfer-to-BlockLimits.patch [bz#1943653] +- kvm-file-posix-try-BLKSECTGET-on-block-devices-too-do-no.patch [bz#1943653] +- Resolves: bz#1976171 + ([IBM 8.5 FEAT] CPU Model for new IBM Z Hardware - qemu part) +- Resolves: bz#1943653 + (RHV VM pauses due to 'qemu-kvm' getting EINVAL on i/o to a direct lun with scsi passthrough enabled) 
+ +* Fri Jul 16 2021 Danilo Cesar Lemes de Paula - 6.0.0-24.el8 +- kvm-s390x-css-Introduce-an-ESW-struct.patch [bz#1968326] +- kvm-s390x-css-Split-out-the-IRB-sense-data.patch [bz#1968326] +- kvm-s390x-css-Refactor-IRB-construction.patch [bz#1968326] +- kvm-s390x-css-Add-passthrough-IRB.patch [bz#1968326] +- kvm-vhost-user-blk-Fail-gracefully-on-too-large-queue-si.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Make-sure-to-set-Error-on-realize-fai.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Don-t-reconnect-during-initialisation.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Improve-error-reporting-in-realize.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Get-more-feature-flags-from-vhost-dev.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-virtio-Fail-if-iommu_platform-is-requested-but-unsup.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-blk-Check-that-num-queues-is-supported-by.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- kvm-vhost-user-Fix-backends-without-multiqueue-support.patch [bz#1935014 bz#1935019 bz#1935020 bz#1935031] +- Resolves: bz#1968326 + ([vfio_ccw] I/O error when checking format - dasdfmt requires --force in quick mode when passed through) +- Resolves: bz#1935014 + (qemu crash when attach vhost-user-blk-pci with option queue-size=4096) +- Resolves: bz#1935019 + (qemu guest failed boot when attach vhost-user-blk-pci with option iommu_platform=on) +- Resolves: bz#1935020 + (qemu guest failed boot when attach vhost-user-blk-pci with option packed=on) +- Resolves: bz#1935031 + (qemu guest failed boot when attach vhost-user-blk-pci with unmatched num-queues with qsd) + +* Thu Jul 08 2021 Danilo Cesar Lemes de Paula - 6.0.0-23.el8 +- kvm-Add-mtod_check.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-bootp-limit-vendor-specific-area-to-input-packet-mem.patch [bz#1970823 bz#1970842 bz#1970850 
bz#1970858] +- kvm-bootp-check-bootp_input-buffer-size.patch [bz#1970823] +- kvm-upd6-check-udp6_input-buffer-size.patch [bz#1970842] +- kvm-tftp-check-tftp_input-buffer-size.patch [bz#1970850] +- kvm-tftp-introduce-a-header-structure.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-udp-check-upd_input-buffer-size.patch [bz#1970858] +- kvm-Fix-DHCP-broken-in-libslirp-v4.6.0.patch [bz#1970823 bz#1970842 bz#1970850 bz#1970858] +- kvm-redhat-use-the-standard-vhost-user-JSON-path.patch [bz#1804196] +- Resolves: bz#1970823 + (CVE-2021-3592 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (bootp) [rhel-av-8]) +- Resolves: bz#1970842 + (CVE-2021-3593 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp6) [rhel-av-8]) +- Resolves: bz#1970850 + (CVE-2021-3595 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (tftp) [rhel-av-8]) +- Resolves: bz#1970858 + (CVE-2021-3594 virt:av/qemu-kvm: QEMU: slirp: invalid pointer initialization may lead to information disclosure (udp) [rhel-av-8]) +- Resolves: bz#1804196 + (inconsistent paths for interop json files) + +* Fri Jul 02 2021 Danilo Cesar Lemes de Paula - 6.0.0-22.el8 +- kvm-redhat-Expose-upstream-machines-pc-4.2-and-pc-2.11.patch [bz#1897923] +- kvm-redhat-Enable-FDC-device-for-upstream-machines-too.patch [bz#1897923] +- kvm-redhat-Add-hw_compat_4_2_extra-and-apply-to-upstream.patch [bz#1897923] +- kvm-ppc-pef.c-initialize-cgs-ready-in-kvmppc_svm_init.patch [bz#1789757] +- kvm-virtio-gpu-handle-partial-maps-properly.patch [bz#1932279] +- kvm-redhat-Fix-unversioned-Obsoletes-warning.patch [bz#1950405 bz#1967330] +- kvm-redhat-Move-qemu-kvm-docs-dependency-to-qemu-kvm.patch [bz#1950405 bz#1967330] +- kvm-redhat-introducting-qemu-kvm-hw-usbredir.patch [bz#1950405 bz#1967330] +- kvm-spapr-Fix-EEH-capability-issue-on-KVM-guest-for-PCI-.patch [bz#1976015] +- Resolves: bz#1897923 + 
(support Live Migration from Ubuntu 18.04 i440fx to RHEL) +- Resolves: bz#1789757 + ([IBM 8.5 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1932279 + ([aarch64] qemu core dumped when using smmuv3 and iommu_platform enabling at virtio-gpu-pci) +- Resolves: bz#1950405 + (review qemu-kvm-core dependencies) +- Resolves: bz#1967330 + (Make qemu-kvm use versioned obsoletes for qemu-kvm-ma and qemu-kvm-rhev) +- Resolves: bz#1976015 + (spapr: Fix EEH capability issue on KVM guest for PCI passthru) + +* Wed Jun 23 2021 Danilo Cesar Lemes de Paula - 6.0.0-21.el8 +- kvm-block-backend-add-drained_poll.patch [bz#1960137] +- kvm-nbd-server-Use-drained-block-ops-to-quiesce-the-serv.patch [bz#1960137] +- kvm-disable-CONFIG_USB_STORAGE_BOT.patch [bz#1866133] +- kvm-doc-Fix-some-mistakes-in-the-SEV-documentation.patch [bz#1954750] +- kvm-docs-Add-SEV-ES-documentation-to-amd-memory-encrypti.patch [bz#1954750] +- kvm-docs-interop-firmware.json-Add-SEV-ES-support.patch [bz#1954750] +- Resolves: bz#1960137 + ([incremental backup] qemu-kvm hangs when Rebooting the VM during full backup) +- Resolves: bz#1866133 + (Disable usb-bot device in QEMU (unsupported)) +- Resolves: bz#1954750 + (firmware scheme for sev-es) + +* Mon Jun 21 2021 Danilo Cesar Lemes de Paula - 6.0.0-20.el8 +- kvm-x86-Add-x86-rhel8.5-machine-types.patch [bz#1957838] +- kvm-redhat-x86-Enable-kvm-asyncpf-int-by-default.patch [bz#1967603] +- kvm-yank-Unregister-function-when-using-TLS-migration.patch [bz#1964326] +- Resolves: bz#1957838 + (8.5 machine types for x86) +- Resolves: bz#1967603 + (Enable interrupt based asynchronous page fault mechanism by default) +- Resolves: bz#1964326 + (Qemu core dump when do tls migration via tcp protocol) + +* Fri Jun 11 2021 Danilo Cesar Lemes de Paula - 6.0.0-19.el8 +- kvm-pc-bios-s390-ccw-don-t-try-to-read-the-next-block-if.patch [bz#1965626] +- kvm-redhat-Install-the-s390-netboot.img-that-we-ve-built.patch [bz#1966463] +- 
kvm-sockets-update-SOCKET_ADDRESS_TYPE_FD-listen-2-backl.patch [bz#1967177] +- kvm-target-i386-sev-add-support-to-query-the-attestation.patch [bz#1957022] +- kvm-spapr-Don-t-hijack-current_machine-boot_order.patch [bz#1960119] +- kvm-target-i386-Add-CPU-model-versions-supporting-xsaves.patch [bz#1942914] +- kvm-spapr-Remove-stale-comment-about-power-saving-LPCR-b.patch [bz#1940731] +- kvm-spapr-Set-LPCR-to-current-AIL-mode-when-starting-a-n.patch [bz#1940731] +- Resolves: bz#1965626 + (RHEL8.2 - QEMU BIOS fails to read stage2 loader (kvm)) +- Resolves: bz#1966463 + (Rebuild the s390-netboot.img for downstream instead of shipping the upstream image) +- Resolves: bz#1967177 + (QEMU 6.0.0 socket_get_fd() fails with the error "socket_get_fd: too many connections") +- Resolves: bz#1957022 + (SEV: Add support to query the attestation report) +- Resolves: bz#1960119 + ([regression]Failed to reset guest) +- Resolves: bz#1942914 + ([Hyper-V][RHEL8.4]Nested Hyper-V on KVM: On Intel CPU L1 2016 can not start with cpu model Skylake-Server-noTSX-IBRS or Skylake-Client-noTSX-IBRS) +- Resolves: bz#1940731 + ([ppc64le] Hotplug vcpu device hit call trace:[qemu output] KVM: unknown exit, hardware reason 7fff9ce87ed8) + +* Tue Jun 01 2021 Danilo Cesar Lemes de Paula - 6.0.0-18.el8 +- kvm-virtio-net-failover-add-missing-remove_migration_sta.patch [bz#1953045] +- kvm-hw-arm-virt-Add-8.5-machine-type.patch [bz#1957667] +- kvm-hw-arm-virt-Disable-PL011-clock-migration-through-hw.patch [bz#1957667] +- kvm-arm-virt-Register-highmem-and-gic-version-as-class-p.patch [bz#1957667] +- kvm-virtio-blk-Fix-rollback-path-in-virtio_blk_data_plan.patch [bz#1927108] +- kvm-virtio-blk-Configure-all-host-notifiers-in-a-single-.patch [bz#1927108] +- kvm-virtio-scsi-Set-host-notifiers-and-callbacks-separat.patch [bz#1927108] +- kvm-virtio-scsi-Configure-all-host-notifiers-in-a-single.patch [bz#1927108] +- kvm-hw-arm-smmuv3-Another-range-invalidation-fix.patch [bz#1929720] +- Resolves: bz#1953045 + 
(qemu-kvm NULL pointer de-reference during migration at migrate_fd_connect ->...-> notifier_list_notify) +- Resolves: bz#1957667 + ([aarch64] Add 8.5 machine type) +- Resolves: bz#1927108 + (It's too slow to load scsi disk when use 384 vcpus) +- Resolves: bz#1929720 + ([aarch64] Handle vsmmuv3 IOTLB invalidation with non power of 2 size) + +* Tue May 25 2021 Danilo Cesar Lemes de Paula - 6.0.0-17.el8 +- kvm-redhat-s390x-add-rhel-8.5.0-compat-machine.patch [bz#1951476] +- kvm-redhat-add-missing-entries-in-hw_compat_rhel_8_4.patch [bz#1957834] +- kvm-redhat-Define-pseries-rhel8.5.0-machine-type.patch [bz#1957834] +- Resolves: bz#1951476 + ([s390x] RHEL AV 8.5 new machine type for s390x) +- Resolves: bz#1957834 + ([ppc64le] RHEL AV 8.5 new machine type for ppc64le) + +* Mon May 03 2021 Danilo Cesar Lemes de Paula - 6.0.0-16.el8 +- Rebase to qemu-kvm 6.0.0 + +* Wed Apr 28 2021 Danilo Cesar Lemes de Paula - 5.2.0-16.el8 +- kvm-virtio-pci-compat-page-aligned-ATS.patch [bz#1942362] +- Resolves: bz#1942362 + (Live migration with iommu from rhel8.3.1 to rhel8.4 fails: qemu-kvm: get_pci_config_device: Bad config data) + +* Mon Apr 12 2021 Danilo Cesar Lemes de Paula - 5.2.0-15.el8_4 +- kvm-block-Simplify-qmp_block_resize-error-paths.patch [bz#1903511] +- kvm-block-Fix-locking-in-qmp_block_resize.patch [bz#1903511] +- kvm-block-Fix-deadlock-in-bdrv_co_yield_to_drain.patch [bz#1903511] +- Resolves: bz#1903511 + (no response on QMP command 'block_resize') + +* Sat Mar 20 2021 Danilo Cesar Lemes de Paula - 5.2.0-14.el8 +- kvm-vhost-user-blk-fix-blkcfg-num_queues-endianness.patch [bz#1937004] +- kvm-block-export-fix-blk_size-double-byteswap.patch [bz#1937004] +- kvm-block-export-use-VIRTIO_BLK_SECTOR_BITS.patch [bz#1937004] +- kvm-block-export-fix-vhost-user-blk-export-sector-number.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-discard-write-zeroes-in.patch [bz#1937004] +- kvm-block-export-port-virtio-blk-read-write-range-check.patch [bz#1937004] +- 
kvm-spec-ui-spice-sub-package.patch [bz#1936373] +- kvm-spec-ui-opengl-sub-package.patch [bz#1936373] +- Resolves: bz#1937004 + (vhost-user-blk server endianness and input validation fixes) +- Resolves: bz#1936373 + (move spice & opengl modules to rpm subpackages) + +* Tue Mar 16 2021 Danilo Cesar Lemes de Paula - 5.2.0-13.el8 +- kvm-i386-acpi-restore-device-paths-for-pre-5.1-vms.patch [bz#1934158] +- Resolves: bz#1934158 + (Windows guest looses network connectivity when NIC was configured with static IP) + +* Mon Mar 15 2021 Danilo Cesar Lemes de Paula - 5.2.0-12.el8 +- kvm-scsi-disk-move-scsi_handle_rw_error-earlier.patch [bz#1927530] +- kvm-scsi-disk-do-not-complete-requests-early-for-rerror-.patch [bz#1927530] +- kvm-scsi-introduce-scsi_sense_from_errno.patch [bz#1927530] +- kvm-scsi-disk-pass-SCSI-status-to-scsi_handle_rw_error.patch [bz#1927530] +- kvm-scsi-disk-pass-guest-recoverable-errors-through-even.patch [bz#1927530] +- kvm-hw-intc-arm_gic-Fix-interrupt-ID-in-GICD_SGIR-regist.patch [bz#1936948] +- Resolves: bz#1927530 + (RHEL8 Hypervisor - OVIRT - Issues seen on a virtualization guest with direct passthrough LUNS pausing when a host gets a Thin threshold warning) +- Resolves: bz#1936948 + (CVE-2021-20221 virt:av/qemu-kvm: qemu: out-of-bound heap buffer access via an interrupt ID field [rhel-av-8.4.0]) + +* Mon Mar 08 2021 Danilo Cesar Lemes de Paula - 5.2.0-11.el8 +- kvm-qxl-set-qxl.ssd.dcl.con-on-secondary-devices.patch [bz#1932190] +- kvm-qxl-also-notify-the-rendering-is-done-when-skipping-.patch [bz#1932190] +- kvm-virtiofsd-Save-error-code-early-at-the-failure-calls.patch [bz#1935071] +- kvm-virtiofs-drop-remapped-security.capability-xattr-as-.patch [bz#1935071] +- Resolves: bz#1932190 + (Timeout when dump the screen from 2nd VGA) +- Resolves: bz#1935071 + (CVE-2021-20263 virt:8.4/qemu-kvm: QEMU: virtiofsd: 'security.capabilities' is not dropped with xattrmap option [rhel-av-8]) + +* Wed Mar 03 2021 Danilo Cesar Lemes de Paula - 5.2.0-10.el8 +- 
kvm-migration-dirty-bitmap-Use-struct-for-alias-map-inne.patch [bz#1930757] +- kvm-migration-dirty-bitmap-Allow-control-of-bitmap-persi.patch [bz#1930757] +- kvm-qemu-iotests-300-Add-test-case-for-modifying-persist.patch [bz#1930757] +- kvm-failover-fix-indentantion.patch [bz#1819991] +- kvm-failover-Use-always-atomics-for-primary_should_be_hi.patch [bz#1819991] +- kvm-failover-primary-bus-is-only-used-once-and-where-it-.patch [bz#1819991] +- kvm-failover-Remove-unused-parameter.patch [bz#1819991] +- kvm-failover-Remove-external-partially_hotplugged-proper.patch [bz#1819991] +- kvm-failover-qdev_device_add-returns-err-or-dev-set.patch [bz#1819991] +- kvm-failover-Rename-bool-to-failover_primary_hidden.patch [bz#1819991] +- kvm-failover-g_strcmp0-knows-how-to-handle-NULL.patch [bz#1819991] +- kvm-failover-Remove-primary_device_opts.patch [bz#1819991] +- kvm-failover-remove-standby_id-variable.patch [bz#1819991] +- kvm-failover-Remove-primary_device_dict.patch [bz#1819991] +- kvm-failover-Remove-memory-leak.patch [bz#1819991] +- kvm-failover-simplify-virtio_net_find_primary.patch [bz#1819991] +- kvm-failover-should_be_hidden-should-take-a-bool.patch [bz#1819991] +- kvm-failover-Rename-function-to-hide_device.patch [bz#1819991] +- kvm-failover-virtio_net_connect_failover_devices-does-no.patch [bz#1819991] +- kvm-failover-Rename-to-failover_find_primary_device.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add-failover-case.patch [bz#1819991] +- kvm-failover-simplify-qdev_device_add.patch [bz#1819991] +- kvm-failover-make-sure-that-id-always-exist.patch [bz#1819991] +- kvm-failover-remove-failover_find_primary_device-error-p.patch [bz#1819991] +- kvm-failover-split-failover_find_primary_device_id.patch [bz#1819991] +- kvm-failover-We-don-t-need-to-cache-primary_device_id-an.patch [bz#1819991] +- kvm-failover-Caller-of-this-two-functions-already-have-p.patch [bz#1819991] +- kvm-failover-simplify-failover_unplug_primary.patch [bz#1819991] +- 
kvm-failover-Remove-primary_dev-member.patch [bz#1819991] +- kvm-virtio-net-add-missing-object_unref.patch [bz#1819991] +- kvm-x86-cpu-Populate-SVM-CPUID-feature-bits.patch [bz#1926785] +- kvm-i386-Add-the-support-for-AMD-EPYC-3rd-generation-pro.patch [bz#1926785] +- Resolves: bz#1930757 + (Allow control of block-dirty-bitmap persistence via 'block-bitmap-mapping') +- Resolves: bz#1819991 + (Hostdev type interface with net failover enabled exists in domain xml and doesn't reattach to host after hot-unplug) +- Resolves: bz#1926785 + ([RFE] AMD Milan - Add KVM/support for EPYC-Milan CPU Model - Fast Train) + +* Mon Mar 01 2021 Danilo Cesar Lemes de Paula - 5.2.0-9.el8 +- kvm-docs-generate-qemu-storage-daemon-qmp-ref-7-man-page.patch [bz#1901323] +- kvm-docs-add-qemu-storage-daemon-1-man-page.patch [bz#1901323] +- kvm-docs-Add-qemu-storage-daemon-1-manpage-to-meson.buil.patch [bz#1901323] +- kvm-qemu-storage-daemon-Enable-object-add.patch [bz#1901323] +- kvm-spec-Package-qemu-storage-daemon.patch [bz#1901323] +- kvm-default-configs-Enable-vhost-user-blk.patch [bz#1930033] +- kvm-qemu-nbd-Use-SOMAXCONN-for-socket-listen-backlog.patch [bz#1925345] +- kvm-pcie-don-t-set-link-state-active-if-the-slot-is-empt.patch [bz#1917654] +- Resolves: bz#1901323 + (QSD (QEMU Storage Daemon): basic support - TechPreview) +- Resolves: bz#1930033 + (enable vhost-user-blk device) +- Resolves: bz#1925345 + (qemu-nbd needs larger backlog for Unix socket listen()) +- Resolves: bz#1917654 + ([failover vf migration][RHEL84 vm] After start a vm with a failover vf + a failover virtio net device, the failvoer vf do not exist in the vm) + +* Fri Feb 19 2021 Eduardo Lima (Etrunko) - 5.2.0-8.el8 +- kvm-block-nbd-only-detach-existing-iochannel-from-aio_co.patch [bz#1887883] +- kvm-block-nbd-only-enter-connection-coroutine-if-it-s-pr.patch [bz#1887883] +- kvm-nbd-make-nbd_read-return-EIO-on-error.patch [bz#1887883] +- kvm-virtio-move-use-disabled-flag-property-to-hw_compat_.patch [bz#1907255] +- 
kvm-virtiofsd-extract-lo_do_open-from-lo_open.patch [bz#1920740] +- kvm-virtiofsd-optionally-return-inode-pointer-from-lo_do.patch [bz#1920740] +- kvm-virtiofsd-prevent-opening-of-special-files-CVE-2020-.patch [bz#1920740] +- kvm-spapr-Adjust-firmware-path-of-PCI-devices.patch [bz#1920941] +- kvm-pci-reject-too-large-ROMs.patch [bz#1917830] +- kvm-pci-add-romsize-property.patch [bz#1917830] +- kvm-redhat-Add-some-devices-for-exporting-upstream-machi.patch [bz#1917826] +- kvm-vhost-Check-for-valid-vdev-in-vhost_backend_handle_i.patch [bz#1880299] +- Resolves: bz#1887883 + (qemu blocks client progress with various NBD actions) +- Resolves: bz#1907255 + (Migrate failed with vhost-vsock-pci from RHEL-AV 8.3.1 to RHEL-AV 8.2.1) +- Resolves: bz#1920740 + (CVE-2020-35517 virt:8.4/qemu-kvm: QEMU: virtiofsd: potential privileged host device access from guest [rhel-av-8.4.0]) +- Resolves: bz#1920941 + ([ppc64le] [AV]--disk cdimage.iso,bus=usb fails to boot) +- Resolves: bz#1917830 + (Add romsize property to qemu-kvm) +- Resolves: bz#1917826 + (Add extra device support to qemu-kvm, but not to rhel machine types) +- Resolves: bz#1880299 + (vhost-user mq connection fails to restart after kill host testpmd which acts as vhost-user client) + +* Fri Feb 12 2021 Eduardo Lima (Etrunko) - 5.2.0-7.el8 +- kvm-virtio-Add-corresponding-memory_listener_unregister-.patch [bz#1903521] +- kvm-block-Honor-blk_set_aio_context-context-requirements.patch [bz#1918966 bz#1918968] +- kvm-nbd-server-Quiesce-coroutines-on-context-switch.patch [bz#1918966 bz#1918968] +- kvm-block-Avoid-processing-BDS-twice-in-bdrv_set_aio_con.patch [bz#1918966 bz#1918968] +- kvm-storage-daemon-Call-bdrv_close_all-on-exit.patch [bz#1918966 bz#1918968] +- kvm-block-move-blk_exp_close_all-to-qemu_cleanup.patch [bz#1918966 bz#1918968] +- Resolves: bz#1903521 + (hot unplug vhost-user cause qemu crash: qemu-kvm: ../softmmu/memory.c:2818: do_address_space_destroy: Assertion `QTAILQ_EMPTY(&as->listeners)' failed.) 
+- Resolves: bz#1918966 + ([incremental_backup] qemu aborts if guest reboot during backup when using virtio-blk: "aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'") +- Resolves: bz#1918968 + ([incremental_backup] qemu deadlock after poweroff in guest during backup in nbd_export_close_all()) + +* Tue Feb 09 2021 Eduardo Lima (Etrunko) - 5.2.0-6.el8 +- kvm-scsi-fix-device-removal-race-vs-IO-restart-callback-.patch [bz#1854811] +- kvm-tracetool-also-strip-l-and-ll-from-systemtap-format-.patch [bz#1907264] +- kvm-redhat-moving-all-documentation-files-to-qemu-kvm-do.patch [bz#1881170 bz#1924766] +- kvm-hw-arm-smmuv3-Fix-addr_mask-for-range-based-invalida.patch [bz#1834152] +- kvm-redhat-makes-qemu-respect-system-s-crypto-profile.patch [bz#1902219] +- kvm-vhost-Unbreak-SMMU-and-virtio-iommu-on-dev-iotlb-sup.patch [bz#1925028] +- kvm-docs-set-CONFDIR-when-running-sphinx.patch [bz#1902537] +- Resolves: bz#1854811 + (scsi-bus.c: use-after-free due to race between device unplug and I/O operation causes guest crash) +- Resolves: bz#1907264 + (systemtap: invalid or missing conversion specifier at the trace event vhost_vdpa_set_log_base) +- Resolves: bz#1881170 + (split documentation from the qemu-kvm-core package to its own subpackage) +- Resolves: bz#1924766 + (split documentation from the qemu-kvm-core package to its own subpackage [av-8.4.0]) +- Resolves: bz#1834152 + ([aarch64] QEMU SMMUv3 device: Support range invalidation) +- Resolves: bz#1902219 + (QEMU doesn't honour system crypto policies) +- Resolves: bz#1925028 + (vsmmuv3/vhost and virtio-iommu/vhost regression) +- Resolves: bz#1902537 + (The default fsfreeze-hook path from man page and qemu-ga --help command are different) + +* Tue Feb 02 2021 Eduardo Lima (Etrunko) - 5.2.0-5.el8 +- kvm-spapr-Allow-memory-unplug-to-always-succeed.patch [bz#1914069] +- kvm-spapr-Improve-handling-of-memory-unplug-with-old-gue.patch [bz#1914069] +- kvm-x86-cpu-Add-AVX512_FP16-cpu-feature.patch [bz#1838738] +- 
kvm-q35-Increase-max_cpus-to-710-on-pc-q35-rhel8-machine.patch [bz#1904268] +- kvm-config-enable-VFIO_CCW.patch [bz#1922170] +- Resolves: bz#1914069 + ([ppc64le] have this fix for rhel8.4 av (spapr: Allow memory unplug to always succeed)) +- Resolves: bz#1838738 + ([Intel 8.4 FEAT] qemu-kvm Sapphire Rapids (SPR) New Instructions (NIs) - Fast Train) +- Resolves: bz#1904268 + ([RFE] [HPEMC] qemu-kvm: support up to 710 VCPUs) +- Resolves: bz#1922170 + (Enable vfio-ccw in AV) + +* Wed Jan 27 2021 Danilo Cesar Lemes de Paula - 5.2.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1918061] +- Resolves: bz#1918061 + (CVE-2020-10756 virt:rhel/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Mon Jan 18 2021 Danilo Cesar Lemes de Paula - 5.2.0-3.el8 +- kvm-block-nvme-Implement-fake-truncate-coroutine.patch [bz#1848834] +- kvm-spec-find-system-python-via-meson.patch [bz#1899619] +- kvm-build-system-use-b_staticpic-false.patch [bz#1899619] +- kvm-spapr-Fix-buffer-overflow-in-spapr_numa_associativit.patch [bz#1908693] +- kvm-usb-hcd-xhci-pci-Fixup-capabilities-ordering-again.patch [bz#1912846] +- kvm-qga-commands-posix-Send-CCW-address-on-s390x-with-th.patch [bz#1755075] +- kvm-AArch64-machine-types-cleanup.patch [bz#1895276] +- kvm-hw-arm-virt-Add-8.4-Machine-type.patch [bz#1895276] +- kvm-udev-kvm-check-remove-the-exceeded-subscription-limi.patch [bz#1914463] +- kvm-memory-Rename-memory_region_notify_one-to-memory_reg.patch [bz#1845758] +- kvm-memory-Add-IOMMUTLBEvent.patch [bz#1845758] +- kvm-memory-Add-IOMMU_NOTIFIER_DEVIOTLB_UNMAP-IOMMUTLBNot.patch [bz#1845758] +- kvm-intel_iommu-Skip-page-walking-on-device-iotlb-invali.patch [bz#1845758] +- kvm-memory-Skip-bad-range-assertion-if-notifier-is-DEVIO.patch [bz#1845758] +- kvm-RHEL-Switch-pvpanic-test-to-q35.patch [bz#1885555] +- kvm-8.4-x86-machine-type.patch [bz#1885555] +- kvm-memory-clamp-cached-translation-in-case-it-points-to.patch [bz#1904392] +- Resolves: 
bz#1848834 + (Failed to create luks format image on NVMe device) +- Resolves: bz#1899619 + (QEMU 5.2 is built with PIC objects instead of PIE) +- Resolves: bz#1908693 + ([ppc64le]boot up a guest with 128 numa nodes ,qemu got coredump) +- Resolves: bz#1912846 + (qemu-kvm: Failed to load xhci:parent_obj during migration) +- Resolves: bz#1755075 + ([qemu-guest-agent] fsinfo doesn't return disk info on s390x) +- Resolves: bz#1895276 + (Machine types update for aarch64 for QEMU 5.2.0) +- Resolves: bz#1914463 + (Remove KVM guest count and limit info message) +- Resolves: bz#1845758 + (qemu core dumped: qemu-kvm: /builddir/build/BUILD/qemu-4.2.0/memory.c:1928: memory_region_notify_one: Assertion `entry->iova >= notifier->start && entry_end <= notifier->end' failed.) +- Resolves: bz#1885555 + (8.4 machine types for x86) +- Resolves: bz#1904392 + (CVE-2020-27821 virt:8.4/qemu-kvm: QEMU: heap buffer overflow in msix_table_mmio_write() in hw/pci/msix.c [rhel-av-8]) + +* Tue Dec 15 2020 Danilo Cesar Lemes de Paula - 5.2.0-2.el8 +- kvm-redhat-Define-hw_compat_8_3.patch [bz#1893935] +- kvm-redhat-Add-spapr_machine_rhel_default_class_options.patch [bz#1893935] +- kvm-redhat-Define-pseries-rhel8.4.0-machine-type.patch [bz#1893935] +- kvm-redhat-s390x-add-rhel-8.4.0-compat-machine.patch [bz#1836282] +- Resolves: bz#1836282 + (New machine type for qemu-kvm on s390x in RHEL-AV) +- Resolves: bz#1893935 + (New machine type on RHEL-AV 8.4 for ppc64le) + +* Wed Dec 09 2020 Miroslav Rezanina - 5.2.0-1.el8 +- Rebase to QEMU 5.2.0 [bz#1905933] +- Resolves: bz#1905933 + (Rebase qemu-kvm to version 5.2.0) + +* Tue Dec 01 2020 Danilo Cesar Lemes de Paula - 5.1.0-16.el8 +- kvm-redhat-introduces-disable_everything-macro-into-the-.patch [bz#1884611] +- kvm-redhat-scripts-extract_build_cmd.py-Avoid-listing-em.patch [bz#1884611] +- kvm-redhat-Removing-unecessary-configurations.patch [bz#1884611] +- kvm-redhat-Fixing-rh-local-build.patch [bz#1884611] +- 
kvm-redhat-allow-Makefile-rh-prep-builddep-to-fail.patch [bz#1884611] +- kvm-redhat-adding-rh-rpm-target.patch [bz#1884611] +- kvm-redhat-move-shareable-files-from-qemu-kvm-core-to-qe.patch [bz#1884611] +- kvm-redhat-Add-qemu-kiwi-subpackage.patch [bz#1884611] +- Resolves: bz#1884611 + (Build kata-specific version of qemu) + +* Mon Nov 16 2020 Danilo Cesar Lemes de Paula - 5.1.0-15.el8 +- kvm-redhat-add-un-pre-install-systemd-hooks-for-qemu-ga.patch [bz#1882719] +- kvm-rcu-Implement-drain_call_rcu.patch [bz#1812399 bz#1866707] +- kvm-libqtest-Rename-qmp_assert_error_class-to-qmp_expect.patch [bz#1812399 bz#1866707] +- kvm-qtest-rename-qtest_qmp_receive-to-qtest_qmp_receive_.patch [bz#1812399 bz#1866707] +- kvm-qtest-Reintroduce-qtest_qmp_receive-with-QMP-event-b.patch [bz#1812399 bz#1866707] +- kvm-qtest-remove-qtest_qmp_receive_success.patch [bz#1812399 bz#1866707] +- kvm-device-plug-test-use-qtest_qmp-to-send-the-device_de.patch [bz#1812399 bz#1866707] +- kvm-qtest-switch-users-back-to-qtest_qmp_receive.patch [bz#1812399 bz#1866707] +- kvm-qtest-check-that-drives-are-really-appearing-and-dis.patch [bz#1812399 bz#1866707] +- kvm-qemu-iotests-qtest-rewrite-test-067-as-a-qtest.patch [bz#1812399 bz#1866707] +- kvm-qdev-add-check-if-address-free-callback-for-buses.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi_bus-switch-search-direction-in-scsi_device.patch [bz#1812399 bz#1866707] +- kvm-device_core-use-drain_call_rcu-in-in-qmp_device_add.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-RCU-for-list-of-children-of-a-bus.patch [bz#1812399 bz#1866707] +- kvm-scsi-switch-to-bus-check_address.patch [bz#1812399 bz#1866707] +- kvm-device-core-use-atomic_set-on-.realized-property.patch [bz#1812399 bz#1866707] +- kvm-scsi-scsi-bus-scsi_device_find-don-t-return-unrealiz.patch [bz#1812399] +- kvm-scsi-scsi_bus-Add-scsi_device_get.patch [bz#1812399 bz#1866707] +- kvm-virtio-scsi-use-scsi_device_get.patch [bz#1812399 bz#1866707] +- 
kvm-scsi-scsi_bus-fix-races-in-REPORT-LUNS.patch [bz#1812399 bz#1866707] +- kvm-tests-migration-fix-memleak-in-wait_command-wait_com.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-the-order-of-buffered-events.patch [bz#1812399 bz#1866707] +- kvm-libqtest-fix-memory-leak-in-the-qtest_qmp_event_ref.patch [bz#1812399 bz#1866707] +- kvm-iotests-add-filter_qmp_virtio_scsi-function.patch [bz#1812399 bz#1866707] +- kvm-iotests-rewrite-iotest-240-in-python.patch [bz#1812399 bz#1866707] +- Resolves: bz#1812399 + (Qemu crash when detach disk with cache="none" discard="ignore" io="native") +- Resolves: bz#1866707 + (qemu-kvm is crashing with error "scsi_target_emulate_report_luns: Assertion `i == n + 8' failed") +- Resolves: bz#1882719 + (qemu-ga service still active and can work after qemu-guest-agent been removed) + +* Tue Oct 13 2020 Danilo Cesar Lemes de Paula - 5.1.0-14.el8_3 +- kvm-virtiofsd-avoid-proc-self-fd-tempdir.patch [bz#1884276] +- Resolves: bz#1884276 + (Pod with kata-runtime won't start, QEMU: "vhost_user_dev init failed, Operation not permitted" [mkdtemp failing in sandboxing]) + +* Thu Oct 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-13.el8_3 +- kvm-x86-lpc9-let-firmware-negotiate-CPU-hotplug-with-SMI.patch [bz#1846886] +- kvm-x86-cpuhp-prevent-guest-crash-on-CPU-hotplug-when-br.patch [bz#1846886] +- kvm-x86-cpuhp-refuse-cpu-hot-unplug-request-earlier-if-n.patch [bz#1846886] +- Resolves: bz#1846886 + (Guest hit soft lockup or reboots if hotplug vcpu under ovmf) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-12.el8_3 +- kvm-virtio-skip-legacy-support-check-on-machine-types-le.patch [bz#1868449] +- kvm-vhost-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-user-vsock-pci-force-virtio-version-1.patch [bz#1868449] +- kvm-vhost-vsock-ccw-force-virtio-version-1.patch [bz#1868449] +- Resolves: bz#1868449 + (vhost_vsock error: device is modern-only, use disable-legacy=on) + +* Mon Oct 05 2020 Danilo Cesar Lemes de Paula - 5.1.0-11.el8_3 
+- kvm-migration-increase-max-bandwidth-to-128-MiB-s-1-Gib-.patch [bz#1874004] +- kvm-redhat-Make-all-generated-so-files-executable-not-on.patch [bz#1876635] +- Resolves: bz#1874004 + (Live migration performance is poor during guest installation process on power host) +- Resolves: bz#1876635 + (VM fails to start with a passthrough smartcard) + +* Mon Sep 28 2020 Danilo Cesar Lemes de Paula - 5.1.0-10.el8 +- kvm-qemu-img-Support-bitmap-merge-into-backing-image.patch [bz#1877209] +- Resolves: bz#1877209 + ('qemu-img bitmaps --merge' failed when trying to merge top volume bitmap to base volume bitmap) + +* Mon Sep 21 2020 Danilo Cesar Lemes de Paula - 5.1.0-9.el8 +- kvm-hw-nvram-fw_cfg-fix-FWCfgDataGeneratorClass-get_data.patch [bz#1688978] +- Resolves: bz#1688978 + (RFE: forward host preferences for cipher suites and CA certs to guest firmware) + +* Thu Sep 17 2020 Danilo Cesar Lemes de Paula - 5.1.0-8.el8 +- kvm-redhat-link-etc-qemu-ga-fsfreeze-hook-to-etc-qemu-kv.patch [bz#1738820] +- kvm-seccomp-fix-killing-of-whole-process-instead-of-thre.patch [bz#1752376] +- kvm-Revert-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-block-rbd-add-namespace-to-qemu_rbd_strong_runtime_o.patch [bz#1821528] +- Resolves: bz#1738820 + ('-F' option of qemu-ga command cause the guest-fsfreeze-freeze command doesn't work) +- Resolves: bz#1752376 + (qemu use SCMP_ACT_TRAP even SCMP_ACT_KILL_PROCESS is available) +- Resolves: bz#1821528 + (missing namespace attribute when access the rbd image with namespace) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Tue Sep 15 2020 Danilo Cesar Lemes de Paula - 5.1.0-7.el8 +- kvm-target-ppc-Add-experimental-option-for-enabling-secu.patch [bz#1789757 bz#1870384] +- kvm-target-arm-Move-start-powered-off-property-to-generi.patch [bz#1849483] +- kvm-target-arm-Move-setting-of-CPU-halted-state-to-gener.patch [bz#1849483] +- 
kvm-ppc-spapr-Use-start-powered-off-CPUState-property.patch [bz#1849483] +- Resolves: bz#1789757 + ([IBM 8.4 FEAT] Add machine option to enable secure VM support) +- Resolves: bz#1849483 + (Failed to boot up guest when hotplugging vcpus on bios stage) +- Resolves: bz#1870384 + ([IBM 8.3 FEAT] Add interim/unsupported machine option to enable secure VM support for testing purposes) + +* Thu Sep 10 2020 Danilo Cesar Lemes de Paula - 5.1.0-6.el8 +- kvm-spec-Move-qemu-pr-helper-back-to-usr-bin.patch [bz#1869635] +- kvm-Bump-required-libusbx-version.patch [bz#1856591] +- Resolves: bz#1856591 + (libusbx isn't updated with qemu-kvm) +- Resolves: bz#1869635 + ('/usr/bin/qemu-pr-helper' is not a suitable pr helper: No such file or directory) + +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 + (-prom-env does not validate input) +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs 
module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + +* Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 +- kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] +- kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707] +- kvm-machine_types-numa-compatibility-for-auto_enable_num.patch [bz#1849707] +- kvm-migration-Add-block-bitmap-mapping-parameter.patch [bz#1790492] +- kvm-iotests.py-Let-wait_migration-return-on-failure.patch [bz#1790492] +- kvm-iotests-Test-node-bitmap-aliases-during-migration.patch [bz#1790492] +- Resolves: bz#1790492 + ('dirty-bitmaps' migration capability should allow configuring target nodenames) +- Resolves: bz#1849707 + (8.3 machine types for x86 - 5.1 update) +- Resolves: bz#1867075 + (CVE-2020-10756 virt:8.3/qemu-kvm: QEMU: slirp: networking out-of-bounds read information disclosure vulnerability [rhel-av-8]) + +* Wed Aug 19 2020 Danilo Cesar Lemes de Paula - 5.1.0-3.el8 +- kvm-redhat-Update-hw_compat_8_2.patch [bz#1843348] +- kvm-redhat-update-pseries-rhel8.2.0-machine-type.patch [bz#1843348] +- kvm-Disable-TPM-passthrough-backend-on-ARM.patch [bz#1801242] +- kvm-Require-libfdt-1.6.0.patch [bz#1867847] +- Resolves: bz#1801242 + ([aarch64] vTPM support in machvirt) +- Resolves: bz#1843348 + (8.3 machine types for POWER) +- Resolves: bz#1867847 + ([ppc] virt module 7629: /usr/libexec/qemu-kvm: undefined symbol: fdt_check_full, version LIBFDT_1.2) + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-2.el8 +- kvm-redhat-define-hw_compat_8_2.patch [bz#1853265] +- Resolves: bz#1853265 + (Forward and backward migration from rhel-av-8.3.0(qemu-kvm-5.0.0) to rhel-av-8.2.1(qemu-kvm-4.2.0) failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-1.el8 +- Quick changelog fix to reflect the current fixes: +- Resolve: bz#1781911 +- Resolve: bz#1841529 +- Resolve: bz#1842902 +- Resolve: bz#1818843 +- Resolve: 
bz#1819292 +- Resolve: bz#1801242 + +* Wed Aug 12 2020 Danilo Cesar Lemes de Paula - 5.1.0-0.el8 +- Rebase to 5.1.0 +- Resolves: bz#1809650 + +* Tue Jul 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-29.el8 +- kvm-virtio-net-fix-removal-of-failover-device.patch [bz#1820120] +- Resolves: bz#1820120 + (After hotunplugging the vitrio device and netdev, hotunpluging the failover VF will cause qemu core dump) + +* Sun Jun 28 2020 Danilo Cesar Lemes de Paula - 4.2.0-28.el8 +- kvm-virtio-blk-Refactor-the-code-that-processes-queued-r.patch [bz#1812765] +- kvm-virtio-blk-On-restart-process-queued-requests-in-the.patch [bz#1812765] +- kvm-Fix-use-afte-free-in-ip_reass-CVE-2020-1983.patch [bz#1838082] +- Resolves: bz#1812765 + (qemu with iothreads enabled crashes on resume after enospc pause for disk extension) +- Resolves: bz#1838082 + (CVE-2020-1983 virt:8.2/qemu-kvm: QEMU: slirp: use-after-free in ip_reass() function in ip_input.c [rhel-av-8]) + +* Thu Jun 18 2020 Eduardo Lima (Etrunko) - 4.2.0-27.el8 +- kvm-hw-pci-pcie-Move-hot-plug-capability-check-to-pre_pl.patch [bz#1820531] +- kvm-spec-Fix-python-shenigans-for-tests.patch [bz#1845779] +- kvm-target-i386-Add-ARCH_CAPABILITIES-related-bits-into-.patch [bz#1840342] +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) +- Resolves: bz#1840342 + ([Intel 8.2.1 Bug] qemu-kvm Add ARCH_CAPABILITIES to Icelake-Server cpu model - Fast Train) +- Resolves: bz#1845779 + (Install 'qemu-kvm-tests' failed as nothing provides /usr/libexec/platform-python3 - virt module 6972) + +* Wed Jun 17 2020 Eduardo Lima (Etrunko) - 4.2.0-26.el8 +- kvm-nbd-server-Avoid-long-error-message-assertions-CVE-2.patch [bz#1845384] +- kvm-block-Call-attention-to-truncation-of-long-NBD-expor.patch [bz#1845384] +- Resolves: bz#1845384 + (CVE-2020-10761 virt:8.2/qemu-kvm: QEMU: nbd: reachable assertion failure in nbd_negotiate_send_rep_verr via remote client [rhel-av-8]) + +* Tue Jun 09 2020 Danilo 
Cesar Lemes de Paula - 4.2.0-25.el8 +- kvm-enable-ramfb.patch [bz#1841068] +- kvm-block-Add-flags-to-BlockDriver.bdrv_co_truncate.patch [bz#1780574] +- kvm-block-Add-flags-to-bdrv-_co-_truncate.patch [bz#1780574] +- kvm-block-backend-Add-flags-to-blk_truncate.patch [bz#1780574] +- kvm-qcow2-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-raw-format-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-file-posix-Support-BDRV_REQ_ZERO_WRITE-for-truncate.patch [bz#1780574] +- kvm-block-truncate-Don-t-make-backing-file-data-visible.patch [bz#1780574] +- kvm-iotests-Add-qemu_io_log.patch [bz#1780574] +- kvm-iotests-Filter-testfiles-out-in-filter_img_info.patch [bz#1780574] +- kvm-iotests-Test-committing-to-short-backing-file.patch [bz#1780574] +- kvm-qcow2-Forward-ZERO_WRITE-flag-for-full-preallocation.patch [bz#1780574] +- kvm-i386-Add-MSR-feature-bit-for-MDS-NO.patch [bz#1769912] +- kvm-i386-Add-macro-for-stibp.patch [bz#1769912] +- kvm-target-i386-Add-new-bit-definitions-of-MSR_IA32_ARCH.patch [bz#1769912] +- kvm-i386-Add-new-CPU-model-Cooperlake.patch [bz#1769912] +- kvm-target-i386-Add-missed-features-to-Cooperlake-CPU-mo.patch [bz#1769912] +- Resolves: bz#1769912 + ([Intel 8.2.1 Feature] introduce Cooper Lake cpu model - qemu-kvm Fast Train) +- Resolves: bz#1780574 + (Data corruption with resizing short overlay over longer backing files) +- Resolves: bz#1841068 + (RFE: please support the "ramfb" display device model) + +* Mon Jun 08 2020 Danilo Cesar Lemes de Paula - 4.2.0-24.el8 +- kvm-target-i386-set-the-CPUID-level-to-0x14-on-old-machi.patch [bz#1513681] +- kvm-block-curl-HTTP-header-fields-allow-whitespace-aroun.patch [bz#1841038] +- kvm-block-curl-HTTP-header-field-names-are-case-insensit.patch [bz#1841038] +- kvm-MAINTAINERS-fix-qcow2-bitmap.c-under-Dirty-Bitmaps-h.patch [bz#1779893 bz#1779904] +- kvm-iotests-Let-_make_test_img-parse-its-parameters.patch [bz#1779893 bz#1779904] +- 
kvm-qemu_img-add-cvtnum_full-to-print-error-reports.patch [bz#1779893 bz#1779904] +- kvm-block-Make-it-easier-to-learn-which-BDS-support-bitm.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Promote-several-bitmap-functions-to-non-sta.patch [bz#1779893 bz#1779904] +- kvm-blockdev-Split-off-basic-bitmap-operations-for-qemu-.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-bitmap-sub-command.patch [bz#1779893 bz#1779904] +- kvm-iotests-Fix-test-178.patch [bz#1779893 bz#1779904] +- kvm-qcow2-Expose-bitmaps-size-during-measure.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Factor-out-code-for-merging-bitmaps.patch [bz#1779893 bz#1779904] +- kvm-qemu-img-Add-convert-bitmaps-option.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-test-291-to-for-qemu-img-bitmap-coverage.patch [bz#1779893 bz#1779904] +- kvm-iotests-Add-more-skip_if_unsupported-statements-to-t.patch [bz#1778593] +- kvm-iotests-don-t-use-format-for-drive_add.patch [bz#1778593] +- kvm-iotests-055-refactor-compressed-backup-to-vmdk.patch [bz#1778593] +- kvm-iotests-055-skip-vmdk-target-tests-if-vmdk-is-not-wh.patch [bz#1778593] +- kvm-backup-Improve-error-for-bdrv_getlength-failure.patch [bz#1778593] +- kvm-backup-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Backup-with-different-source-target-size.patch [bz#1778593] +- kvm-iotests-109-Don-t-mirror-with-mismatched-size.patch [bz#1778593] +- kvm-iotests-229-Use-blkdebug-to-inject-an-error.patch [bz#1778593] +- kvm-mirror-Make-sure-that-source-and-target-size-match.patch [bz#1778593] +- kvm-iotests-Mirror-with-different-source-target-size.patch [bz#1778593] +- Resolves: bz#1513681 + ([Intel 8.2.1 Feat] qemu-kvm PT VMX -- Fast Train) +- Resolves: bz#1778593 + (Qemu coredump when backup to a existing small size image) +- Resolves: bz#1779893 + (RFE: Copy bitmaps with qemu-img convert) +- Resolves: bz#1779904 + (RFE: ability to estimate bitmap space utilization for qcow2) +- Resolves: bz#1841038 + (qemu-img: 
/var/tmp/v2vovl56bced.qcow2: CURL: Error opening file: Server does not support 'range' (byte ranges) with HTTP/2 server in VMware ESXi 7) + +* Thu Jun 04 2020 Danilo Cesar Lemes de Paula - 4.2.0-23.el8 +- kvm-target-arm-Fix-PAuth-sbox-functions.patch [bz#1813940] +- kvm-Don-t-leak-memory-when-reallocation-fails.patch [bz#1749737] +- kvm-Replace-remaining-malloc-free-user-with-glib.patch [bz#1749737] +- kvm-Revert-RHEL-disable-hostmem-memfd.patch [bz#1839030] +- kvm-block-introducing-bdrv_co_delete_file-interface.patch [bz#1827630] +- kvm-block.c-adding-bdrv_co_delete_file.patch [bz#1827630] +- kvm-crypto.c-cleanup-created-file-when-block_crypto_co_c.patch [bz#1827630] +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: use-after-free during packet reassembly [rhel-av-8]) +- Resolves: bz#1813940 + (CVE-2020-10702 virt:8.1/qemu-kvm: qemu: weak signature generation in Pointer Authentication support for ARM [rhel-av-8]) +- Resolves: bz#1827630 + (volume creation leaving uncleaned stuff behind on error (vol-clone/libvirt/qemu-kvm)) +- Resolves: bz#1839030 + (RFE: enable the "memfd" memory backend) + +* Mon May 25 2020 Danilo Cesar Lemes de Paula - 4.2.0-22.el8 +- kvm-block-always-fill-entire-LUKS-header-space-with-zero.patch [bz#1775462] +- kvm-numa-remove-not-needed-check.patch [bz#1600217] +- kvm-numa-properly-check-if-numa-is-supported.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-initiator-information-for.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-latency-and-bandwi.patch [bz#1600217] +- kvm-numa-Extend-CLI-to-provide-memory-side-cache-informa.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Proximity-Domain-Attributes-S.patch [bz#1600217] +- kvm-hmat-acpi-Build-System-Locality-Latency-and-Bandwidt.patch [bz#1600217] +- kvm-hmat-acpi-Build-Memory-Side-Cache-Information-Struct.patch [bz#1600217] +- kvm-tests-numa-Add-case-for-QMP-build-HMAT.patch [bz#1600217] +- kvm-tests-bios-tables-test-add-test-cases-for-ACPI-HMAT.patch 
[bz#1600217] +- kvm-ACPI-add-expected-files-for-HMAT-tests-acpihmat.patch [bz#1600217] +- Resolves: bz#1600217 + ([Intel 8.2.1 FEAT] KVM ACPI HMAT support - qemu-kvm Fast Train) +- Resolves: bz#1775462 + (Creating luks-inside-qcow2 images with cluster_size=2k/4k will get a corrupted image) + +* Mon May 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-21.el8 +- kvm-hw-pci-pcie-Forbid-hot-plug-if-it-s-disabled-on-the-.patch [bz#1820531] +- kvm-hw-pci-pcie-Replace-PCI_DEVICE-casts-with-existing-v.patch [bz#1820531] +- kvm-tools-virtiofsd-passthrough_ll-Fix-double-close.patch [bz#1817445] +- kvm-virtiofsd-add-rlimit-nofile-NUM-option.patch [bz#1817445] +- kvm-virtiofsd-stay-below-fs.file-max-sysctl-value-CVE-20.patch [bz#1817445] +- kvm-virtiofsd-jail-lo-proc_self_fd.patch [bz#1817445] +- kvm-virtiofsd-Show-submounts.patch [bz#1817445] +- kvm-virtiofsd-only-retain-file-system-capabilities.patch [bz#1817445] +- kvm-virtiofsd-drop-all-capabilities-in-the-wait-parent-p.patch [bz#1817445] +- Resolves: bz#1817445 + (CVE-2020-10717 virt:8.2/qemu-kvm: QEMU: virtiofsd: guest may open maximum file descriptor to cause DoS [rhel-av-8]) +- Resolves: bz#1820531 + (qmp command query-pci get wrong result after hotplug device under hotplug=off controller) + +* Fri May 01 2020 Jon Maloy - 4.2.0-20.el8 +- kvm-pcie_root_port-Add-hotplug-disabling-option.patch [bz#1790899] +- kvm-compat-disable-edid-for-virtio-gpu-ccw.patch [bz#1816793] +- Resolves: bz#1790899 + ([RFE] QEMU devices should have the option to enable/disable hotplug/unplug) +- Resolves: bz#1816793 + ('edid' compat handling missing for virtio-gpu-ccw) + +* Tue Apr 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-19.el8_2 +- kvm-target-i386-do-not-set-unsupported-VMX-secondary-exe.patch [bz#1822682] +- Resolves: bz#1822682 + (QEMU-4.2 fails to start a VM on Azure) + +* Thu Apr 09 2020 Danilo Cesar Lemes de Paula - 4.2.0-18.el8_2 +- kvm-job-take-each-job-s-lock-individually-in-job_txn_app.patch [bz#1817621] +- 
kvm-replication-assert-we-own-context-before-job_cancel_.patch [bz#1817621] +- kvm-backup-don-t-acquire-aio_context-in-backup_clean.patch [bz#1817621] +- kvm-block-backend-Reorder-flush-pdiscard-function-defini.patch [bz#1817621] +- kvm-block-Increase-BB.in_flight-for-coroutine-and-sync-i.patch [bz#1817621] +- kvm-block-Fix-blk-in_flight-during-blk_wait_while_draine.patch [bz#1817621] +- Resolves: bz#1817621 + (Crash and deadlock with block jobs when using io-threads) + +* Mon Mar 30 2020 Danilo Cesar Lemes de Paula - 4.2.0-17.el8 +- kvm-block-pass-BlockDriver-reference-to-the-.bdrv_co_cre.patch [bz#1816007] +- kvm-block-trickle-down-the-fallback-image-creation-funct.patch [bz#1816007] +- kvm-Revert-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- kvm-mirror-Wait-only-for-in-flight-operations.patch [bz#1794692] +- Resolves: bz#1794692 + (Mirror block job stops making progress) +- Resolves: bz#1816007 + (qemu-img convert failed to convert with block device as target) + +* Tue Mar 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-16.el8 +- kvm-migration-Rate-limit-inside-host-pages.patch [bz#1814336] +- kvm-build-sys-do-not-make-qemu-ga-link-with-pixman.patch [bz#1811670] +- Resolves: bz#1811670 + (Unneeded qemu-guest-agent dependency on pixman) +- Resolves: bz#1814336 + ([POWER9] QEMU migration-test triggers a kernel warning) + +* Tue Mar 17 2020 Danilo Cesar Lemes de Paula - 4.2.0-15.el8 +- kvm-block-nbd-Fix-hang-in-.bdrv_close.patch [bz#1640894] +- kvm-block-Generic-file-creation-fallback.patch [bz#1640894] +- kvm-file-posix-Drop-hdev_co_create_opts.patch [bz#1640894] +- kvm-iscsi-Drop-iscsi_co_create_opts.patch [bz#1640894] +- kvm-iotests-Add-test-for-image-creation-fallback.patch [bz#1640894] +- kvm-block-Fix-leak-in-bdrv_create_file_fallback.patch [bz#1640894] +- kvm-iotests-Use-complete_and_wait-in-155.patch [bz#1790482 bz#1805143] +- kvm-block-Introduce-bdrv_reopen_commit_post-step.patch [bz#1790482 bz#1805143] +- 
kvm-block-qcow2-Move-bitmap-reopen-into-bdrv_reopen_comm.patch [bz#1790482 bz#1805143] +- kvm-iotests-Refactor-blockdev-reopen-test-for-iothreads.patch [bz#1790482 bz#1805143] +- kvm-block-bdrv_reopen-with-backing-file-in-different-Aio.patch [bz#1790482 bz#1805143] +- kvm-block-Versioned-x-blockdev-reopen-API-with-feature-f.patch [bz#1790482 bz#1805143] +- kvm-block-Make-bdrv_get_cumulative_perm-public.patch [bz#1790482 bz#1805143] +- kvm-block-Relax-restrictions-for-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Fix-run_job-with-use_log-False.patch [bz#1790482 bz#1805143] +- kvm-iotests-Test-mirror-with-temporarily-disabled-target.patch [bz#1790482 bz#1805143] +- kvm-block-Fix-cross-AioContext-blockdev-snapshot.patch [bz#1790482 bz#1805143] +- kvm-iotests-Add-iothread-cases-to-155.patch [bz#1790482 bz#1805143] +- kvm-qapi-Add-allow-write-only-overlay-feature-for-blockd.patch [bz#1790482 bz#1805143] +- kvm-exec-rom_reset-Free-rom-data-during-inmigrate-skip.patch [bz#1809380] +- Resolves: bz#1640894 + (Fix generic file creation fallback for qemu-img nvme:// image creation support) +- Resolves: bz#1790482 + (bitmaps in backing images can't be modified) +- Resolves: bz#1805143 + (allow late/lazy opening of backing chain for shallow blockdev-mirror) +- Resolves: bz#1809380 + (guest hang during reboot process after migration from RHEL7.8 to RHEL8.2.0.) 
+ +* Wed Mar 11 2020 Danilo Cesar Lemes de Paula - 4.2.0-14.el8 +- kvm-hw-smbios-set-new-default-SMBIOS-fields-for-Windows-.patch [bz#1782529] +- kvm-migration-multifd-clean-pages-after-filling-packet.patch [bz#1738451] +- kvm-migration-Make-sure-that-we-don-t-call-write-in-case.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-terminating-.patch [bz#1738451] +- kvm-migration-multifd-fix-destroyed-mutex-access-in-term.patch [bz#1738451] +- kvm-multifd-Make-sure-that-we-don-t-do-any-IO-after-an-e.patch [bz#1738451] +- kvm-qemu-file-Don-t-do-IO-after-shutdown.patch [bz#1738451] +- kvm-migration-Don-t-send-data-if-we-have-stopped.patch [bz#1738451] +- kvm-migration-Create-migration_is_running.patch [bz#1738451] +- kvm-migration-multifd-fix-nullptr-access-in-multifd_send.patch [bz#1738451] +- kvm-migration-Maybe-VM-is-paused-when-migration-is-cance.patch [bz#1738451] +- kvm-virtiofsd-Remove-fuse_req_getgroups.patch [bz#1797064] +- kvm-virtiofsd-fv_create_listen_socket-error-path-socket-.patch [bz#1797064] +- kvm-virtiofsd-load_capng-missing-unlock.patch [bz#1797064] +- kvm-virtiofsd-do_read-missing-NULL-check.patch [bz#1797064] +- kvm-tools-virtiofsd-fuse_lowlevel-Fix-fuse_out_header-er.patch [bz#1797064] +- kvm-virtiofsd-passthrough_ll-cleanup-getxattr-listxattr.patch [bz#1797064] +- kvm-virtiofsd-Fix-xattr-operations.patch [bz#1797064] +- Resolves: bz#1738451 + (qemu on src host core dump after set multifd-channels and do migration twice (first migration execute migrate_cancel)) +- Resolves: bz#1782529 + (Windows Update Enablement with default smbios strings in qemu) +- Resolves: bz#1797064 + (virtiofsd: Fixes) + +* Sat Feb 29 2020 Danilo Cesar Lemes de Paula - 4.2.0-13.el8 +- kvm-target-i386-kvm-initialize-feature-MSRs-very-early.patch [bz#1791648] +- kvm-target-i386-add-a-ucode-rev-property.patch [bz#1791648] +- kvm-target-i386-kvm-initialize-microcode-revision-from-K.patch [bz#1791648] +- kvm-target-i386-fix-TCG-UCODE_REV-access.patch [bz#1791648] 
+- kvm-target-i386-check-for-availability-of-MSR_IA32_UCODE.patch [bz#1791648] +- kvm-target-i386-enable-monitor-and-ucode-revision-with-c.patch [bz#1791648] +- kvm-qcow2-Fix-qcow2_alloc_cluster_abort-for-external-dat.patch [bz#1703907] +- kvm-mirror-Store-MirrorOp.co-for-debuggability.patch [bz#1794692] +- kvm-mirror-Don-t-let-an-operation-wait-for-itself.patch [bz#1794692] +- Resolves: bz#1703907 + ([upstream]QEMU coredump when converting to qcow2: external data file images on block devices with copy_offloading) +- Resolves: bz#1791648 + ([RFE] Passthrough host CPU microcode version to KVM guest if using CPU passthrough) +- Resolves: bz#1794692 + (Mirror block job stops making progress) + +* Mon Feb 24 2020 Danilo Cesar Lemes de Paula - 4.2.0-12.el8 +- kvm-vhost-user-gpu-Drop-trailing-json-comma.patch [bz#1805334] +- Resolves: bz#1805334 + (vhost-user/50-qemu-gpu.json is not valid JSON) + +* Sun Feb 23 2020 Danilo Cesar Lemes de Paula - 4.2.0-11.el8 +- kvm-spapr-Enable-DD2.3-accelerated-count-cache-flush-in-.patch [bz#1796240] +- kvm-util-add-slirp_fmt-helpers.patch [bz#1798994] +- kvm-tcp_emu-fix-unsafe-snprintf-usages.patch [bz#1798994] +- kvm-virtio-add-ability-to-delete-vq-through-a-pointer.patch [bz#1791590] +- kvm-virtio-make-virtio_delete_queue-idempotent.patch [bz#1791590] +- kvm-virtio-reset-region-cache-when-on-queue-deletion.patch [bz#1791590] +- kvm-virtio-net-delete-also-control-queue-when-TX-RX-dele.patch [bz#1791590] +- Resolves: bz#1791590 + ([Q35] No "DEVICE_DELETED" event in qmp after unplug virtio-net-pci device) +- Resolves: bz#1796240 + (Enable hw accelerated cache-count-flush by default for POWER9 DD2.3 cpus) +- Resolves: bz#1798994 + (CVE-2020-8608 qemu-kvm: QEMU: Slirp: potential OOB access due to unsafe snprintf() usages [rhel-av-8.2.0]) + +* Fri Feb 14 2020 Danilo Cesar Lemes de Paula - 4.2.0-10.el8 +- kvm-i386-Resolve-CPU-models-to-v1-by-default.patch [bz#1779078 bz#1787291 bz#1779078 bz#1779078] +- 
kvm-iotests-Support-job-complete-in-run_job.patch [bz#1781637] +- kvm-iotests-Create-VM.blockdev_create.patch [bz#1781637] +- kvm-block-Activate-recursively-even-for-already-active-n.patch [bz#1781637] +- kvm-hmp-Allow-using-qdev-ID-for-qemu-io-command.patch [bz#1781637] +- kvm-iotests-Test-external-snapshot-with-VM-state.patch [bz#1781637] +- kvm-iotests.py-Let-wait_migration-wait-even-more.patch [bz#1781637] +- kvm-blockdev-fix-coding-style-issues-in-drive_backup_pre.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_drive_backup-and-drive-backup-tra.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-unify-qmp_blockdev_backup-and-blockdev-back.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-honor-bdrv_try_set_aio_context-context-requ.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-backup-top-Begin-drain-earlier.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-block-backup-top-Don-t-acquire-context-while-droppin.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Acquire-AioContext-on-dirty-bitmap-function.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-blockdev-Return-bs-to-the-proper-context-on-snapshot.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-iotests-Test-handling-of-AioContexts-with-some-block.patch [bz#1745606 bz#1746217 bz#1773517 bz#1779036 bz#1782111 bz#1782175 bz#1783965] +- kvm-target-arm-monitor-query-cpu-model-expansion-crashed.patch [bz#1801320] +- kvm-docs-arm-cpu-features-Make-kvm-no-adjvtime-comment-c.patch [bz#1801320] +- Resolves: bz#1745606 + (Qemu hang when do incremental live backup in transaction mode without bitmap) +- Resolves: bz#1746217 + (Src qemu 
hang when do storage vm migration during guest installation) +- Resolves: bz#1773517 + (Src qemu hang when do storage vm migration with dataplane enable) +- Resolves: bz#1779036 + (Qemu coredump when do snapshot in transaction mode with one snapshot path not exist) +- Resolves: bz#1779078 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm)) +- Resolves: bz#1781637 + (qemu crashed when do mem and disk snapshot) +- Resolves: bz#1782111 + (Qemu hang when do full backup on multi-disks with one job's 'job-id' missed in transaction mode(data plane enable)) +- Resolves: bz#1782175 + (Qemu core dump when add persistent bitmap(data plane enable)) +- Resolves: bz#1783965 + (Qemu core dump when do backup with sync: bitmap and no bitmap provided) +- Resolves: bz#1787291 + (RHVH 4.4: Failed to run VM on 4.3/4.4 engine (Exit message: the CPU is incompatible with host CPU: Host CPU does not provide required features: hle, rtm) [rhel-8.1.0.z]) +- Resolves: bz#1801320 + (aarch64: backport query-cpu-model-expansion and adjvtime document fixes) + +* Mon Feb 10 2020 Danilo Cesar Lemes de Paula - 4.2.0-9.el8 +- kvm-ppc-Deassert-the-external-interrupt-pin-in-KVM-on-re.patch [bz#1776638] +- kvm-xics-Don-t-deassert-outputs.patch [bz#1776638] +- kvm-ppc-Don-t-use-CPUPPCState-irq_input_state-with-moder.patch [bz#1776638] +- kvm-trace-update-qemu-trace-stap-to-Python-3.patch [bz#1787395] +- kvm-redhat-Remove-redundant-fix-for-qemu-trace-stap.patch [bz#1787395] +- kvm-iscsi-Cap-block-count-from-GET-LBA-STATUS-CVE-2020-1.patch [bz#1794503] +- kvm-tpm-ppi-page-align-PPI-RAM.patch [bz#1787444] +- kvm-target-arm-kvm-trivial-Clean-up-header-documentation.patch [bz#1647366] +- kvm-target-arm-kvm64-kvm64-cpus-have-timer-registers.patch [bz#1647366] +- kvm-tests-arm-cpu-features-Check-feature-default-values.patch [bz#1647366] +- kvm-target-arm-kvm-Implement-virtual-time-adjustment.patch 
[bz#1647366] +- kvm-target-arm-cpu-Add-the-kvm-no-adjvtime-CPU-property.patch [bz#1647366] +- kvm-migration-Define-VMSTATE_INSTANCE_ID_ANY.patch [bz#1529231] +- kvm-migration-Change-SaveStateEntry.instance_id-into-uin.patch [bz#1529231] +- kvm-apic-Use-32bit-APIC-ID-for-migration-instance-ID.patch [bz#1529231] +- Resolves: bz#1529231 + ([q35] VM hangs after migration with 200 vCPUs) +- Resolves: bz#1647366 + (aarch64: Add support for the kvm-no-adjvtime ARM CPU feature) +- Resolves: bz#1776638 + (Guest failed to boot up after system_reset 20 times) +- Resolves: bz#1787395 + (qemu-trace-stap list : TypeError: startswith first arg must be bytes or a tuple of bytes, not str) +- Resolves: bz#1787444 + (Broken postcopy migration with vTPM device) +- Resolves: bz#1794503 + (CVE-2020-1711 qemu-kvm: QEMU: block: iscsi: OOB heap access via an unexpected response of iSCSI Server [rhel-av-8.2.0]) + +* Fri Jan 31 2020 Miroslav Rezanina - 4.2.0-8.el8 +- kvm-target-arm-arch_dump-Add-SVE-notes.patch [bz#1725084] +- kvm-vhost-Add-names-to-section-rounded-warning.patch [bz#1779041] +- kvm-vhost-Only-align-sections-for-vhost-user.patch [bz#1779041] +- kvm-vhost-coding-style-fix.patch [bz#1779041] +- kvm-virtio-fs-fix-MSI-X-nvectors-calculation.patch [bz#1694164] +- kvm-vhost-user-fs-remove-vhostfd-property.patch [bz#1694164] +- kvm-build-rename-CONFIG_LIBCAP-to-CONFIG_LIBCAP_NG.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-upstream-headers.patch [bz#1694164] +- kvm-virtiofsd-Pull-in-kernel-s-fuse.h.patch [bz#1694164] +- kvm-virtiofsd-Add-auxiliary-.c-s.patch [bz#1694164] +- kvm-virtiofsd-Add-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-Add-passthrough_ll.patch [bz#1694164] +- kvm-virtiofsd-Trim-down-imported-files.patch [bz#1694164] +- kvm-virtiofsd-Format-imported-files-to-qemu-style.patch [bz#1694164] +- kvm-virtiofsd-remove-mountpoint-dummy-argument.patch [bz#1694164] +- kvm-virtiofsd-remove-unused-notify-reply-support.patch [bz#1694164] +- 
kvm-virtiofsd-Remove-unused-enum-fuse_buf_copy_flags.patch [bz#1694164] +- kvm-virtiofsd-Fix-fuse_daemonize-ignored-return-values.patch [bz#1694164] +- kvm-virtiofsd-Fix-common-header-and-define-for-QEMU-buil.patch [bz#1694164] +- kvm-virtiofsd-Trim-out-compatibility-code.patch [bz#1694164] +- kvm-vitriofsd-passthrough_ll-fix-fallocate-ifdefs.patch [bz#1694164] +- kvm-virtiofsd-Make-fsync-work-even-if-only-inode-is-pass.patch [bz#1694164] +- kvm-virtiofsd-Add-options-for-virtio.patch [bz#1694164] +- kvm-virtiofsd-add-o-source-PATH-to-help-output.patch [bz#1694164] +- kvm-virtiofsd-Open-vhost-connection-instead-of-mounting.patch [bz#1694164] +- kvm-virtiofsd-Start-wiring-up-vhost-user.patch [bz#1694164] +- kvm-virtiofsd-Add-main-virtio-loop.patch [bz#1694164] +- kvm-virtiofsd-get-set-features-callbacks.patch [bz#1694164] +- kvm-virtiofsd-Start-queue-threads.patch [bz#1694164] +- kvm-virtiofsd-Poll-kick_fd-for-queue.patch [bz#1694164] +- kvm-virtiofsd-Start-reading-commands-from-queue.patch [bz#1694164] +- kvm-virtiofsd-Send-replies-to-messages.patch [bz#1694164] +- kvm-virtiofsd-Keep-track-of-replies.patch [bz#1694164] +- kvm-virtiofsd-Add-Makefile-wiring-for-virtiofsd-contrib.patch [bz#1694164] +- kvm-virtiofsd-Fast-path-for-virtio-read.patch [bz#1694164] +- kvm-virtiofsd-add-fd-FDNUM-fd-passing-option.patch [bz#1694164] +- kvm-virtiofsd-make-f-foreground-the-default.patch [bz#1694164] +- kvm-virtiofsd-add-vhost-user.json-file.patch [bz#1694164] +- kvm-virtiofsd-add-print-capabilities-option.patch [bz#1694164] +- kvm-virtiofs-Add-maintainers-entry.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-create-new-files-in-caller-.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-lo_map-for-ino-fh-indir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-ino_map-to-hide-lo_inod.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-dirp_map-to-hide-lo_dir.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-fd_map-to-hide-file-des.patch [bz#1694164] +- 
kvm-virtiofsd-passthrough_ll-add-fallback-for-racy-ops.patch [bz#1694164] +- kvm-virtiofsd-validate-path-components.patch [bz#1694164] +- kvm-virtiofsd-Plumb-fuse_bufvec-through-to-do_write_buf.patch [bz#1694164] +- kvm-virtiofsd-Pass-write-iov-s-all-the-way-through.patch [bz#1694164] +- kvm-virtiofsd-add-fuse_mbuf_iter-API.patch [bz#1694164] +- kvm-virtiofsd-validate-input-buffer-sizes-in-do_write_bu.patch [bz#1694164] +- kvm-virtiofsd-check-input-buffer-size-in-fuse_lowlevel.c.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_lookup.patch [bz#1694164] +- kvm-virtiofsd-prevent-.-escape-in-lo_do_readdir.patch [bz#1694164] +- kvm-virtiofsd-use-proc-self-fd-O_PATH-file-descriptor.patch [bz#1694164] +- kvm-virtiofsd-sandbox-mount-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-an-empty-network-namespace.patch [bz#1694164] +- kvm-virtiofsd-move-to-a-new-pid-namespace.patch [bz#1694164] +- kvm-virtiofsd-add-seccomp-whitelist.patch [bz#1694164] +- kvm-virtiofsd-Parse-flag-FUSE_WRITE_KILL_PRIV.patch [bz#1694164] +- kvm-virtiofsd-cap-ng-helpers.patch [bz#1694164] +- kvm-virtiofsd-Drop-CAP_FSETID-if-client-asked-for-it.patch [bz#1694164] +- kvm-virtiofsd-set-maximum-RLIMIT_NOFILE-limit.patch [bz#1694164] +- kvm-virtiofsd-fix-libfuse-information-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-syslog-command-line-option.patch [bz#1694164] +- kvm-virtiofsd-print-log-only-when-priority-is-high-enoug.patch [bz#1694164] +- kvm-virtiofsd-Add-ID-to-the-log-with-FUSE_LOG_DEBUG-leve.patch [bz#1694164] +- kvm-virtiofsd-Add-timestamp-to-the-log-with-FUSE_LOG_DEB.patch [bz#1694164] +- kvm-virtiofsd-Handle-reinit.patch [bz#1694164] +- kvm-virtiofsd-Handle-hard-reboot.patch [bz#1694164] +- kvm-virtiofsd-Kill-threads-when-queues-are-stopped.patch [bz#1694164] +- kvm-vhost-user-Print-unexpected-slave-message-types.patch [bz#1694164] +- kvm-contrib-libvhost-user-Protect-slave-fd-with-mutex.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-add-renameat2-support.patch 
[bz#1694164] +- kvm-virtiofsd-passthrough_ll-disable-readdirplus-on-cach.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-control-readdirplus.patch [bz#1694164] +- kvm-virtiofsd-rename-unref_inode-to-unref_inode_lolocked.patch [bz#1694164] +- kvm-virtiofsd-fail-when-parent-inode-isn-t-known-in-lo_d.patch [bz#1694164] +- kvm-virtiofsd-extract-root-inode-init-into-setup_root.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-clean-up-cache-related-opti.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-use-hashtable.patch [bz#1694164] +- kvm-virtiofsd-Clean-up-inodes-on-destroy.patch [bz#1694164] +- kvm-virtiofsd-support-nanosecond-resolution-for-file-tim.patch [bz#1694164] +- kvm-virtiofsd-fix-error-handling-in-main.patch [bz#1694164] +- kvm-virtiofsd-cleanup-allocated-resource-in-se.patch [bz#1694164] +- kvm-virtiofsd-fix-memory-leak-on-lo.source.patch [bz#1694164] +- kvm-virtiofsd-add-helper-for-lo_data-cleanup.patch [bz#1694164] +- kvm-virtiofsd-Prevent-multiply-running-with-same-vhost_u.patch [bz#1694164] +- kvm-virtiofsd-enable-PARALLEL_DIROPS-during-INIT.patch [bz#1694164] +- kvm-virtiofsd-fix-incorrect-error-handling-in-lo_do_look.patch [bz#1694164] +- kvm-Virtiofsd-fix-memory-leak-on-fuse-queueinfo.patch [bz#1694164] +- kvm-virtiofsd-Support-remote-posix-locks.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_lowlevel_is_virtio-in-fuse_sessio.patch [bz#1694164] +- kvm-virtiofsd-prevent-fv_queue_thread-vs-virtio_loop-rac.patch [bz#1694164] +- kvm-virtiofsd-make-lo_release-atomic.patch [bz#1694164] +- kvm-virtiofsd-prevent-races-with-lo_dirp_put.patch [bz#1694164] +- kvm-virtiofsd-rename-inode-refcount-to-inode-nlookup.patch [bz#1694164] +- kvm-libvhost-user-Fix-some-memtable-remap-cases.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-fix-refcounting-on-remove-r.patch [bz#1694164] +- kvm-virtiofsd-introduce-inode-refcount-to-prevent-use-af.patch [bz#1694164] +- kvm-virtiofsd-do-not-always-set-FUSE_FLOCK_LOCKS.patch [bz#1694164] +- 
kvm-virtiofsd-convert-more-fprintf-and-perror-to-use-fus.patch [bz#1694164] +- kvm-virtiofsd-Reset-O_DIRECT-flag-during-file-open.patch [bz#1694164] +- kvm-virtiofsd-Fix-data-corruption-with-O_APPEND-write-in.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Use-cache_readdir-for-direc.patch [bz#1694164] +- kvm-virtiofsd-add-definition-of-fuse_buf_writev.patch [bz#1694164] +- kvm-virtiofsd-use-fuse_buf_writev-to-replace-fuse_buf_wr.patch [bz#1694164] +- kvm-virtiofsd-process-requests-in-a-thread-pool.patch [bz#1694164] +- kvm-virtiofsd-prevent-FUSE_INIT-FUSE_DESTROY-races.patch [bz#1694164] +- kvm-virtiofsd-fix-lo_destroy-resource-leaks.patch [bz#1694164] +- kvm-virtiofsd-add-thread-pool-size-NUM-option.patch [bz#1694164] +- kvm-virtiofsd-Convert-lo_destroy-to-take-the-lo-mutex-lo.patch [bz#1694164] +- kvm-virtiofsd-passthrough_ll-Pass-errno-to-fuse_reply_er.patch [bz#1694164] +- kvm-virtiofsd-stop-all-queue-threads-on-exit-in-virtio_l.patch [bz#1694164] +- kvm-virtiofsd-add-some-options-to-the-help-message.patch [bz#1694164] +- kvm-redhat-ship-virtiofsd-vhost-user-device-backend.patch [bz#1694164] +- Resolves: bz#1694164 + (virtio-fs: host<->guest shared file system (qemu)) +- Resolves: bz#1725084 + (aarch64: support dumping SVE registers) +- Resolves: bz#1779041 + (netkvm: no connectivity Windows guest with q35 + hugepages + vhost + hv_synic) + +* Tue Jan 21 2020 Miroslav Rezanina - 4.2.0-7.el8 +- kvm-tcp_emu-Fix-oob-access.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-IRC-commands.patch [bz#1791568] +- kvm-slirp-use-correct-size-while-emulating-commands.patch [bz#1791568] +- kvm-RHEL-hw-i386-disable-nested-PERF_GLOBAL_CTRL-MSR-sup.patch [bz#1559846] +- Resolves: bz#1559846 + (Nested KVM: limit VMX features according to CPU models - Fast Train) +- Resolves: bz#1791568 + (CVE-2020-7039 qemu-kvm: QEMU: slirp: OOB buffer access while emulating tcp protocols in tcp_emu() [rhel-av-8.2.0]) + +* Wed Jan 15 2020 Danilo Cesar Lemes de Paula - 
4.2.0-6.el8 +- kvm-spapr-Don-t-trigger-a-CAS-reboot-for-XICS-XIVE-mode-.patch [bz#1733893] +- kvm-vfio-pci-Don-t-remove-irqchip-notifier-if-not-regist.patch [bz#1782678] +- kvm-virtio-don-t-enable-notifications-during-polling.patch [bz#1789301] +- kvm-usbredir-Prevent-recursion-in-usbredir_write.patch [bz#1790844] +- kvm-xhci-recheck-slot-status.patch [bz#1790844] +- Resolves: bz#1733893 + (Boot a guest with "-prom-env 'auto-boot?=false'", SLOF failed to enter the boot entry after input "boot" followed by "0 > " on VNC) +- Resolves: bz#1782678 + (qemu core dump after hot-unplugging the XXV710/XL710 PF) +- Resolves: bz#1789301 + (virtio-blk/scsi: fix notification suppression during AioContext polling) +- Resolves: bz#1790844 + (USB related fixes) + +* Tue Jan 07 2020 Danilo Cesar Lemes de Paula - 4.2.0-5.el8 +- kvm-i386-Remove-cpu64-rhel6-CPU-model.patch [bz#1741345] +- kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch [bz#1772774] +- Resolves: bz#1741345 + (Remove the "cpu64-rhel6" CPU from qemu-kvm) +- Resolves: bz#1772774 + (qemu-kvm core dump during migration+reboot ( Assertion `mem->dirty_bmap' failed )) + +* Fri Dec 13 2019 Danilo Cesar Lemes de Paula - 4.2.0-4.el8 +- Rebase to qemu-4.2 +- Resolves: bz#1783250 + (rebase qemu-kvm to 4.2) + +* Tue Dec 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-18.el8 +- kvm-LUKS-support-preallocation.patch [bz#1534951] +- kvm-nbd-add-empty-.bdrv_reopen_prepare.patch [bz#1718727] +- kvm-qdev-qbus-add-hidden-device-support.patch [bz#1757796] +- kvm-pci-add-option-for-net-failover.patch [bz#1757796] +- kvm-pci-mark-devices-partially-unplugged.patch [bz#1757796] +- kvm-pci-mark-device-having-guest-unplug-request-pending.patch [bz#1757796] +- kvm-qapi-add-unplug-primary-event.patch [bz#1757796] +- kvm-qapi-add-failover-negotiated-event.patch [bz#1757796] +- kvm-migration-allow-unplug-during-migration-for-failover.patch [bz#1757796] +- kvm-migration-add-new-migration-state-wait-unplug.patch [bz#1757796] +- 
kvm-libqos-tolerate-wait-unplug-migration-state.patch [bz#1757796] +- kvm-net-virtio-add-failover-support.patch [bz#1757796] +- kvm-vfio-unplug-failover-primary-device-before-migration.patch [bz#1757796] +- kvm-net-virtio-fix-dev_unplug_pending.patch [bz#1757796] +- kvm-net-virtio-return-early-when-failover-primary-alread.patch [bz#1757796] +- kvm-net-virtio-fix-re-plugging-of-primary-device.patch [bz#1757796] +- kvm-net-virtio-return-error-when-device_opts-arg-is-NULL.patch [bz#1757796] +- kvm-vfio-don-t-ignore-return-value-of-migrate_add_blocke.patch [bz#1757796] +- kvm-hw-vfio-pci-Fix-double-free-of-migration_blocker.patch [bz#1757796] +- Resolves: bz#1534951 + (RFE: Support preallocation mode for luks format) +- Resolves: bz#1718727 + (Committing changes to the backing file over NBD fails with reopening files not supported) +- Resolves: bz#1757796 + (RFE: support for net failover devices in qemu) + +* Mon Dec 02 2019 Danilo Cesar Lemes de Paula - 4.1.0-17.el8 +- kvm-qemu-pr-helper-fix-crash-in-mpath_reconstruct_sense.patch [bz#1772322] +- Resolves: bz#1772322 + (qemu-pr-helper: fix crash in mpath_reconstruct_sense) + +* Wed Nov 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-16.el8 +- kvm-curl-Keep-pointer-to-the-CURLState-in-CURLSocket.patch [bz#1745209] +- kvm-curl-Keep-socket-until-the-end-of-curl_sock_cb.patch [bz#1745209] +- kvm-curl-Check-completion-in-curl_multi_do.patch [bz#1745209] +- kvm-curl-Pass-CURLSocket-to-curl_multi_do.patch [bz#1745209] +- kvm-curl-Report-only-ready-sockets.patch [bz#1745209] +- kvm-curl-Handle-success-in-multi_check_completion.patch [bz#1745209] +- kvm-curl-Check-curl_multi_add_handle-s-return-code.patch [bz#1745209] +- kvm-vhost-user-save-features-if-the-char-dev-is-closed.patch [bz#1738768] +- kvm-block-snapshot-Restrict-set-of-snapshot-nodes.patch [bz#1658981] +- kvm-iotests-Test-internal-snapshots-with-blockdev.patch [bz#1658981] +- kvm-qapi-Add-feature-flags-to-commands-in-qapi-introspec.patch [bz#1658981] +- 
kvm-qapi-Allow-introspecting-fix-for-savevm-s-cooperatio.patch [bz#1658981] +- kvm-block-Remove-backing-null-from-bs-explicit_-options.patch [bz#1773925] +- kvm-iotests-Test-multiple-blockdev-snapshot-calls.patch [bz#1773925] +- Resolves: bz#1658981 + (qemu failed to create internal snapshot via 'savevm' when using blockdev) +- Resolves: bz#1738768 + (Guest fails to recover receiving packets after vhost-user reconnect) +- Resolves: bz#1745209 + (qemu-img gets stuck when stream-converting from http) +- Resolves: bz#1773925 + (Fail to do blockcommit with more than one snapshots) + +* Thu Nov 14 2019 Danilo Cesar Lemes de Paula - 4.1.0-15.el8 +- kvm-virtio-blk-Add-blk_drain-to-virtio_blk_device_unreal.patch [bz#1706759] +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1772473] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1772473] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1772473] +- Resolves: bz#1706759 + (qemu core dump when unplug a 16T GPT type disk from win2019 guest) +- Resolves: bz#1772473 + (Import fixes from 8.1.0 into 8.1.1 branch) + +* Tue Oct 29 2019 Danilo Cesar Lemes de Paula - 4.1.0-14.el8 +- kvm-Revert-qcow2-skip-writing-zero-buffers-to-empty-COW-.patch [bz#1751934] +- kvm-coroutine-Add-qemu_co_mutex_assert_locked.patch [bz#1764721] +- kvm-qcow2-Fix-corruption-bug-in-qcow2_detect_metadata_pr.patch [bz#1764721] +- Resolves: bz#1751934 + (Fail to install guest when xfs is the host filesystem) +- Resolves: bz#1764721 + (qcow2 image corruption due to incorrect locking in preallocation detection) + +* Fri Sep 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-13.el8 +- kvm-nbd-server-attach-client-channel-to-the-export-s-Aio.patch [bz#1748253] +- kvm-virtio-blk-schedule-virtio_notify_config-to-run-on-m.patch [bz#1744955] +- Resolves: bz#1744955 + (Qemu hang when block resize a qcow2 image) +- Resolves: bz#1748253 + (QEMU crashes (core dump) when using the integrated NBD server with data-plane) + +* Thu 
Sep 26 2019 Danilo Cesar Lemes de Paula - 4.1.0-12.el8 +- kvm-block-Use-QEMU_IS_ALIGNED.patch [bz#1745922] +- kvm-block-qcow2-Fix-corruption-introduced-by-commit-8ac0.patch [bz#1745922] +- kvm-block-qcow2-refactor-encryption-code.patch [bz#1745922] +- kvm-qemu-iotests-Add-test-for-bz-1745922.patch [bz#1745922] +- Resolves: bz#1745922 + (Luks-inside-qcow2 snapshot cannot boot after 'qemu-img rebase') + +* Mon Sep 23 2019 Danilo Cesar Lemes de Paula - 4.1.0-11.el8 +- kvm-blockjob-update-nodes-head-while-removing-all-bdrv.patch [bz#1746631] +- kvm-hostmem-file-fix-pmem-file-size-check.patch [bz#1724008 bz#1736788] +- kvm-memory-fetch-pmem-size-in-get_file_size.patch [bz#1724008 bz#1736788] +- kvm-pr-manager-Fix-invalid-g_free-crash-bug.patch [bz#1753992] +- Resolves: bz#1724008 + (QEMU core dumped "memory_region_get_ram_ptr: Assertion `mr->ram_block' failed") +- Resolves: bz#1736788 + (QEMU core dumped if boot guest with nvdimm backed by /dev/dax0.0 and option pmem=off) +- Resolves: bz#1746631 + (Qemu core dump when do block commit under stress) +- Resolves: bz#1753992 + (core dump when testing persistent reservation in guest) + +* Mon Sep 16 2019 Danilo Cesar Lemes de Paula - 4.1.0-10.el8 +- kvm-spapr-xive-Mask-the-EAS-when-allocating-an-IRQ.patch [bz#1748725] +- kvm-block-create-Do-not-abort-if-a-block-driver-is-not-a.patch [bz#1746267] +- kvm-virtio-blk-Cancel-the-pending-BH-when-the-dataplane-.patch [bz#1717321] +- kvm-Using-ip_deq-after-m_free-might-read-pointers-from-a.patch [bz#1749737] +- Resolves: bz#1717321 + (qemu-kvm core dumped when repeat "system_reset" multiple times during guest boot) +- Resolves: bz#1746267 + (qemu coredump: qemu-kvm: block/create.c:68: qmp_blockdev_create: Assertion `drv' failed) +- Resolves: bz#1748725 + ([ppc][migration][v6.3-rc1-p1ce8930]basic migration failed with "qemu-kvm: KVM_SET_DEVICE_ATTR failed: Group 3 attr 0x0000000000001309: Device or resource busy") +- Resolves: bz#1749737 + (CVE-2019-15890 qemu-kvm: QEMU: Slirp: 
use-after-free during packet reassembly [rhel-av-8]) + +* Tue Sep 10 2019 Danilo Cesar Lemes de Paula - 4.1.0-9.el8 +- kvm-migration-always-initialise-ram_counters-for-a-new-m.patch [bz#1734316] +- kvm-migration-add-qemu_file_update_transfer-interface.patch [bz#1734316] +- kvm-migration-add-speed-limit-for-multifd-migration.patch [bz#1734316] +- kvm-migration-update-ram_counters-for-multifd-sync-packe.patch [bz#1734316] +- kvm-spapr-pci-Consolidate-de-allocation-of-MSIs.patch [bz#1750200] +- kvm-spapr-pci-Free-MSIs-during-reset.patch [bz#1750200] +- Resolves: bz#1734316 + (multifd migration does not honour speed limits, consumes entire bandwidth of NIC) +- Resolves: bz#1750200 + ([RHEL8.1][QEMU4.1]boot up guest with vf device,then system_reset guest,error prompt(qemu-kvm: Can't allocate MSIs for device 2800: IRQ 4904 is not free)) + +* Mon Sep 09 2019 Danilo Cesar Lemes de Paula - 4.1.0-8.el8 +- kvm-migration-Do-not-re-read-the-clock-on-pre_save-in-ca.patch [bz#1747836] +- kvm-ehci-fix-queue-dev-null-ptr-dereference.patch [bz#1746790] +- kvm-spapr-Use-SHUTDOWN_CAUSE_SUBSYSTEM_RESET-for-CAS-reb.patch [bz#1743477] +- kvm-file-posix-Handle-undetectable-alignment.patch [bz#1749134] +- kvm-block-posix-Always-allocate-the-first-block.patch [bz#1749134] +- kvm-iotests-Test-allocate_first_block-with-O_DIRECT.patch [bz#1749134] +- Resolves: bz#1743477 + (Since bd94bc06479a "spapr: change default interrupt mode to 'dual'", QEMU resets the machine to select the appropriate interrupt controller. And -no-reboot prevents that.) 
+- Resolves: bz#1746790 + (qemu core dump while migrate from RHEL7.6 to RHEL8.1) +- Resolves: bz#1747836 + (Call traces after guest migration due to incorrect handling of the timebase) +- Resolves: bz#1749134 + (I/O error when virtio-blk disk is backed by a raw image on 4k disk) + +* Fri Sep 06 2019 Danilo Cesar Lemes de Paula - 4.1.0-7.el8 +- kvm-trace-Clarify-DTrace-SystemTap-help-message.patch [bz#1516220] +- kvm-socket-Add-backlog-parameter-to-socket_listen.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_syn.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_channel_socket_asy.patch [bz#1726898] +- kvm-socket-Add-num-connections-to-qio_net_listener_open_.patch [bz#1726898] +- kvm-multifd-Use-number-of-channels-as-listen-backlog.patch [bz#1726898] +- kvm-pseries-Fix-compat_pvr-on-reset.patch [bz#1744107] +- kvm-spapr-Set-compat-mode-in-spapr_core_plug.patch [bz#1744107] +- Resolves: bz#1516220 + (-trace help prints an incomplete list of trace events) +- Resolves: bz#1726898 + (Parallel migration fails with error "Unable to write to socket: Connection reset by peer" now and then) +- Resolves: bz#1744107 + (Migration from P8(qemu4.1) to P9(qemu4.1), after migration, qemu crash on destination with error message "qemu-kvm: error while loading state for instance 0x1 of device 'cpu'") + +* Wed Sep 04 2019 Danilo Cesar Lemes de Paula - 4.1.0-6.el8 +- kvm-memory-Refactor-memory_region_clear_coalescing.patch [bz#1743142] +- kvm-memory-Split-zones-when-do-coalesced_io_del.patch [bz#1743142] +- kvm-memory-Remove-has_coalesced_range-counter.patch [bz#1743142] +- kvm-memory-Fix-up-memory_region_-add-del-_coalescing.patch [bz#1743142] +- kvm-enable-virgl-for-real-this-time.patch [bz#1559740] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1743142 + (Boot guest with multiple e1000 devices, qemu will crash after several guest reboots: kvm_mem_ioeventfd_add: error adding ioeventfd: No space left on device 
(28)) + +* Tue Aug 27 2019 Danilo Cesar Lemes de Paula - 4.1.0-5.el8 +- kvm-redhat-s390x-Rename-s390-ccw-virtio-rhel8.0.0-to-s39.patch [bz#1693772] +- kvm-redhat-s390x-Add-proper-compatibility-options-for-th.patch [bz#1693772] +- kvm-enable-virgl.patch [bz#1559740] +- kvm-redhat-update-pseries-rhel8.1.0-machine-type.patch [bz#1744170] +- kvm-Do-not-run-iotests-on-brew-build.patch [bz#1742197 bz#1742819] +- Resolves: bz#1559740 + ([RFE] Enable virgl as TechPreview (qemu)) +- Resolves: bz#1693772 + ([IBM zKVM] RHEL AV 8.1.0 machine type update for s390x) +- Resolves: bz#1742197 + (Remove iotests from qemu-kvm builds [RHEL AV 8.1.0]) +- Resolves: bz#1742819 + (Remove iotests from qemu-kvm builds [RHEL 8.1.0]) +- Resolves: bz#1744170 + ([IBM Power] New 8.1.0 machine type for pseries) + +* Tue Aug 20 2019 Danilo Cesar Lemes de Paula - 4.1.0-4.el8 +- kvm-RHEL-disable-hostmem-memfd.patch [bz#1738626 bz#1740797] +- Resolves: bz#1738626 + (Disable memfd in QEMU) +- Resolves: bz#1740797 + (Disable memfd in QEMU) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-3.el8 +- kvm-x86-machine-types-pc_rhel_8_0_compat.patch [bz#1719649] +- kvm-x86-machine-types-q35-Fixup-units_per_default_bus.patch [bz#1719649] +- kvm-x86-machine-types-Fixup-dynamic-sysbus-entries.patch [bz#1719649] +- kvm-x86-machine-types-add-pc-q35-rhel8.1.0.patch [bz#1719649] +- kvm-machine-types-Update-hw_compat_rhel_8_0-from-hw_comp.patch [bz#1719649] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1719649] +- Resolves: bz#1719649 + (8.1 machine type for x86) + +* Mon Aug 19 2019 Danilo Cesar Lemes de Paula - 4.1.0-2.el8 +- kvm-spec-Update-seavgabios-dependency.patch [bz#1725664] +- kvm-pc-Don-t-make-die-id-mandatory-unless-necessary.patch [bz#1741451] +- kvm-display-bochs-fix-pcie-support.patch [bz#1733977 bz#1740692] +- kvm-spapr-Reset-CAS-IRQ-subsystem-after-devices.patch [bz#1733977] +- kvm-spapr-xive-Fix-migration-of-hot-plugged-CPUs.patch [bz#1733977] +- 
kvm-riscv-roms-Fix-make-rules-for-building-sifive_u-bios.patch [bz#1733977 bz#1740692] +- kvm-Update-version-for-v4.1.0-release.patch [bz#1733977 bz#1740692] +- Resolves: bz#1725664 + (Update seabios dependency) +- Resolves: bz#1733977 + (Qemu core dumped: /home/ngu/qemu/hw/intc/xics_kvm.c:321: ics_kvm_set_irq: Assertion `kernel_xics_fd != -1' failed) +- Resolves: bz#1740692 + (Backport QEMU 4.1.0 rc5 & ga patches) +- Resolves: bz#1741451 + (Failed to hot-plug vcpus) + +* Wed Aug 14 2019 Miroslav Rezanina - 4.1.0-1.el8 +- Rebase to qemu 4.1.0 rc4 [bz#1705235] +- Resolves: bz#1705235 + (Rebase qemu-kvm for RHEL-AV 8.1.0) + +* Tue Jul 23 2019 Danilo Cesar Lemes de Paula - 4.0.0-6.el8 +- kvm-x86_64-rh-devices-add-missing-TPM-passthrough.patch [bz#1519013] +- kvm-x86_64-rh-devices-enable-TPM-emulation.patch [bz#1519013] +- kvm-vfio-increase-the-cap-on-number-of-assigned-devices-.patch [bz#1719823] +- Resolves: bz#1519013 + ([RFE] QEMU Software TPM support (vTPM, or TPM emulation)) +- Resolves: bz#1719823 + ([RHEL 8.1] [RFE] increase the maximum of vfio devices to more than 32 in qemu-kvm) + +* Mon Jul 08 2019 Miroslav Rezanina - 4.0.0-5.el8 +- kvm-qemu-kvm.spec-bump-libseccomp-2.4.0.patch [bz#1720306] +- kvm-qxl-check-release-info-object.patch [bz#1712717] +- kvm-target-i386-add-MDS-NO-feature.patch [bz#1722839] +- kvm-block-file-posix-Unaligned-O_DIRECT-block-status.patch [bz#1588356] +- kvm-iotests-Test-unaligned-raw-images-with-O_DIRECT.patch [bz#1588356] +- kvm-rh-set-CONFIG_BOCHS_DISPLAY-y-for-x86.patch [bz#1707118] +- Resolves: bz#1588356 + (qemu crashed on the source host when do storage migration with source qcow2 disk created by 'qemu-img') +- Resolves: bz#1707118 + (enable device: bochs-display (QEMU)) +- Resolves: bz#1712717 + (CVE-2019-12155 qemu-kvm: QEMU: qxl: null pointer dereference while releasing spice resources [rhel-av-8]) +- Resolves: bz#1720306 + (VM failed to start with error "failed to install seccomp syscall filter in the kernel") +- Resolves: 
bz#1722839 + ([Intel 8.1 FEAT] MDS_NO exposure to guest - Fast Train) + +* Tue Jun 11 2019 Danilo Cesar Lemes de Paula - 4.0.0-4.el8 +- kvm-Disable-VXHS-support.patch [bz#1714937] +- kvm-aarch64-Add-virt-rhel8.1.0-machine-type-for-ARM.patch [bz#1713735] +- kvm-aarch64-Allow-ARM-VIRT-iommu-option-in-RHEL8.1-machi.patch [bz#1713735] +- kvm-usb-call-reset-handler-before-updating-state.patch [bz#1713679] +- kvm-usb-host-skip-reset-for-untouched-devices.patch [bz#1713679] +- kvm-usb-host-avoid-libusb_set_configuration-calls.patch [bz#1713679] +- kvm-aarch64-Compile-out-IOH3420.patch [bz#1627283] +- kvm-vl-Fix-drive-blockdev-persistent-reservation-managem.patch [bz#1714891] +- kvm-vl-Document-why-objects-are-delayed.patch [bz#1714891] +- Resolves: bz#1627283 + (Compile out IOH3420 on aarch64) +- Resolves: bz#1713679 + (Detached device when trying to upgrade USB device firmware when in doing USB Passthrough via QEMU) +- Resolves: bz#1713735 + (Allow ARM VIRT iommu option in RHEL8.1 machine) +- Resolves: bz#1714891 + (Guest with persistent reservation manager for a disk fails to start) +- Resolves: bz#1714937 + (Disable VXHS support) + +* Tue May 28 2019 Danilo Cesar Lemes de Paula - 4.0.0-3.el8 +- kvm-redhat-fix-cut-n-paste-garbage-in-hw_compat-comments.patch [bz#1709726] +- kvm-compat-Generic-hw_compat_rhel_8_0.patch [bz#1709726] +- kvm-redhat-sync-pseries-rhel7.6.0-with-rhel-av-8.0.1.patch [bz#1709726] +- kvm-redhat-define-pseries-rhel8.1.0-machine-type.patch [bz#1709726] +- Resolves: bz#1709726 + (Forward and backward migration failed with "qemu-kvm: error while loading state for instance 0x0 of device 'spapr'") + +* Sat May 25 2019 Danilo Cesar Lemes de Paula - 4.0.0-2.el8 +- kvm-target-i386-define-md-clear-bit.patch [bz#1703297 bz#1703304 bz#1703310 bz#1707274] +- Resolves: bz#1703297 + (CVE-2018-12126 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Store Buffer Data Sampling (MSBDS) [rhel-av-8]) +- Resolves: bz#1703304 + (CVE-2018-12130 virt:8.0.0/qemu-kvm: 
hardware: Microarchitectural Fill Buffer Data Sampling (MFBDS) [rhel-av-8]) +- Resolves: bz#1703310 + (CVE-2018-12127 virt:8.0.0/qemu-kvm: hardware: Micro-architectural Load Port Data Sampling - Information Leak (MLPDS) [rhel-av-8]) +- Resolves: bz#1707274 + (CVE-2019-11091 virt:8.0.0/qemu-kvm: hardware: Microarchitectural Data Sampling Uncacheable Memory (MDSUM) [rhel-av-8.1.0]) + +* Wed May 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-26.el8 +- kvm-target-ppc-spapr-Add-SPAPR_CAP_LARGE_DECREMENTER.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-workaround-option-to-SPAPR_CAP_.patch [bz#1698711] +- kvm-target-ppc-spapr-Add-SPAPR_CAP_CCF_ASSIST.patch [bz#1698711] +- kvm-target-ppc-tcg-make-spapr_caps-apply-cap-cfpc-sbbc-i.patch [bz#1698711] +- kvm-target-ppc-spapr-Enable-mitigations-by-default-for-p.patch [bz#1698711] +- kvm-slirp-ensure-there-is-enough-space-in-mbuf-to-null-t.patch [bz#1693076] +- kvm-slirp-don-t-manipulate-so_rcv-in-tcp_emu.patch [bz#1693076] +- Resolves: bz#1693076 + (CVE-2019-6778 qemu-kvm: QEMU: slirp: heap buffer overflow in tcp_emu() [rhel-av-8]) +- Resolves: bz#1698711 + (Enable Spectre / Meltdown mitigations by default in pseries-rhel8.0.0 machine type) + +* Mon May 06 2019 Danilo Cesar Lemes de Paula - 3.1.0-25.el8 +- kvm-redhat-enable-tpmdev-passthrough.patch [bz#1688312] +- kvm-exec-Only-count-mapped-memory-backends-for-qemu_getr.patch [bz#1680492] +- kvm-Enable-libpmem-to-support-nvdimm.patch [bz#1705149] +- Resolves: bz#1680492 + (Qemu quits suddenly while system_reset after hot-plugging unsupported memory by compatible guest on P9 with 1G huge page set) +- Resolves: bz#1688312 + ([RFE] enable TPM passthrough at compile time (qemu-kvm)) +- Resolves: bz#1705149 + (libpmem support is not enabled in qemu-kvm) + +* Fri Apr 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-24.el8 +- kvm-x86-host-phys-bits-limit-option.patch [bz#1688915] +- kvm-rhel-Set-host-phys-bits-limit-48-on-rhel-machine-typ.patch [bz#1688915] +- Resolves: bz#1688915 + ([Intel 
8.0 Alpha] physical bits should <= 48 when host with 5level paging &EPT5 and qemu command with "-cpu qemu64" parameters.) + +* Tue Apr 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-23.el8 +- kvm-device_tree-Fix-integer-overflowing-in-load_device_t.patch [bz#1693173] +- Resolves: bz#1693173 + (CVE-2018-20815 qemu-kvm: QEMU: device_tree: heap buffer overflow while loading device tree blob [rhel-av-8]) + +* Mon Apr 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-22.el8 +- kvm-i386-kvm-Disable-arch_capabilities-if-MSR-can-t-be-s.patch [bz#1687578] +- kvm-i386-Make-arch_capabilities-migratable.patch [bz#1687578] +- Resolves: bz#1687578 + (Incorrect CVE vulnerabilities reported on Cascade Lake cpus) + +* Thu Apr 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-21.el8 +- kvm-Remove-7-qcow2-and-luks-iotests-that-are-taking-25-s.patch [bz#1683473] +- kvm-spapr-fix-out-of-bounds-write-in-spapr_populate_drme.patch [bz#1674438] +- kvm-qcow2-include-LUKS-payload-overhead-in-qemu-img-meas.patch [bz#1655065] +- kvm-iotests-add-LUKS-payload-overhead-to-178-qemu-img-me.patch [bz#1655065] +- kvm-vnc-detect-and-optimize-pageflips.patch [bz#1666206] +- kvm-Load-kvm-module-during-boot.patch [bz#1676907 bz#1685995] +- kvm-hostmem-file-reject-invalid-pmem-file-sizes.patch [bz#1669053] +- kvm-iotests-Fix-test-200-on-s390x-without-virtio-pci.patch [bz#1687582] +- kvm-block-file-posix-do-not-fail-on-unlock-bytes.patch [bz#1652572] +- Resolves: bz#1652572 + (QEMU core dumped if stop nfs service during migration) +- Resolves: bz#1655065 + ([rhel.8.0][fast train]'qemu-img measure' size does not match the real allocated size for luks-inside-qcow2 image) +- Resolves: bz#1666206 + (vnc server should detect page-flips and avoid sending fullscreen updates then.) 
+- Resolves: bz#1669053 + (Guest call trace when boot with nvdimm device backed by /dev/dax) +- Resolves: bz#1674438 + (RHEL8.0 - Guest reboot fails after memory hotplug multiple times (kvm)) +- Resolves: bz#1676907 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1683473 + (Remove 7 qcow2 & luks iotests from rhel8 fast train build %check phase) +- Resolves: bz#1685995 + (/dev/kvm device exists but kernel module is not loaded on boot up causing VM start to fail in libvirt) +- Resolves: bz#1687582 + (QEMU IOTEST 200 fails with 'virtio-scsi-pci is not a valid device model name') + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-20.el8 +- kvm-i386-Add-stibp-flag-name.patch [bz#1686260] +- Resolves: bz#1686260 + (stibp is missing on qemu 3.0 and qemu 3.1) + +* Fri Mar 15 2019 Danilo Cesar Lemes de Paula - 3.1.0-19.el8 +- kvm-migration-Fix-cancel-state.patch [bz#1608649] +- kvm-migration-rdma-Fix-qemu_rdma_cleanup-null-check.patch [bz#1608649] +- Resolves: bz#1608649 + (Query-migrate get "failed" status after migrate-cancel) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-18.el8 +- kvm-target-i386-Disable-MPX-support-on-named-CPU-models.patch [bz#1661030] +- kvm-i386-remove-the-new-CPUID-PCONFIG-from-Icelake-Serve.patch [bz#1661515] +- kvm-i386-remove-the-INTEL_PT-CPUID-bit-from-named-CPU-mo.patch [bz#1661515] +- kvm-Revert-i386-Add-CPUID-bit-for-PCONFIG.patch [bz#1661515] +- Resolves: bz#1661030 + (Remove MPX support from 8.0 machine types) +- Resolves: bz#1661515 + (Remove PCONFIG and INTEL_PT from Icelake-* CPU models) + +* Tue Feb 26 2019 Danilo Cesar Lemes de Paula - 3.1.0-17.el8 +- kvm-block-Apply-auto-read-only-for-ro-whitelist-drivers.patch [bz#1678968] +- Resolves: bz#1678968 + (-blockdev: auto-read-only is ineffective for drivers on read-only whitelist) + +* Mon Feb 25 2019 Danilo Cesar Lemes de Paula - 3.1.0-16.el8 +- 
kvm-fdc-Revert-downstream-disablement-of-device-floppy.patch [bz#1664997] +- kvm-fdc-Restrict-floppy-controllers-to-RHEL-7-machine-ty.patch [bz#1664997] +- Resolves: bz#1664997 + (Restrict floppy device to RHEL-7 machine types) + +* Wed Feb 13 2019 Danilo Cesar Lemes de Paula - 3.1.0-15.el8 +- kvm-Add-raw-qcow2-nbd-and-luks-iotests-to-run-during-the.patch [bz#1664855] +- kvm-Introduce-the-qemu-kvm-tests-rpm.patch [bz#1669924] +- Resolves: bz#1664855 + (Run iotests in qemu-kvm build %check phase) +- Resolves: bz#1669924 + (qemu-kvm packaging: Package the avocado_qemu tests and qemu-iotests in a new rpm) + +* Tue Feb 12 2019 Danilo Cesar Lemes de Paula - 3.1.0-14.el8 +- kvm-doc-fix-the-configuration-path.patch [bz#1644985] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) + +* Mon Feb 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-13.el8 +- kvm-Acceptance-tests-add-Linux-initrd-checking-test.patch [bz#1669922] +- kvm-mmap-alloc-unfold-qemu_ram_mmap.patch [bz#1671519] +- kvm-mmap-alloc-fix-hugetlbfs-misaligned-length-in-ppc64.patch [bz#1671519] +- kvm-BZ1653590-Require-at-least-64kiB-pages-for-downstrea.patch [bz#1653590] +- kvm-block-Fix-invalidate_cache-error-path-for-parent-act.patch [bz#1673014] +- kvm-virtio-scsi-Move-BlockBackend-back-to-the-main-AioCo.patch [bz#1656276 bz#1662508] +- kvm-scsi-disk-Acquire-the-AioContext-in-scsi_-_realize.patch [bz#1656276 bz#1662508] +- kvm-virtio-scsi-Forbid-devices-with-different-iothreads-.patch [bz#1656276 bz#1662508] +- Resolves: bz#1653590 + ([Fast train]had better stop qemu immediately while guest was making use of an improper page size) +- Resolves: bz#1656276 + (qemu-kvm core dumped after hotplug the deleted disk with iothread parameter) +- Resolves: bz#1662508 + (Qemu core dump when start guest with two disks using same drive) +- Resolves: bz#1669922 + (Backport avocado-qemu tests for QEMU 3.1) +- Resolves: bz#1671519 + (RHEL8.0 Snapshot3 
- qemu doesn't free up hugepage memory when hotplug/hotunplug using memory-backend-file (qemu-kvm)) +- Resolves: bz#1673014 + (Local VM and migrated VM on the same host can run with same RAW file as visual disk source while without shareable configured or lock manager enabled) + +* Fri Feb 08 2019 Danilo Cesar Lemes de Paula - 3.1.0-12.el8 +- kvm-io-ensure-UNIX-client-doesn-t-unlink-server-socket.patch [bz#1665896] +- kvm-scsi-disk-Don-t-use-empty-string-as-device-id.patch [bz#1668248] +- kvm-scsi-disk-Add-device_id-property.patch [bz#1668248] +- Resolves: bz#1665896 + (VNC unix listener socket is deleted after first client quits) +- Resolves: bz#1668248 + ("An unknown error has occurred" when using cdrom to install the system with two blockdev disks.(when choose installation destination)) + +* Thu Jan 31 2019 Danilo Cesar Lemes de Paula - 3.1.0-11.el8 +- kvm-Fix-fsfreeze-hook-path-in-the-man-page.patch [bz#1644985] +- kvm-json-Fix-handling-when-not-interpolating.patch [bz#1668244] +- Resolves: bz#1644985 + (The "fsfreeze-hook" script path shown by command "qemu-ga --help" or "man qemu-ga" is wrong - Fast Train) +- Resolves: bz#1668244 + (qemu-img: /var/tmp/v2vovl9951f8.qcow2: CURL: Error opening file: The requested URL returned error: 404 Not Found) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-10.el8 +- kvm-throttle-groups-fix-restart-coroutine-iothread-race.patch [bz#1655947] +- kvm-iotests-add-238-for-throttling-tgm-unregister-iothre.patch [bz#1655947] +- Resolves: bz#1655947 + (qemu-kvm core dumped after unplug the device which was set io throttling parameters) + +* Tue Jan 29 2019 Danilo Cesar Lemes de Paula - 3.1.0-9.el8 +- kvm-migration-rdma-unregister-fd-handler.patch [bz#1666601] +- kvm-s390x-tod-Properly-stop-the-KVM-TOD-while-the-guest-.patch [bz#1659127] +- kvm-hw-s390x-Fix-bad-mask-in-time2tod.patch [bz#1659127] +- Resolves: bz#1659127 + (Stress guest and stop it, then do live migration, guest hit call trace on destination end) +- Resolves: 
bz#1666601 + ([q35] dst qemu core dumped when do rdma migration with Mellanox IB QDR card) + +* Thu Jan 24 2019 Danilo Cesar Lemes de Paula - 3.1.0-7.el8 +- kvm-i386-kvm-expose-HV_CPUID_ENLIGHTMENT_INFO.EAX-and-HV.patch [bz#1653511] +- kvm-i386-kvm-add-a-comment-explaining-why-.feat_names-ar.patch [bz#1653511] +- Resolves: bz#1653511 + (qemu doesn't report all support cpu features which cause libvirt cannot get the support status of hv_tlbflush) + +* Wed Jan 23 2019 Danilo Cesar Lemes de Paula - 3.1.0-6.el8 +- kvm-spapr-Fix-ibm-max-associativity-domains-property-num.patch [bz#1653114] +- kvm-cpus-ignore-ESRCH-in-qemu_cpu_kick_thread.patch [bz#1668205] +- Resolves: bz#1653114 + (Incorrect NUMA nodes passed to qemu-kvm guest in ibm,max-associativity-domains property) +- Resolves: bz#1668205 + (Guest quit with error when hotunplug cpu) + +* Mon Jan 21 2019 Danilo Cesar Lemes de Paula - 3.1.0-5.el8 +- kvm-virtio-Helper-for-registering-virtio-device-types.patch [bz#1648023] +- kvm-virtio-Provide-version-specific-variants-of-virtio-P.patch [bz#1648023] +- kvm-globals-Allow-global-properties-to-be-optional.patch [bz#1648023] +- kvm-virtio-Make-disable-legacy-disable-modern-compat-pro.patch [bz#1648023] +- kvm-aarch64-Add-virt-rhel8.0.0-machine-type-for-ARM.patch [bz#1656504] +- kvm-aarch64-Set-virt-rhel8.0.0-max_cpus-to-512.patch [bz#1656504] +- kvm-aarch64-Use-256MB-ECAM-region-by-default.patch [bz#1656504] +- Resolves: bz#1648023 + (Provide separate device types for transitional virtio PCI devices - Fast Train) +- Resolves: bz#1656504 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (aarch64)) + +* Fri Jan 11 2019 Danilo Cesar Lemes de Paula - 3.1.0-4.el8 +- kvm-hw-s390x-s390-virtio-ccw-Add-machine-types-for-RHEL8.patch [bz#1656510] +- kvm-spapr-Add-H-Call-H_HOME_NODE_ASSOCIATIVITY.patch [bz#1661967] +- kvm-redhat-Fixing-.gitpublish-to-include-AV-information.patch [] +- Resolves: bz#1656510 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (s390x)) +- 
Resolves: bz#1661967 + (Kernel prints the message "VPHN is not supported. Disabling polling...") + +* Thu Jan 03 2019 Danilo Cesar Lemes de Paula - 3.1.0-3.el8 +- kvm-redhat-define-pseries-rhel8.0.0-machine-type.patch [bz#1656508] +- Resolves: bz#1656508 + (Machine types for qemu-kvm based on rebase to qemu-3.1 (ppc64le)) + +* Fri Dec 21 2018 Danilo Cesar Lemes de Paula - 3.1.0-2.el8 +- kvm-pc-7.5-compat-entries.patch [bz#1655820] +- kvm-compat-Generic-HW_COMPAT_RHEL7_6.patch [bz#1655820] +- kvm-pc-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-pc-Add-compat-for-pc-i440fx-rhel7.6.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-pc-q35-8.0.0-machine-type.patch [bz#1655820] +- kvm-pc-Add-x-migrate-smi-count-off-to-PC_RHEL7_6_COMPAT.patch [bz#1655820] +- kvm-clear-out-KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT-for.patch [bz#1659604] +- kvm-Add-edk2-Requires-to-qemu-kvm.patch [bz#1660208] +- Resolves: bz#1655820 + (Can't migarate between rhel8 and rhel7 when guest has device "video") +- Resolves: bz#1659604 + (8->7 migration failed: qemu-kvm: error: failed to set MSR 0x4b564d02 to 0x27fc13285) +- Resolves: bz#1660208 + (qemu-kvm: Should depend on the architecture-appropriate guest firmware) + +* Thu Dec 13 2018 Danilo Cesar Lemes de Paula - 3.1.0-1.el8 +- Rebase to qemu-kvm 3.1.0 + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-47 +- kvm-Disable-CONFIG_IPMI-and-CONFIG_I2C-for-ppc64.patch [bz#1640044] +- kvm-Disable-CONFIG_CAN_BUS-and-CONFIG_CAN_SJA1000.patch [bz#1640042] +- Resolves: bz#1640042 + (Disable CONFIG_CAN_BUS and CONFIG_CAN_SJA1000 config switches) +- Resolves: bz#1640044 + (Disable CONFIG_I2C and CONFIG_IPMI in default-configs/ppc64-softmmu.mak) + +* Tue Dec 11 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-46 +- kvm-qcow2-Give-the-refcount-cache-the-minimum-possible-s.patch [bz#1656507] +- kvm-docs-Document-the-new-default-sizes-of-the-qcow2-cac.patch [bz#1656507] +- kvm-qcow2-Fix-Coverity-warning-when-calculating-the-refc.patch [bz#1656507] +- 
kvm-include-Add-IEC-binary-prefixes-in-qemu-units.h.patch [bz#1656507] +- kvm-qcow2-Options-documentation-fixes.patch [bz#1656507] +- kvm-include-Add-a-lookup-table-of-sizes.patch [bz#1656507] +- kvm-qcow2-Make-sizes-more-humanly-readable.patch [bz#1656507] +- kvm-qcow2-Avoid-duplication-in-setting-the-refcount-cach.patch [bz#1656507] +- kvm-qcow2-Assign-the-L2-cache-relatively-to-the-image-si.patch [bz#1656507] +- kvm-qcow2-Increase-the-default-upper-limit-on-the-L2-cac.patch [bz#1656507] +- kvm-qcow2-Resize-the-cache-upon-image-resizing.patch [bz#1656507] +- kvm-qcow2-Set-the-default-cache-clean-interval-to-10-min.patch [bz#1656507] +- kvm-qcow2-Explicit-number-replaced-by-a-constant.patch [bz#1656507] +- kvm-block-backend-Set-werror-rerror-defaults-in-blk_new.patch [bz#1657637] +- kvm-qcow2-Fix-cache-clean-interval-documentation.patch [bz#1656507] +- Resolves: bz#1656507 + ([RHEL.8] qcow2 cache is too small) +- Resolves: bz#1657637 + (Wrong werror default for -device drive=) + +* Thu Dec 06 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-45 +- kvm-target-ppc-add-basic-support-for-PTCR-on-POWER9.patch [bz#1639069] +- kvm-linux-headers-Update-for-nested-KVM-HV-downstream-on.patch [bz#1639069] +- kvm-target-ppc-Add-one-reg-id-for-ptcr.patch [bz#1639069] +- kvm-ppc-spapr_caps-Add-SPAPR_CAP_NESTED_KVM_HV.patch [bz#1639069] +- kvm-Re-enable-CONFIG_HYPERV_TESTDEV.patch [bz#1651195] +- kvm-qxl-use-guest_monitor_config-for-local-renderer.patch [bz#1610163] +- kvm-Declare-cirrus-vga-as-deprecated.patch [bz#1651994] +- kvm-Do-not-build-bluetooth-support.patch [bz#1654651] +- kvm-vfio-helpers-Fix-qemu_vfio_open_pci-crash.patch [bz#1645840] +- kvm-balloon-Allow-multiple-inhibit-users.patch [bz#1650272] +- kvm-Use-inhibit-to-prevent-ballooning-without-synchr.patch [bz#1650272] +- kvm-vfio-Inhibit-ballooning-based-on-group-attachment-to.patch [bz#1650272] +- kvm-vfio-ccw-pci-Allow-devices-to-opt-in-for-ballooning.patch [bz#1650272] +- 
kvm-vfio-pci-Handle-subsystem-realpath-returning-NULL.patch [bz#1650272] +- kvm-vfio-pci-Fix-failure-to-close-file-descriptor-on-err.patch [bz#1650272] +- kvm-postcopy-Synchronize-usage-of-the-balloon-inhibitor.patch [bz#1650272] +- Resolves: bz#1610163 + (guest shows border blurred screen with some resolutions when qemu boot with -device qxl-vga ,and guest on rhel7.6 has no such question) +- Resolves: bz#1639069 + ([IBM 8.0 FEAT] POWER9 - Nested virtualization in RHEL8.0 KVM for ppc64le - qemu-kvm side) +- Resolves: bz#1645840 + (Qemu core dump when hotplug nvme:// drive via -blockdev) +- Resolves: bz#1650272 + (Ballooning is incompatible with vfio assigned devices, but not prevented) +- Resolves: bz#1651195 + (Re-enable hyperv-testdev device) +- Resolves: bz#1651994 + (Declare the "Cirrus VGA" device emulation of QEMU as deprecated in RHEL8) +- Resolves: bz#1654651 + (Qemu: hw: bt: keep bt/* objects from building [rhel-8.0]) + +* Tue Nov 27 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 +- kvm-block-Make-more-block-drivers-compile-time-configura.patch [bz#1598842 bz#1598842] +- kvm-RHEL8-Add-disable-configure-options-to-qemu-spec-fil.patch [bz#1598842] +- Resolves: bz#1598842 + (Compile out unused block drivers) + +* Mon Nov 26 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-43 + +- kvm-configure-add-test-for-libudev.patch [bz#1636185] +- kvm-qga-linux-report-disk-serial-number.patch [bz#1636185] +- kvm-qga-linux-return-disk-device-in-guest-get-fsinfo.patch [bz#1636185] +- kvm-qemu-error-introduce-error-warn-_report_once.patch [bz#1625173] +- kvm-intel-iommu-start-to-use-error_report_once.patch [bz#1625173] +- kvm-intel-iommu-replace-more-vtd_err_-traces.patch [bz#1625173] +- kvm-intel_iommu-introduce-vtd_reset_caches.patch [bz#1625173] +- kvm-intel_iommu-better-handling-of-dmar-state-switch.patch [bz#1625173] +- kvm-intel_iommu-move-ce-fetching-out-when-sync-shadow.patch [bz#1625173 bz#1629616] +- kvm-intel_iommu-handle-invalid-ce-for-shadow-sync.patch 
[bz#1625173 bz#1629616] +- kvm-block-remove-bdrv_dirty_bitmap_make_anon.patch [bz#1518989] +- kvm-block-simplify-code-around-releasing-bitmaps.patch [bz#1518989] +- kvm-hbitmap-Add-advance-param-to-hbitmap_iter_next.patch [bz#1518989] +- kvm-test-hbitmap-Add-non-advancing-iter_next-tests.patch [bz#1518989] +- kvm-block-dirty-bitmap-Add-bdrv_dirty_iter_next_area.patch [bz#1518989] +- kvm-blockdev-backup-add-bitmap-argument.patch [bz#1518989] +- kvm-dirty-bitmap-switch-assert-fails-to-errors-in-bdrv_m.patch [bz#1518989] +- kvm-dirty-bitmap-rename-bdrv_undo_clear_dirty_bitmap.patch [bz#1518989] +- kvm-dirty-bitmap-make-it-possible-to-restore-bitmap-afte.patch [bz#1518989] +- kvm-blockdev-rename-block-dirty-bitmap-clear-transaction.patch [bz#1518989] +- kvm-qapi-add-transaction-support-for-x-block-dirty-bitma.patch [bz#1518989] +- kvm-block-dirty-bitmaps-add-user_locked-status-checker.patch [bz#1518989] +- kvm-block-dirty-bitmaps-fix-merge-permissions.patch [bz#1518989] +- kvm-block-dirty-bitmaps-allow-clear-on-disabled-bitmaps.patch [bz#1518989] +- kvm-block-dirty-bitmaps-prohibit-enable-disable-on-locke.patch [bz#1518989] +- kvm-block-backup-prohibit-backup-from-using-in-use-bitma.patch [bz#1518989] +- kvm-nbd-forbid-use-of-frozen-bitmaps.patch [bz#1518989] +- kvm-bitmap-Update-count-after-a-merge.patch [bz#1518989] +- kvm-iotests-169-drop-deprecated-autoload-parameter.patch [bz#1518989] +- kvm-block-qcow2-improve-error-message-in-qcow2_inactivat.patch [bz#1518989] +- kvm-bloc-qcow2-drop-dirty_bitmaps_loaded-state-variable.patch [bz#1518989] +- kvm-dirty-bitmaps-clean-up-bitmaps-loading-and-migration.patch [bz#1518989] +- kvm-iotests-improve-169.patch [bz#1518989] +- kvm-iotests-169-add-cases-for-source-vm-resuming.patch [bz#1518989] +- kvm-pc-dimm-turn-alignment-assert-into-check.patch [bz#1630116] +- Resolves: bz#1518989 + (RFE: QEMU Incremental live backup) +- Resolves: bz#1625173 + ([NVMe Device Assignment] Guest could not boot up with q35+iommu) +- Resolves: 
bz#1629616 + (boot guest with q35+vIOMMU+ device assignment, qemu terminal shows "qemu-kvm: VFIO_UNMAP_DMA: -22" when return assigned network devices from vfio driver to ixgbe in guest) +- Resolves: bz#1630116 + (pc_dimm_get_free_addr: assertion failed: (QEMU_ALIGN_UP(address_space_start, align) == address_space_start)) +- Resolves: bz#1636185 + ([RFE] Report disk device name and serial number (qemu-guest-agent on Linux)) + +* Mon Nov 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-42.el8 +- kvm-luks-Allow-share-rw-on.patch [bz#1629701] +- kvm-redhat-reenable-gluster-support.patch [bz#1599340] +- kvm-redhat-bump-libusb-requirement.patch [bz#1627970] +- Resolves: bz#1599340 + (Reenable glusterfs in qemu-kvm once BZ#1567292 gets fixed) +- Resolves: bz#1627970 + (symbol lookup error: /usr/libexec/qemu-kvm: undefined symbol: libusb_set_option) +- Resolves: bz#1629701 + ("share-rw=on" does not work for luks format image - Fast Train) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-41.el8 +- kvm-block-rbd-pull-out-qemu_rbd_convert_options.patch [bz#1635585] +- kvm-block-rbd-Attempt-to-parse-legacy-filenames.patch [bz#1635585] +- kvm-block-rbd-add-deprecation-documentation-for-filename.patch [bz#1635585] +- kvm-block-rbd-add-iotest-for-rbd-legacy-keyvalue-filenam.patch [bz#1635585] +- Resolves: bz#1635585 + (rbd json format of 7.6 is incompatible with 7.5) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-40.el8 + +- kvm-vnc-call-sasl_server_init-only-when-required.patch [bz#1609327] +- kvm-nbd-server-fix-NBD_CMD_CACHE.patch [bz#1636142] +- kvm-nbd-fix-NBD_FLAG_SEND_CACHE-value.patch [bz#1636142] +- kvm-test-bdrv-drain-bdrv_drain-works-with-cross-AioConte.patch [bz#1637976] +- kvm-block-Use-bdrv_do_drain_begin-end-in-bdrv_drain_all.patch [bz#1637976] +- kvm-block-Remove-recursive-parameter-from-bdrv_drain_inv.patch [bz#1637976] +- kvm-block-Don-t-manually-poll-in-bdrv_drain_all.patch [bz#1637976] +- 
kvm-tests-test-bdrv-drain-bdrv_drain_all-works-in-corout.patch [bz#1637976] +- kvm-block-Avoid-unnecessary-aio_poll-in-AIO_WAIT_WHILE.patch [bz#1637976] +- kvm-block-Really-pause-block-jobs-on-drain.patch [bz#1637976] +- kvm-block-Remove-bdrv_drain_recurse.patch [bz#1637976] +- kvm-test-bdrv-drain-Add-test-for-node-deletion.patch [bz#1637976] +- kvm-block-Drain-recursively-with-a-single-BDRV_POLL_WHIL.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-node-deletion-in-subtree-recurs.patch [bz#1637976] +- kvm-block-Don-t-poll-in-parent-drain-callbacks.patch [bz#1637976] +- kvm-test-bdrv-drain-Graph-change-through-parent-callback.patch [bz#1637976] +- kvm-block-Defer-.bdrv_drain_begin-callback-to-polling-ph.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-that-bdrv_drain_invoke-doesn-t-.patch [bz#1637976] +- kvm-block-Allow-AIO_WAIT_WHILE-with-NULL-ctx.patch [bz#1637976] +- kvm-block-Move-bdrv_drain_all_begin-out-of-coroutine-con.patch [bz#1637976] +- kvm-block-ignore_bds_parents-parameter-for-drain-functio.patch [bz#1637976] +- kvm-block-Allow-graph-changes-in-bdrv_drain_all_begin-en.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-graph-changes-in-drain_all-sect.patch [bz#1637976] +- kvm-block-Poll-after-drain-on-attaching-a-node.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-bdrv_append-to-drained-node.patch [bz#1637976] +- kvm-block-linux-aio-acquire-AioContext-before-qemu_laio_.patch [bz#1637976] +- kvm-util-async-use-qemu_aio_coroutine_enter-in-co_schedu.patch [bz#1637976] +- kvm-job-Fix-nested-aio_poll-hanging-in-job_txn_apply.patch [bz#1637976] +- kvm-job-Fix-missing-locking-due-to-mismerge.patch [bz#1637976] +- kvm-blockjob-Wake-up-BDS-when-job-becomes-idle.patch [bz#1637976] +- kvm-aio-wait-Increase-num_waiters-even-in-home-thread.patch [bz#1637976] +- kvm-test-bdrv-drain-Drain-with-block-jobs-in-an-I-O-thre.patch [bz#1637976] +- kvm-test-blockjob-Acquire-AioContext-around-job_cancel_s.patch [bz#1637976] +- kvm-job-Use-AIO_WAIT_WHILE-in-job_finish_sync.patch 
[bz#1637976] +- kvm-test-bdrv-drain-Test-AIO_WAIT_WHILE-in-completion-ca.patch [bz#1637976] +- kvm-block-Add-missing-locking-in-bdrv_co_drain_bh_cb.patch [bz#1637976] +- kvm-block-backend-Add-.drained_poll-callback.patch [bz#1637976] +- kvm-block-backend-Fix-potential-double-blk_delete.patch [bz#1637976] +- kvm-block-backend-Decrease-in_flight-only-after-callback.patch [bz#1637976] +- kvm-blockjob-Lie-better-in-child_job_drained_poll.patch [bz#1637976] +- kvm-block-Remove-aio_poll-in-bdrv_drain_poll-variants.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-nested-poll-in-bdrv_drain_poll_.patch [bz#1637976] +- kvm-job-Avoid-deadlocks-in-job_completed_txn_abort.patch [bz#1637976] +- kvm-test-bdrv-drain-AIO_WAIT_WHILE-in-job-.commit-.abort.patch [bz#1637976] +- kvm-test-bdrv-drain-Fix-outdated-comments.patch [bz#1637976] +- kvm-block-Use-a-single-global-AioWait.patch [bz#1637976] +- kvm-test-bdrv-drain-Test-draining-job-source-child-and-p.patch [bz#1637976] +- kvm-qemu-img-Fix-assert-when-mapping-unaligned-raw-file.patch [bz#1639374] +- kvm-iotests-Add-test-221-to-catch-qemu-img-map-regressio.patch [bz#1639374] +- Resolves: bz#1609327 + (qemu-kvm[37046]: Could not find keytab file: /etc/qemu/krb5.tab: Unknown error 49408) +- Resolves: bz#1636142 + (qemu NBD_CMD_CACHE flaws impacting non-qemu NBD clients) +- Resolves: bz#1637976 + (Crashes and hangs with iothreads vs. 
block jobs) +- Resolves: bz#1639374 + (qemu-img map 'Aborted (core dumped)' when specifying a plain file) + +* Tue Oct 16 2018 Danilo Cesar Lemes de Paula - +- kvm-linux-headers-update.patch [bz#1508142] +- kvm-s390x-cpumodel-Set-up-CPU-model-for-AP-device-suppor.patch [bz#1508142] +- kvm-s390x-kvm-enable-AP-instruction-interpretation-for-g.patch [bz#1508142] +- kvm-s390x-ap-base-Adjunct-Processor-AP-object-model.patch [bz#1508142] +- kvm-s390x-vfio-ap-Introduce-VFIO-AP-device.patch [bz#1508142] +- kvm-s390-doc-detailed-specifications-for-AP-virtualizati.patch [bz#1508142] +- Resolves: bz#1508142 + ([IBM 8.0 FEAT] KVM: Guest-dedicated Crypto Adapters - qemu part) + +* Mon Oct 15 2018 Danilo Cesar Lemes de Paula - 2.12.0-38.el8 +- kvm-Revert-hw-acpi-build-build-SRAT-memory-affinity-stru.patch [bz#1609235] +- kvm-add-udev-kvm-check.patch [bz#1552663] +- kvm-aio-posix-Don-t-count-ctx-notifier-as-progress-when-.patch [bz#1623085] +- kvm-aio-Do-aio_notify_accept-only-during-blocking-aio_po.patch [bz#1623085] +- kvm-aio-posix-fix-concurrent-access-to-poll_disable_cnt.patch [bz#1632622] +- kvm-aio-posix-compute-timeout-before-polling.patch [bz#1632622] +- kvm-aio-posix-do-skip-system-call-if-ctx-notifier-pollin.patch [bz#1632622] +- kvm-intel-iommu-send-PSI-always-even-if-across-PDEs.patch [bz#1450712] +- kvm-intel-iommu-remove-IntelIOMMUNotifierNode.patch [bz#1450712] +- kvm-intel-iommu-add-iommu-lock.patch [bz#1450712] +- kvm-intel-iommu-only-do-page-walk-for-MAP-notifiers.patch [bz#1450712] +- kvm-intel-iommu-introduce-vtd_page_walk_info.patch [bz#1450712] +- kvm-intel-iommu-pass-in-address-space-when-page-walk.patch [bz#1450712] +- kvm-intel-iommu-trace-domain-id-during-page-walk.patch [bz#1450712] +- kvm-util-implement-simple-iova-tree.patch [bz#1450712] +- kvm-intel-iommu-rework-the-page-walk-logic.patch [bz#1450712] +- kvm-i386-define-the-ssbd-CPUID-feature-bit-CVE-2018-3639.patch [bz#1633928] +- Resolves: bz#1450712 + (Booting nested guest with vIOMMU, the 
assigned network devices can not receive packets (qemu)) +- Resolves: bz#1552663 + (81-kvm-rhel.rules is no longer part of initscripts) +- Resolves: bz#1609235 + (Win2016 guest can't recognize pc-dimm hotplugged to node 0) +- Resolves: bz#1623085 + (VM doesn't boot from HD) +- Resolves: bz#1632622 + (~40% virtio_blk disk performance drop for win2012r2 guest when comparing qemu-kvm-rhev-2.12.0-9 with qemu-kvm-rhev-2.12.0-12) +- Resolves: bz#1633928 + (CVE-2018-3639 qemu-kvm: hw: cpu: speculative store bypass [rhel-8.0]) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - 2.12.0-37.el8 +- kvm-block-for-jobs-do-not-clear-user_paused-until-after-.patch [bz#1635583] +- kvm-iotests-Add-failure-matching-to-common.qemu.patch [bz#1635583] +- kvm-block-iotest-to-catch-abort-on-forced-blockjob-cance.patch [bz#1635583] +- Resolves: bz#1635583 + (Quitting VM causes qemu core dump once the block mirror job paused for no enough target space) + +* Fri Oct 12 2018 Danilo Cesar Lemes de Paula - qemu-kvm-2.12.0-36 +- kvm-check-Only-test-ivshm-when-it-is-compiled-in.patch [bz#1621817] +- kvm-Disable-ivshmem.patch [bz#1621817] +- kvm-mirror-Fail-gracefully-for-source-target.patch [bz#1637963] +- kvm-commit-Add-top-node-base-node-options.patch [bz#1637970] +- kvm-qemu-iotests-Test-commit-with-top-node-base-node.patch [bz#1637970] +- Resolves: bz#1621817 + (Disable IVSHMEM in RHEL 8) +- Resolves: bz#1637963 + (Segfault on 'blockdev-mirror' with same node as source and target) +- Resolves: bz#1637970 + (allow using node-names with block-commit) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-35.el8 +- kvm-redhat-make-the-plugins-executable.patch [bz#1638304] +- Resolves: bz#1638304 + (the driver packages lack all the library Requires) + +* Thu Oct 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-34.el8 +- kvm-seccomp-allow-sched_setscheduler-with-SCHED_IDLE-pol.patch [bz#1618356] +- kvm-seccomp-use-SIGSYS-signal-instead-of-killing-the-thr.patch [bz#1618356] +- 
kvm-seccomp-prefer-SCMP_ACT_KILL_PROCESS-if-available.patch [bz#1618356] +- kvm-configure-require-libseccomp-2.2.0.patch [bz#1618356] +- kvm-seccomp-set-the-seccomp-filter-to-all-threads.patch [bz#1618356] +- kvm-memory-cleanup-side-effects-of-memory_region_init_fo.patch [bz#1600365] +- Resolves: bz#1600365 + (QEMU core dumped when hotplug memory exceeding host hugepages and with discard-data=yes) +- Resolves: bz#1618356 + (qemu-kvm: Qemu: seccomp: blacklist is not applied to all threads [rhel-8]) + +* Fri Oct 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-33.el8 +- kvm-migration-postcopy-Clear-have_listen_thread.patch [bz#1608765] +- kvm-migration-cleanup-in-error-paths-in-loadvm.patch [bz#1608765] +- kvm-jobs-change-start-callback-to-run-callback.patch [bz#1632939] +- kvm-jobs-canonize-Error-object.patch [bz#1632939] +- kvm-jobs-add-exit-shim.patch [bz#1632939] +- kvm-block-commit-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-mirror-utilize-job_exit-shim.patch [bz#1632939] +- kvm-jobs-utilize-job_exit-shim.patch [bz#1632939] +- kvm-block-backup-make-function-variables-consistently-na.patch [bz#1632939] +- kvm-jobs-remove-ret-argument-to-job_completed-privatize-.patch [bz#1632939] +- kvm-jobs-remove-job_defer_to_main_loop.patch [bz#1632939] +- kvm-block-commit-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-mirror-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-stream-add-block-job-creation-flags.patch [bz#1632939] +- kvm-block-commit-refactor-commit-to-use-job-callbacks.patch [bz#1632939] +- kvm-block-mirror-don-t-install-backing-chain-on-abort.patch [bz#1632939] +- kvm-block-mirror-conservative-mirror_exit-refactor.patch [bz#1632939] +- kvm-block-stream-refactor-stream-to-use-job-callbacks.patch [bz#1632939] +- kvm-tests-blockjob-replace-Blockjob-with-Job.patch [bz#1632939] +- kvm-tests-test-blockjob-remove-exit-callback.patch [bz#1632939] +- kvm-tests-test-blockjob-txn-move-.exit-to-.clean.patch [bz#1632939] +- 
kvm-jobs-remove-.exit-callback.patch [bz#1632939] +- kvm-qapi-block-commit-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-mirror-expose-new-job-properties.patch [bz#1632939] +- kvm-qapi-block-stream-expose-new-job-properties.patch [bz#1632939] +- kvm-block-backup-qapi-documentation-fixup.patch [bz#1632939] +- kvm-blockdev-document-transactional-shortcomings.patch [bz#1632939] +- Resolves: bz#1608765 + (After postcopy migration, do savevm and loadvm, guest hang and call trace) +- Resolves: bz#1632939 + (qemu blockjobs other than backup do not support job-finalize or job-dismiss) + +* Fri Sep 28 2018 Danilo Cesar Lemes de Paula - 2.12.0-32.el8 +- kvm-Re-enable-disabled-Hyper-V-enlightenments.patch [bz#1625185] +- kvm-Fix-annocheck-issues.patch [bz#1624164] +- kvm-exec-check-that-alignment-is-a-power-of-two.patch [bz#1630746] +- kvm-curl-Make-sslverify-off-disable-host-as-well-as-peer.patch [bz#1575925] +- Resolves: bz#1575925 + ("SSL: no alternative certificate subject name matches target host name" error even though sslverify = off) +- Resolves: bz#1624164 + (Review annocheck distro flag failures in qemu-kvm) +- Resolves: bz#1625185 + (Re-enable disabled Hyper-V enlightenments) +- Resolves: bz#1630746 + (qemu_ram_mmap: Assertion `is_power_of_2(align)' failed) + +* Tue Sep 11 2018 Danilo Cesar Lemes de Paula - 2.12.0-31.el8 +- kvm-i386-Disable-TOPOEXT-by-default-on-cpu-host.patch [bz#1619804] +- kvm-redhat-enable-opengl-add-build-and-runtime-deps.patch [bz#1618412] +- Resolves: bz#1618412 + (Enable opengl (for intel vgpu display)) +- Resolves: bz#1619804 + (kernel panic in init_amd_cacheinfo) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-30.el8 +- kvm-redhat-Disable-vhost-crypto.patch [bz#1625668] +- Resolves: bz#1625668 + (Decide if we should disable 'vhost-crypto' or not) + +* Wed Sep 05 2018 Danilo Cesar Lemes de Paula - 2.12.0-29.el8 +- kvm-target-i386-sev-fix-memory-leaks.patch [bz#1615717] +- 
kvm-i386-Fix-arch_query_cpu_model_expansion-leak.patch [bz#1615717] +- kvm-redhat-Update-build-configuration.patch [bz#1573156] +- Resolves: bz#1573156 + (Update build configure for QEMU 2.12.0) +- Resolves: bz#1615717 + (Memory leaks) + +* Wed Aug 29 2018 Danilo Cesar Lemes de Paula - 2.12.0-27.el8 +- kvm-Fix-libusb-1.0.22-deprecated-libusb_set_debug-with-l.patch [bz#1622656] +- Resolves: bz#1622656 + (qemu-kvm fails to build due to libusb_set_debug being deprecated) + +* Fri Aug 17 2018 Danilo Cesar Lemes de Paula - 2.12.0-26.el8 +- kvm-redhat-remove-extra-in-rhel_rhev_conflicts-macro.patch [bz#1618752] +- Resolves: bz#1618752 + (qemu-kvm can't be installed in RHEL-8 as it Conflicts with itself.) + +* Thu Aug 16 2018 Danilo Cesar Lemes de Paula - 2.12.0-25.el8 +- kvm-Migration-TLS-Fix-crash-due-to-double-cleanup.patch [bz#1594384] +- Resolves: bz#1594384 + (2.12 migration fixes) + +* Tue Aug 14 2018 Danilo Cesar Lemes de Paula - 2.12.0-24.el8 +- kvm-Add-qemu-keymap-to-qemu-kvm-common.patch [bz#1593117] +- Resolves: bz#1593117 + (add qemu-keymap utility) + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-23.el8 +- Fixing an issue with some old command in the spec file + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-22.el8 +- Fix an issue with the build_configure script. 
+- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) + + +* Fri Aug 10 2018 Danilo Cesar Lemes de Paula - 2.12.0-20.el8 +- kvm-migration-stop-compressing-page-in-migration-thread.patch [bz#1594384] +- kvm-migration-stop-compression-to-allocate-and-free-memo.patch [bz#1594384] +- kvm-migration-stop-decompression-to-allocate-and-free-me.patch [bz#1594384] +- kvm-migration-detect-compression-and-decompression-error.patch [bz#1594384] +- kvm-migration-introduce-control_save_page.patch [bz#1594384] +- kvm-migration-move-some-code-to-ram_save_host_page.patch [bz#1594384] +- kvm-migration-move-calling-control_save_page-to-the-comm.patch [bz#1594384] +- kvm-migration-move-calling-save_zero_page-to-the-common-.patch [bz#1594384] +- kvm-migration-introduce-save_normal_page.patch [bz#1594384] +- kvm-migration-remove-ram_save_compressed_page.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-memory-leak-in-dirt.patch [bz#1594384] +- kvm-migration-fix-saving-normal-page-even-if-it-s-been-c.patch [bz#1594384] +- kvm-migration-update-index-field-when-delete-or-qsort-RD.patch [bz#1594384] +- kvm-migration-introduce-decompress-error-check.patch [bz#1594384] +- kvm-migration-Don-t-activate-block-devices-if-using-S.patch [bz#1594384] +- kvm-migration-not-wait-RDMA_CM_EVENT_DISCONNECTED-event-.patch [bz#1594384] +- kvm-migration-block-dirty-bitmap-fix-dirty_bitmap_load.patch [bz#1594384] +- kvm-s390x-add-RHEL-7.6-machine-type-for-ccw.patch [bz#1595718] +- kvm-s390x-cpumodel-default-enable-bpb-and-ppa15-for-z196.patch [bz#1595718] +- kvm-linux-headers-asm-s390-kvm.h-header-sync.patch [bz#1612938] +- kvm-s390x-kvm-add-etoken-facility.patch [bz#1612938] +- Resolves: bz#1594384 + (2.12 migration fixes) +- Resolves: bz#1595718 + (Add ppa15/bpb to the default cpu model for z196 and higher in the 7.6 s390-ccw-virtio machine) +- Resolves: bz#1612938 + (Add etoken support to qemu-kvm for s390x KVM guests) + +* Fri Aug 10 2018 Danilo Cesar 
Lemes de Paula - 2.12.0-18.el8 + Mass import from RHEL 7.6 qemu-kvm-rhev, including fixes to the following BZs: + +- kvm-AArch64-Add-virt-rhel7.6-machine-type.patch [bz#1558723] +- kvm-cpus-Fix-event-order-on-resume-of-stopped-guest.patch [bz#1566153] +- kvm-qemu-img-Check-post-truncation-size.patch [bz#1523065] +- kvm-vga-catch-depth-0.patch [bz#1575541] +- kvm-Fix-x-hv-max-vps-compat-value-for-7.4-machine-type.patch [bz#1583959] +- kvm-ccid-card-passthru-fix-regression-in-realize.patch [bz#1584984] +- kvm-Use-4-MB-vram-for-cirrus.patch [bz#1542080] +- kvm-spapr_pci-Remove-unhelpful-pagesize-warning.patch [bz#1505664] +- kvm-rpm-Add-nvme-VFIO-driver-to-rw-whitelist.patch [bz#1416180] +- kvm-qobject-Use-qobject_to-instead-of-type-cast.patch [bz#1557995] +- kvm-qobject-Ensure-base-is-at-offset-0.patch [bz#1557995] +- kvm-qobject-use-a-QObjectBase_-struct.patch [bz#1557995] +- kvm-qobject-Replace-qobject_incref-QINCREF-qobject_decre.patch [bz#1557995] +- kvm-qobject-Modify-qobject_ref-to-return-obj.patch [bz#1557995] +- kvm-rbd-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-iscsi-Drop-deprecated-drive-parameter-filename.patch [bz#1557995] +- kvm-block-Add-block-specific-QDict-header.patch [bz#1557995] +- kvm-qobject-Move-block-specific-qdict-code-to-block-qdic.patch [bz#1557995] +- kvm-block-Fix-blockdev-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Fix-drive-for-certain-non-string-scalars.patch [bz#1557995] +- kvm-block-Clean-up-a-misuse-of-qobject_to-in-.bdrv_co_cr.patch [bz#1557995] +- kvm-block-Factor-out-qobject_input_visitor_new_flat_conf.patch [bz#1557995] +- kvm-block-Make-remaining-uses-of-qobject-input-visitor-m.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_flatten_qdict.patch [bz#1557995] +- kvm-block-qdict-Tweak-qdict_flatten_qdict-qdict_flatten_.patch [bz#1557995] +- kvm-block-qdict-Clean-up-qdict_crumple-a-bit.patch [bz#1557995] +- kvm-block-qdict-Simplify-qdict_is_list-some.patch [bz#1557995] +- 
kvm-check-block-qdict-Rename-qdict_flatten-s-variables-f.patch [bz#1557995] +- kvm-check-block-qdict-Cover-flattening-of-empty-lists-an.patch [bz#1557995] +- kvm-block-Fix-blockdev-blockdev-add-for-empty-objects-an.patch [bz#1557995] +- kvm-rbd-New-parameter-auth-client-required.patch [bz#1557995] +- kvm-rbd-New-parameter-key-secret.patch [bz#1557995] +- kvm-block-mirror-honor-ratelimit-again.patch [bz#1572856] +- kvm-block-mirror-Make-cancel-always-cancel-pre-READY.patch [bz#1572856] +- kvm-iotests-Add-test-for-cancelling-a-mirror-job.patch [bz#1572856] +- kvm-iotests-Split-214-off-of-122.patch [bz#1518738] +- kvm-block-Add-COR-filter-driver.patch [bz#1518738] +- kvm-block-BLK_PERM_WRITE-includes-._UNCHANGED.patch [bz#1518738] +- kvm-block-Add-BDRV_REQ_WRITE_UNCHANGED-flag.patch [bz#1518738] +- kvm-block-Set-BDRV_REQ_WRITE_UNCHANGED-for-COR-writes.patch [bz#1518738] +- kvm-block-quorum-Support-BDRV_REQ_WRITE_UNCHANGED.patch [bz#1518738] +- kvm-block-Support-BDRV_REQ_WRITE_UNCHANGED-in-filters.patch [bz#1518738] +- kvm-iotests-Clean-up-wrap-image-in-197.patch [bz#1518738] +- kvm-iotests-Copy-197-for-COR-filter-driver.patch [bz#1518738] +- kvm-iotests-Add-test-for-COR-across-nodes.patch [bz#1518738] +- kvm-qemu-io-Use-purely-string-blockdev-options.patch [bz#1576598] +- kvm-qemu-img-Use-only-string-options-in-img_open_opts.patch [bz#1576598] +- kvm-iotests-Add-test-for-U-force-share-conflicts.patch [bz#1576598] +- kvm-qemu-io-Drop-command-functions-return-values.patch [bz#1519617] +- kvm-qemu-io-Let-command-functions-return-error-code.patch [bz#1519617] +- kvm-qemu-io-Exit-with-error-when-a-command-failed.patch [bz#1519617] +- kvm-iotests.py-Add-qemu_io_silent.patch [bz#1519617] +- kvm-iotests-Let-216-make-use-of-qemu-io-s-exit-code.patch [bz#1519617] +- kvm-qcow2-Repair-OFLAG_COPIED-when-fixing-leaks.patch [bz#1527085] +- kvm-iotests-Repairing-error-during-snapshot-deletion.patch [bz#1527085] +- kvm-block-Make-bdrv_is_writable-public.patch [bz#1588039] +- 
kvm-qcow2-Do-not-mark-inactive-images-corrupt.patch [bz#1588039] +- kvm-iotests-Add-case-for-a-corrupted-inactive-image.patch [bz#1588039] +- kvm-main-loop-drop-spin_counter.patch [bz#1168213] +- kvm-target-ppc-Factor-out-the-parsing-in-kvmppc_get_cpu_.patch [bz#1560847] +- kvm-target-ppc-Don-t-require-private-l1d-cache-on-POWER8.patch [bz#1560847] +- kvm-ppc-spapr_caps-Don-t-disable-cap_cfpc-on-POWER8-by-d.patch [bz#1560847] +- kvm-qxl-fix-local-renderer-crash.patch [bz#1567733] +- kvm-qemu-img-Amendment-support-implies-create_opts.patch [bz#1537956] +- kvm-block-Add-Error-parameter-to-bdrv_amend_options.patch [bz#1537956] +- kvm-qemu-option-Pull-out-Supported-options-print.patch [bz#1537956] +- kvm-qemu-img-Add-print_amend_option_help.patch [bz#1537956] +- kvm-qemu-img-Recognize-no-creation-support-in-o-help.patch [bz#1537956] +- kvm-iotests-Test-help-option-for-unsupporting-formats.patch [bz#1537956] +- kvm-iotests-Rework-113.patch [bz#1537956] +- kvm-qemu-img-Resolve-relative-backing-paths-in-rebase.patch [bz#1569835] +- kvm-iotests-Add-test-for-rebasing-with-relative-paths.patch [bz#1569835] +- kvm-qemu-img-Special-post-backing-convert-handling.patch [bz#1527898] +- kvm-iotests-Test-post-backing-convert-target-behavior.patch [bz#1527898] +- kvm-migration-calculate-expected_downtime-with-ram_bytes.patch [bz#1564576] +- kvm-sheepdog-Fix-sd_co_create_opts-memory-leaks.patch [bz#1513543] +- kvm-qemu-iotests-reduce-chance-of-races-in-185.patch [bz#1513543] +- kvm-blockjob-do-not-cancel-timer-in-resume.patch [bz#1513543] +- kvm-nfs-Fix-error-path-in-nfs_options_qdict_to_qapi.patch [bz#1513543] +- kvm-nfs-Remove-processed-options-from-QDict.patch [bz#1513543] +- kvm-blockjob-drop-block_job_pause-resume_all.patch [bz#1513543] +- kvm-blockjob-expose-error-string-via-query.patch [bz#1513543] +- kvm-blockjob-Fix-assertion-in-block_job_finalize.patch [bz#1513543] +- kvm-blockjob-Wrappers-for-progress-counter-access.patch [bz#1513543] +- 
kvm-blockjob-Move-RateLimit-to-BlockJob.patch [bz#1513543] +- kvm-blockjob-Implement-block_job_set_speed-centrally.patch [bz#1513543] +- kvm-blockjob-Introduce-block_job_ratelimit_get_delay.patch [bz#1513543] +- kvm-blockjob-Add-block_job_driver.patch [bz#1513543] +- kvm-blockjob-Update-block-job-pause-resume-documentation.patch [bz#1513543] +- kvm-blockjob-Improve-BlockJobInfo.offset-len-documentati.patch [bz#1513543] +- kvm-job-Create-Job-JobDriver-and-job_create.patch [bz#1513543] +- kvm-job-Rename-BlockJobType-into-JobType.patch [bz#1513543] +- kvm-job-Add-JobDriver.job_type.patch [bz#1513543] +- kvm-job-Add-job_delete.patch [bz#1513543] +- kvm-job-Maintain-a-list-of-all-jobs.patch [bz#1513543] +- kvm-job-Move-state-transitions-to-Job.patch [bz#1513543] +- kvm-job-Add-reference-counting.patch [bz#1513543] +- kvm-job-Move-cancelled-to-Job.patch [bz#1513543] +- kvm-job-Add-Job.aio_context.patch [bz#1513543] +- kvm-job-Move-defer_to_main_loop-to-Job.patch [bz#1513543] +- kvm-job-Move-coroutine-and-related-code-to-Job.patch [bz#1513543] +- kvm-job-Add-job_sleep_ns.patch [bz#1513543] +- kvm-job-Move-pause-resume-functions-to-Job.patch [bz#1513543] +- kvm-job-Replace-BlockJob.completed-with-job_is_completed.patch [bz#1513543] +- kvm-job-Move-BlockJobCreateFlags-to-Job.patch [bz#1513543] +- kvm-blockjob-Split-block_job_event_pending.patch [bz#1513543] +- kvm-job-Add-job_event_.patch [bz#1513543] +- kvm-job-Move-single-job-finalisation-to-Job.patch [bz#1513543] +- kvm-job-Convert-block_job_cancel_async-to-Job.patch [bz#1513543] +- kvm-job-Add-job_drain.patch [bz#1513543] +- kvm-job-Move-.complete-callback-to-Job.patch [bz#1513543] +- kvm-job-Move-job_finish_sync-to-Job.patch [bz#1513543] +- kvm-job-Switch-transactions-to-JobTxn.patch [bz#1513543] +- kvm-job-Move-transactions-to-Job.patch [bz#1513543] +- kvm-job-Move-completion-and-cancellation-to-Job.patch [bz#1513543] +- kvm-block-Cancel-job-in-bdrv_close_all-callers.patch [bz#1513543] +- kvm-job-Add-job_yield.patch 
[bz#1513543] +- kvm-job-Add-job_dismiss.patch [bz#1513543] +- kvm-job-Add-job_is_ready.patch [bz#1513543] +- kvm-job-Add-job_transition_to_ready.patch [bz#1513543] +- kvm-job-Move-progress-fields-to-Job.patch [bz#1513543] +- kvm-job-Introduce-qapi-job.json.patch [bz#1513543] +- kvm-job-Add-JOB_STATUS_CHANGE-QMP-event.patch [bz#1513543] +- kvm-job-Add-lifecycle-QMP-commands.patch [bz#1513543] +- kvm-job-Add-query-jobs-QMP-command.patch [bz#1513543] +- kvm-blockjob-Remove-BlockJob.driver.patch [bz#1513543] +- kvm-iotests-Move-qmp_to_opts-to-VM.patch [bz#1513543] +- kvm-qemu-iotests-Test-job-with-block-jobs.patch [bz#1513543] +- kvm-vdi-Fix-vdi_co_do_create-return-value.patch [bz#1513543] +- kvm-vhdx-Fix-vhdx_co_create-return-value.patch [bz#1513543] +- kvm-job-Add-error-message-for-failing-jobs.patch [bz#1513543] +- kvm-block-create-Make-x-blockdev-create-a-job.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.get_qmp_events_filtered.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.qmp_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-iotests.img_info_log.patch [bz#1513543] +- kvm-qemu-iotests-Add-VM.run_job.patch [bz#1513543] +- kvm-qemu-iotests-iotests.py-helper-for-non-file-protocol.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-206-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-207-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-210-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-211-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-212-for-blockdev-create-job.patch [bz#1513543] +- kvm-qemu-iotests-Rewrite-213-for-blockdev-create-job.patch [bz#1513543] +- kvm-block-create-Mark-blockdev-create-stable.patch [bz#1513543] +- kvm-jobs-fix-stale-wording.patch [bz#1513543] +- kvm-jobs-fix-verb-references-in-docs.patch [bz#1513543] +- kvm-iotests-Fix-219-s-timing.patch [bz#1513543] +- kvm-iotests-improve-pause_job.patch [bz#1513543] +- kvm-rpm-Whitelist-copy-on-read-block-driver.patch 
[bz#1518738] +- kvm-rpm-add-throttle-driver-to-rw-whitelist.patch [bz#1591076] +- kvm-usb-host-skip-open-on-pending-postload-bh.patch [bz#1572851] +- kvm-i386-Define-the-Virt-SSBD-MSR-and-handling-of-it-CVE.patch [bz#1574216] +- kvm-i386-define-the-AMD-virt-ssbd-CPUID-feature-bit-CVE-.patch [bz#1574216] +- kvm-block-file-posix-Pass-FD-to-locking-helpers.patch [bz#1519144] +- kvm-block-file-posix-File-locking-during-creation.patch [bz#1519144] +- kvm-iotests-Add-creation-test-to-153.patch [bz#1519144] +- kvm-vhost-user-add-Net-prefix-to-internal-state-structur.patch [bz#1526645] +- kvm-virtio-support-setting-memory-region-based-host-noti.patch [bz#1526645] +- kvm-vhost-user-support-receiving-file-descriptors-in-sla.patch [bz#1526645] +- kvm-osdep-add-wait.h-compat-macros.patch [bz#1526645] +- kvm-vhost-user-bridge-support-host-notifier.patch [bz#1526645] +- kvm-vhost-allow-backends-to-filter-memory-sections.patch [bz#1526645] +- kvm-vhost-user-allow-slave-to-send-fds-via-slave-channel.patch [bz#1526645] +- kvm-vhost-user-introduce-shared-vhost-user-state.patch [bz#1526645] +- kvm-vhost-user-support-registering-external-host-notifie.patch [bz#1526645] +- kvm-libvhost-user-support-host-notifier.patch [bz#1526645] +- kvm-block-Introduce-API-for-copy-offloading.patch [bz#1482537] +- kvm-raw-Check-byte-range-uniformly.patch [bz#1482537] +- kvm-raw-Implement-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Implement-copy-offloading.patch [bz#1482537] +- kvm-file-posix-Implement-bdrv_co_copy_range.patch [bz#1482537] +- kvm-iscsi-Query-and-save-device-designator-when-opening.patch [bz#1482537] +- kvm-iscsi-Create-and-use-iscsi_co_wait_for_task.patch [bz#1482537] +- kvm-iscsi-Implement-copy-offloading.patch [bz#1482537] +- kvm-block-backend-Add-blk_co_copy_range.patch [bz#1482537] +- kvm-qemu-img-Convert-with-copy-offloading.patch [bz#1482537] +- kvm-qcow2-Fix-src_offset-in-copy-offloading.patch [bz#1482537] +- kvm-iscsi-Don-t-blindly-use-designator-length-in-respons.patch 
[bz#1482537] +- kvm-file-posix-Fix-EINTR-handling.patch [bz#1482537] +- kvm-usb-storage-Add-rerror-werror-properties.patch [bz#1595180] +- kvm-numa-clarify-error-message-when-node-index-is-out-of.patch [bz#1578381] +- kvm-qemu-iotests-Update-026.out.nocache-reference-output.patch [bz#1528541] +- kvm-qcow2-Free-allocated-clusters-on-write-error.patch [bz#1528541] +- kvm-qemu-iotests-Test-qcow2-not-leaking-clusters-on-writ.patch [bz#1528541] +- kvm-qemu-options-Add-missing-newline-to-accel-help-text.patch [bz#1586313] +- kvm-xhci-fix-guest-triggerable-assert.patch [bz#1594135] +- kvm-virtio-gpu-tweak-scanout-disable.patch [bz#1589634] +- kvm-virtio-gpu-update-old-resource-too.patch [bz#1589634] +- kvm-virtio-gpu-disable-scanout-when-backing-resource-is-.patch [bz#1589634] +- kvm-block-Don-t-silently-truncate-node-names.patch [bz#1549654] +- kvm-pr-helper-fix-socket-path-default-in-help.patch [bz#1533158] +- kvm-pr-helper-fix-assertion-failure-on-failed-multipath-.patch [bz#1533158] +- kvm-pr-manager-helper-avoid-SIGSEGV-when-writing-to-the-.patch [bz#1533158] +- kvm-pr-manager-put-stubs-in-.c-file.patch [bz#1533158] +- kvm-pr-manager-add-query-pr-managers-QMP-command.patch [bz#1533158] +- kvm-pr-manager-helper-report-event-on-connection-disconn.patch [bz#1533158] +- kvm-pr-helper-avoid-error-on-PR-IN-command-with-zero-req.patch [bz#1533158] +- kvm-pr-helper-Rework-socket-path-handling.patch [bz#1533158] +- kvm-pr-manager-helper-fix-memory-leak-on-event.patch [bz#1533158] +- kvm-object-fix-OBJ_PROP_LINK_UNREF_ON_RELEASE-ambivalenc.patch [bz#1556678] +- kvm-usb-hcd-xhci-test-add-a-test-for-ccid-hotplug.patch [bz#1556678] +- kvm-Revert-usb-release-the-created-buses.patch [bz#1556678] +- kvm-file-posix-Fix-creation-locking.patch [bz#1599335] +- kvm-file-posix-Unlock-FD-after-creation.patch [bz#1599335] +- kvm-ahci-trim-signatures-on-raise-lower.patch [bz#1584914] +- kvm-ahci-fix-PxCI-register-race.patch [bz#1584914] +- kvm-ahci-don-t-schedule-unnecessary-BH.patch 
[bz#1584914] +- kvm-qcow2-Fix-qcow2_truncate-error-return-value.patch [bz#1595173] +- kvm-block-Convert-.bdrv_truncate-callback-to-coroutine_f.patch [bz#1595173] +- kvm-qcow2-Remove-coroutine-trampoline-for-preallocate_co.patch [bz#1595173] +- kvm-block-Move-bdrv_truncate-implementation-to-io.c.patch [bz#1595173] +- kvm-block-Use-tracked-request-for-truncate.patch [bz#1595173] +- kvm-file-posix-Make-.bdrv_co_truncate-asynchronous.patch [bz#1595173] +- kvm-block-Fix-copy-on-read-crash-with-partial-final-clus.patch [bz#1590640] +- kvm-block-fix-QEMU-crash-with-scsi-hd-and-drive_del.patch [bz#1599515] +- kvm-virtio-rng-process-pending-requests-on-DRIVER_OK.patch [bz#1576743] +- kvm-file-posix-specify-expected-filetypes.patch [bz#1525829] +- kvm-iotests-add-test-226-for-file-driver-types.patch [bz#1525829] +- kvm-block-dirty-bitmap-add-lock-to-bdrv_enable-disable_d.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-enable-disable.patch [bz#1207657] +- kvm-qmp-transaction-support-for-x-block-dirty-bitmap-ena.patch [bz#1207657] +- kvm-qapi-add-x-block-dirty-bitmap-merge.patch [bz#1207657] +- kvm-qapi-add-disabled-parameter-to-block-dirty-bitmap-ad.patch [bz#1207657] +- kvm-block-dirty-bitmap-add-bdrv_enable_dirty_bitmap_lock.patch [bz#1207657] +- kvm-dirty-bitmap-fix-double-lock-on-bitmap-enabling.patch [bz#1207657] +- kvm-block-qcow2-bitmap-fix-free_bitmap_clusters.patch [bz#1207657] +- kvm-qcow2-add-overlap-check-for-bitmap-directory.patch [bz#1207657] +- kvm-blockdev-enable-non-root-nodes-for-backup-source.patch [bz#1207657] +- kvm-iotests-add-222-to-test-basic-fleecing.patch [bz#1207657] +- kvm-qcow2-Remove-dead-check-on-ret.patch [bz#1207657] +- kvm-block-Move-request-tracking-to-children-in-copy-offl.patch [bz#1207657] +- kvm-block-Fix-parameter-checking-in-bdrv_co_copy_range_i.patch [bz#1207657] +- kvm-block-Honour-BDRV_REQ_NO_SERIALISING-in-copy-range.patch [bz#1207657] +- kvm-backup-Use-copy-offloading.patch [bz#1207657] +- 
kvm-block-backup-disable-copy-offloading-for-backup.patch [bz#1207657] +- kvm-iotests-222-Don-t-run-with-luks.patch [bz#1207657] +- kvm-block-io-fix-copy_range.patch [bz#1207657] +- kvm-block-split-flags-in-copy_range.patch [bz#1207657] +- kvm-block-add-BDRV_REQ_SERIALISING-flag.patch [bz#1207657] +- kvm-block-backup-fix-fleecing-scheme-use-serialized-writ.patch [bz#1207657] +- kvm-nbd-server-Reject-0-length-block-status-request.patch [bz#1207657] +- kvm-nbd-server-fix-trace.patch [bz#1207657] +- kvm-nbd-server-refactor-NBDExportMetaContexts.patch [bz#1207657] +- kvm-nbd-server-add-nbd_meta_empty_or_pattern-helper.patch [bz#1207657] +- kvm-nbd-server-implement-dirty-bitmap-export.patch [bz#1207657] +- kvm-qapi-new-qmp-command-nbd-server-add-bitmap.patch [bz#1207657] +- kvm-docs-interop-add-nbd.txt.patch [bz#1207657] +- kvm-nbd-server-introduce-NBD_CMD_CACHE.patch [bz#1207657] +- kvm-nbd-server-Silence-gcc-false-positive.patch [bz#1207657] +- kvm-nbd-server-Fix-dirty-bitmap-logic-regression.patch [bz#1207657] +- kvm-nbd-server-fix-nbd_co_send_block_status.patch [bz#1207657] +- kvm-nbd-client-Add-x-dirty-bitmap-to-query-bitmap-from-s.patch [bz#1207657] +- kvm-iotests-New-test-223-for-exporting-dirty-bitmap-over.patch [bz#1207657] +- kvm-hw-char-serial-Only-retry-if-qemu_chr_fe_write-retur.patch [bz#1592817] +- kvm-hw-char-serial-retry-write-if-EAGAIN.patch [bz#1592817] +- kvm-throttle-groups-fix-hang-when-group-member-leaves.patch [bz#1535914] +- kvm-Disable-aarch64-devices-reappeared-after-2.12-rebase.patch [bz#1586357] +- kvm-Disable-split-irq-device.patch [bz#1586357] +- kvm-Disable-AT24Cx-i2c-eeprom.patch [bz#1586357] +- kvm-Disable-CAN-bus-devices.patch [bz#1586357] +- kvm-Disable-new-superio-devices.patch [bz#1586357] +- kvm-Disable-new-pvrdma-device.patch [bz#1586357] +- kvm-qdev-add-HotplugHandler-post_plug-callback.patch [bz#1607891] +- kvm-virtio-scsi-fix-hotplug-reset-vs-event-race.patch [bz#1607891] +- kvm-e1000-Fix-tso_props-compat-for-82540em.patch 
[bz#1608778] +- kvm-slirp-correct-size-computation-while-concatenating-m.patch [bz#1586255] +- kvm-s390x-sclp-fix-maxram-calculation.patch [bz#1595740] +- kvm-redhat-Make-gitpublish-profile-the-default-one.patch [bz#1425820] +- Resolves: bz#1168213 + (main-loop: WARNING: I/O thread spun for 1000 iterations while doing stream block device.) +- Resolves: bz#1207657 + (RFE: QEMU Incremental live backup - push and pull modes) +- Resolves: bz#1416180 + (QEMU VFIO based block driver for NVMe devices) +- Resolves: bz#1425820 + (Improve QEMU packaging layout with modularization of the block layer) +- Resolves: bz#1482537 + ([RFE] qemu-img copy-offloading (convert command)) +- Resolves: bz#1505664 + ("qemu-kvm: System page size 0x1000000 is not enabled in page_size_mask (0x11000). Performance may be slow" show up while using hugepage as guest's memory) +- Resolves: bz#1513543 + ([RFE] Add block job to create format on a storage device) +- Resolves: bz#1518738 + (Add 'copy-on-read' filter driver for use with blockdev-add) +- Resolves: bz#1519144 + (qemu-img: image locking doesn't cover image creation) +- Resolves: bz#1519617 + (The exit code should be non-zero when qemu-io reports an error) +- Resolves: bz#1523065 + ("qemu-img resize" should fail to decrease the size of logical partition/lvm/iSCSI image with raw format) +- Resolves: bz#1525829 + (can not boot up a scsi-block passthrough disk via -blockdev with error "cannot get SG_IO version number: Operation not supported. 
Is this a SCSI device?") +- Resolves: bz#1526645 + ([Intel 7.6 FEAT] vHost Data Plane Acceleration (vDPA) - vhost user client - qemu-kvm-rhev) +- Resolves: bz#1527085 + (The copied flag should be updated during '-r leaks') +- Resolves: bz#1527898 + ([RFE] qemu-img should leave cluster unallocated if it's read as zero throughout the backing chain) +- Resolves: bz#1528541 + (qemu-img check reports tons of leaked clusters after re-start nfs service to resume writing data in guest) +- Resolves: bz#1533158 + (QEMU support for libvirtd restarting qemu-pr-helper) +- Resolves: bz#1535914 + (Disable io throttling for one member disk of a group during io will induce the other one hang with io) +- Resolves: bz#1537956 + (RFE: qemu-img amend should list the true supported options) +- Resolves: bz#1542080 + (Qemu core dump at cirrus_invalidate_region) +- Resolves: bz#1549654 + (Reject node-names which would be truncated by the block layer commands) +- Resolves: bz#1556678 + (Hot plug usb-ccid for the 2nd time with the same ID as the 1st time failed) +- Resolves: bz#1557995 + (QAPI schema for RBD storage misses the 'password-secret' option) +- Resolves: bz#1558723 + (Create RHEL-7.6 QEMU machine type for AArch64) +- Resolves: bz#1560847 + ([Power8][FW b0320a_1812.861][rhel7.5rc2 3.10.0-861.el7.ppc64le][qemu-kvm-{ma,rhev}-2.10.0-21.el7_5.1.ppc64le] KVM guest does not default to ori type flush even with pseries-rhel7.5.0-sxxm) +- Resolves: bz#1564576 + (Pegas 1.1 - Require to backport qemu-kvm patch that fixes expected_downtime calculation during migration) +- Resolves: bz#1566153 + (IOERROR pause code lost after resuming a VM while I/O error is still present) +- Resolves: bz#1567733 + (qemu abort when migrate during guest reboot) +- Resolves: bz#1569835 + (qemu-img get wrong backing file path after rebasing image with relative path) +- Resolves: bz#1572851 + (Core dumped after migration when with usb-host) +- Resolves: bz#1572856 + ('block-job-cancel' can not cancel a 
"drive-mirror" job) +- Resolves: bz#1574216 + (CVE-2018-3639 qemu-kvm-rhev: hw: cpu: speculative store bypass [rhel-7.6]) +- Resolves: bz#1575541 + (qemu core dump while installing win10 guest) +- Resolves: bz#1576598 + (Segfault in qemu-io and qemu-img with -U --image-opts force-share=off) +- Resolves: bz#1576743 + (virtio-rng hangs when running on recent (2.x) QEMU versions) +- Resolves: bz#1578381 + (Error message need update when specify numa distance with node index >=128) +- Resolves: bz#1583959 + (Incorrect vcpu count limit for 7.4 machine types for windows guests) +- Resolves: bz#1584914 + (SATA emulator lags and hangs) +- Resolves: bz#1584984 + (Vm starts failed with 'passthrough' smartcard) +- Resolves: bz#1586255 + (CVE-2018-11806 qemu-kvm-rhev: QEMU: slirp: heap buffer overflow while reassembling fragmented datagrams [rhel-7.6]) +- Resolves: bz#1586313 + (-smp option is not easily found in the output of qemu help) +- Resolves: bz#1586357 + (Disable new devices in 2.12) +- Resolves: bz#1588039 + (Possible assertion failure in qemu when a corrupted image is used during an incoming migration) +- Resolves: bz#1589634 + (Migration failed when rebooting guest with multiple virtio videos) +- Resolves: bz#1590640 + (qemu-kvm: block/io.c:1098: bdrv_co_do_copy_on_readv: Assertion `skip_bytes < pnum' failed.) 
+- Resolves: bz#1591076 + (The driver of 'throttle' is not whitelisted) +- Resolves: bz#1592817 + (Retrying on serial_xmit if the pipe is broken may compromise the Guest) +- Resolves: bz#1594135 + (system_reset many times linux guests cause qemu process Aborted) +- Resolves: bz#1595173 + (blockdev-create is blocking) +- Resolves: bz#1595180 + (Can't set rerror/werror with usb-storage) +- Resolves: bz#1595740 + (RHEL-Alt-7.6 - qemu has error during migration of larger guests) +- Resolves: bz#1599335 + (Image creation locking is too tight and is not properly released) +- Resolves: bz#1599515 + (qemu core-dump with aio_read via hmp (util/qemu-thread-posix.c:64: qemu_mutex_lock_impl: Assertion `mutex->initialized' failed)) +- Resolves: bz#1607891 + (Hotplug events are sometimes lost with virtio-scsi + iothread) +- Resolves: bz#1608778 + (qemu/migration: migrate failed from RHEL.7.6 to RHEL.7.5 with e1000-82540em) + +* Mon Aug 06 2018 Danilo Cesar Lemes de Paula - 2.12.0-17.el8 +- kvm-linux-headers-Update-to-include-KVM_CAP_S390_HPAGE_1.patch [bz#1610906] +- kvm-s390x-Enable-KVM-huge-page-backing-support.patch [bz#1610906] +- kvm-redhat-s390x-add-hpage-1-to-kvm.conf.patch [bz#1610906] +- Resolves: bz#1610906 + ([IBM 8.0 FEAT] KVM: Huge Pages - libhugetlbfs Enablement - qemu-kvm part) + +* Tue Jul 31 2018 Danilo Cesar Lemes de Paula - 2.12.0-16.el8 +- kvm-spapr-Correct-inverted-test-in-spapr_pc_dimm_node.patch [bz#1601671] +- kvm-osdep-powerpc64-align-memory-to-allow-2MB-radix-THP-.patch [bz#1601317] +- kvm-RHEL-8.0-Add-pseries-rhel7.6.0-sxxm-machine-type.patch [bz#1595501] +- kvm-i386-Helpers-to-encode-cache-information-consistentl.patch [bz#1597739] +- kvm-i386-Add-cache-information-in-X86CPUDefinition.patch [bz#1597739] +- kvm-i386-Initialize-cache-information-for-EPYC-family-pr.patch [bz#1597739] +- kvm-i386-Add-new-property-to-control-cache-info.patch [bz#1597739] +- kvm-i386-Clean-up-cache-CPUID-code.patch [bz#1597739] +- 
kvm-i386-Populate-AMD-Processor-Cache-Information-for-cp.patch [bz#1597739] +- kvm-i386-Add-support-for-CPUID_8000_001E-for-AMD.patch [bz#1597739] +- kvm-i386-Fix-up-the-Node-id-for-CPUID_8000_001E.patch [bz#1597739] +- kvm-i386-Enable-TOPOEXT-feature-on-AMD-EPYC-CPU.patch [bz#1597739] +- kvm-i386-Remove-generic-SMT-thread-check.patch [bz#1597739] +- kvm-i386-Allow-TOPOEXT-to-be-enabled-on-older-kernels.patch [bz#1597739] +- Resolves: bz#1595501 + (Create pseries-rhel7.6.0-sxxm machine type) +- Resolves: bz#1597739 + (AMD EPYC/Zen SMT support for KVM / QEMU guest (qemu-kvm)) +- Resolves: bz#1601317 + (RHEL8.0 - qemu patch to align memory to allow 2MB THP) +- Resolves: bz#1601671 + (After rebooting guest,all the hot plug memory will be assigned to the 1st numa node.) + +* Tue Jul 24 2018 Danilo Cesar Lemes de Paula - 2.12.0-15.el8 +- kvm-spapr-Add-ibm-max-associativity-domains-property.patch [bz#1599593] +- kvm-Revert-spapr-Don-t-allow-memory-hotplug-to-memory-le.patch [bz#1599593] +- kvm-simpletrace-Convert-name-from-mapping-record-to-str.patch [bz#1594969] +- kvm-tests-fix-TLS-handshake-failure-with-TLS-1.3.patch [bz#1602403] +- Resolves: bz#1594969 + (simpletrace.py fails when running with Python 3) +- Resolves: bz#1599593 + (User can't hotplug memory to less memory numa node on rhel8) +- Resolves: bz#1602403 + (test-crypto-tlssession unit test fails with assertions) + +* Mon Jul 09 2018 Danilo Cesar Lemes de Paula - 2.12.0-14.el8 +- kvm-vfio-pci-Default-display-option-to-off.patch [bz#1590511] +- kvm-python-futurize-f-libfuturize.fixes.fix_print_with_i.patch [bz#1571533] +- kvm-python-futurize-f-lib2to3.fixes.fix_except.patch [bz#1571533] +- kvm-Revert-Defining-a-shebang-for-python-scripts.patch [bz#1571533] +- kvm-spec-Fix-ambiguous-python-interpreter-name.patch [bz#1571533] +- kvm-qemu-ga-blacklisting-guest-exec-and-guest-exec-statu.patch [bz#1518132] +- kvm-redhat-rewrap-build_configure.sh-cmdline-for-the-rh-.patch [] +- 
kvm-redhat-remove-the-VTD-LIVE_BLOCK_OPS-and-RHV-options.patch [] +- kvm-redhat-fix-the-rh-env-prep-target-s-dependency-on-th.patch [] +- kvm-redhat-remove-dead-code-related-to-s390-not-s390x.patch [] +- kvm-redhat-sync-compiler-flags-from-the-spec-file-to-rh-.patch [] +- kvm-redhat-sync-guest-agent-enablement-and-tcmalloc-usag.patch [] +- kvm-redhat-fix-up-Python-3-dependency-for-building-QEMU.patch [] +- kvm-redhat-fix-up-Python-dependency-for-SRPM-generation.patch [] +- kvm-redhat-disable-glusterfs-dependency-support-temporar.patch [] +- Resolves: bz#1518132 + (Ensure file access RPCs are disabled by default) +- Resolves: bz#1571533 + (Convert qemu-kvm python scripts to python3) +- Resolves: bz#1590511 + (Fails to start guest with Intel vGPU device) + +* Thu Jun 21 2018 Danilo C. L. de Paula - 2.12.0-13.el8 +- Resolves: bz#1508137 + ([IBM 8.0 FEAT] KVM: Interactive Bootloader (qemu)) +- Resolves: bz#1513558 + (Remove RHEL6 machine types) +- Resolves: bz#1568600 + (pc-i440fx-rhel7.6.0 and pc-q35-rhel7.6.0 machine types (x86)) +- Resolves: bz#1570029 + ([IBM 8.0 FEAT] KVM: 3270 Connectivity - qemu part) +- Resolves: bz#1578855 + (Enable Native Ceph support on non x86_64 CPUs) +- Resolves: bz#1585651 + (RHEL 7.6 new pseries machine type (ppc64le)) +- Resolves: bz#1592337 + ([IBM 8.0 FEAT] KVM: CPU Model z14 ZR1 (qemu-kvm)) + +* Tue May 15 2018 Danilo C. L. de Paula - 2.12.0-11.el8.1 +- Resolves: bz#1576468 + (Enable vhost_user in qemu-kvm 2.12) + +* Wed May 09 2018 Danilo de Paula - 2.12.0-11.el8 +- Resolves: bz#1574406 + ([RHEL 8][qemu-kvm] Failed to find romfile "efi-virtio.rom") +- Resolves: bz#1569675 + (Backwards compatibility of pc-*-rhel7.5.0 and older machine-types) +- Resolves: bz#1576045 + (Fix build issue by using python3) +- Resolves: bz#1571145 + (qemu-kvm segfaults on RHEL 8 when run guestfsd under TCG) + +* Fri Apr 20 2018 Danilo de Paula - 2.12.0-10.el +- Fixing some issues with packaging. 
+- Rebasing to 2.12.0-rc4 + +* Fri Apr 13 2018 Danilo de Paula - 2.11.0-7.el8 +- Bumping epoch for RHEL8 and dropping self-obsoleting + +* Thu Apr 12 2018 Danilo de Paula - 2.11.0-6.el8 +- Rebuilding + +* Mon Mar 05 2018 Danilo de Paula - 2.11.0-5.el8 +- Prepare building on RHEL-8.0 diff --git a/qemu-pr-helper.service b/qemu-pr-helper.service new file mode 100644 index 0000000..a1d27b0 --- /dev/null +++ b/qemu-pr-helper.service @@ -0,0 +1,15 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Service] +WorkingDirectory=/tmp +Type=simple +ExecStart=/usr/bin/qemu-pr-helper +PrivateTmp=yes +ProtectSystem=strict +ReadWritePaths=/var/run +RestrictAddressFamilies=AF_UNIX +Restart=always +RestartSec=0 + +[Install] diff --git a/qemu-pr-helper.socket b/qemu-pr-helper.socket new file mode 100644 index 0000000..9d7c3e5 --- /dev/null +++ b/qemu-pr-helper.socket @@ -0,0 +1,9 @@ +[Unit] +Description=Persistent Reservation Daemon for QEMU + +[Socket] +ListenStream=/run/qemu-pr-helper.sock +SocketMode=0600 + +[Install] +WantedBy=multi-user.target diff --git a/rpminspect.yaml b/rpminspect.yaml new file mode 100644 index 0000000..889796d --- /dev/null +++ b/rpminspect.yaml @@ -0,0 +1,11 @@ +--- +elf: + exclude_path: (.*s390-ccw.img.*)|(.*s390-netboot.img.*) +inspections: + badfuncs: off +annocheck: + - hardened: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + - rhel-policy: --skip-cf-protection --skip-property-note --ignore-unknown --verbose + ignore: + - /usr/share/qemu-kvm/s390-ccw.img + - /usr/share/qemu-kvm/s390-netboot.img diff --git a/sources b/sources new file mode 100644 index 0000000..b6290ac --- /dev/null +++ b/sources @@ -0,0 +1 @@ +SHA1 (qemu-6.2.0.tar.xz) = 68cd61a466170115b88817e2d52db2cd7a92f43a diff --git a/tests_data_acpi_pc_SSDT.dimmpxm b/tests_data_acpi_pc_SSDT.dimmpxm new file mode 100644 index 0000000000000000000000000000000000000000..ac55387d57e48adb99eb738a102308688a262fb8 GIT binary patch literal 734 
zcmZWnJ&4m_7=F`~Ynryuv=x7-N9J4Xbr8Wp(xkD0CMii;gd8+KN84*Ve0b*|BDB5X z>Ip(G+#(|Bkx_6L++3ZUT*TqP-9>Ov^X72yT;BJ~`@DI+=Xv-{Q?kCK0H|I=7h5K; z^&~}A<8J_HJm`qni~811-)!{lg}fEx2GegTQb7{lkd5ln$_Wwsc7E8beYKIfi&D%}l!?7VZ{Ys}%!${^h_4uRje2a1x9h2(!T$B95k#LXjydBYP-~j8R)y zeGVX>jc_E`Y1Y^9W0&*q2N-9bv?yz3~JX!?c7r^o$`ZR+iCa z(*iQWMsNo+x*P#Tl{P5cNK$f+9f&Eq1PhXd`I8`-M6pnE1p4e`o#xC%V!B literal 0 HcmV?d00001 diff --git a/tests_data_acpi_q35_FACP.slic b/tests_data_acpi_q35_FACP.slic new file mode 100644 index 0000000000000000000000000000000000000000..15986e095cf2db7ee92f7ce113c1d46d54018c62 GIT binary patch literal 244 zcmZ>BbPo8!z`($K$vMa|*h9hB703YsMj&>I2yh0`Fu=&b$*>8?VPXanY!FcvC=KFq z0NDyKaV7>90if7_{{Q?Cl?)6Ip(G+#(|Bkx_6L++3ZUT$~PE#YJ#X^X72yT;BJ~`@DI+=Xv-{Q?|aO04Sy2mYOE7 z_hePm;%@+`Kj=uDhS&(sAyMoNo*B}*3iU(J<~gC zo622d*>rq1L=6TxDBANJ&S~9JRNkIf5ulrLtHB$Mp2;g+UE&qdxIg=uvFD`(HChdw zZ_A=J`;`fA1XCbfnJq?Ni?>#<&$(xxN_T@FJMY}}op3Xl=23F8@cq^E-Sx+kbo^QU z@V55ech^^wx$q_{afNekwPJwJzZ|&X^{2rAPJ)pOVKx{;#L-kzC^AK5WDlj0F=`8| zPl6W{HXR&I;%k-@0G}NCebPtGWPq*?P%oSI!rP}F%MkDxtKLf4-}hbLUR3p-ATW+7 n8`Ncol)c!D(d`6fQx)#t0NB(8Uc12Q|8SbYhnR@A=!SHy6V5FNJ%-&%O8V`fj)#OzB2uGw|c}i-+q^1^MKw z_VHcygYT}d#t$oR)3T7e5LPP=#O$kq8(w=B4B#{vxe%s8(#DyVKR7Unu8X2Ru zu>3T5IpLGR@r15fP5^vzWXMNT3lV0eDx%=gIA z5)JvVL=50yh<*6jkA~!l$Ftx$MH~RHMeidlrF>7Z^kjq~kM_qC;7rSwprofvRkPDv zOSNnuBWwg`Fr&*6Fic^K!HGB}_rwKC(IHrnEXmT^8?<~qjw^HGH nl43HK9Wu^ho1!~0$|p*~zX9-x4PM>g?0-1R;a8+qKg)jr(Nxg^ literal 0 HcmV?d00001 diff --git a/udev-kvm-check.c b/udev-kvm-check.c new file mode 100644 index 0000000..928b9de --- /dev/null +++ b/udev-kvm-check.c @@ -0,0 +1,155 @@ +/* + * udev-kvm-check.c + * + * Copyright 2018 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
/*
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>

#define DEFAULT 0
#define FACILITY "kvm"
#define SYSCONFIG_KVM "/etc/sysconfig/kvm"

#define COUNT_MSG \
    "%d %s now active"

/*
 * Consume characters up to and including the next newline.
 *
 * Returns '\n' if a newline was consumed, or EOF if the stream ended first.
 */
static int skip_rest_of_line(FILE *fp)
{
    int ch;

    do {
        ch = getc(fp);
    } while (ch != EOF && ch != '\n');

    return ch;
}

/*
 * Scan a configuration stream for the first well-formed "THRESHOLD=<n>"
 * setting and return <n>.
 *
 * A setting is well-formed when:
 *   - "THRESHOLD=" is the first non-whitespace text on its line,
 *   - it is immediately followed by one or more decimal digits whose value
 *     fits in an int, and
 *   - the digits are followed by whitespace, a newline, '#', or end of file.
 *
 * Lines that do not match are skipped.  Returns DEFAULT when the stream
 * contains no well-formed setting.
 */
int get_threshold_from_file(FILE *fp)
{
    static const char key[] = "THRESHOLD=";

    for (;;) {
        size_t pos = 0;
        int ch;

        /* Skip blank lines and leading whitespace. */
        do {
            ch = getc(fp);
        } while (ch != EOF && isspace(ch));

        /* Match the key; on exit ch is the first character after the
         * matched prefix (or the character that failed to match). */
        while (key[pos] != '\0' && ch == key[pos]) {
            pos++;
            ch = getc(fp);
        }

        if (key[pos] == '\0' && isdigit(ch)) {
            int value = 0;
            int overflow = 0;

            /* Accumulate digits by hand so that an over-long number is
             * detected instead of invoking undefined behaviour. */
            do {
                int digit = ch - '0';

                if (value > (INT_MAX - digit) / 10) {
                    overflow = 1;
                } else {
                    value = value * 10 + digit;
                }
                ch = getc(fp);
            } while (isdigit(ch));

            if (!overflow && (ch == EOF || ch == '#' || isspace(ch))) {
                return value;
            }
        }

        /*
         * Not a usable setting.  Drop the remainder of this line -- but only
         * if the character in hand is not already its terminating newline,
         * otherwise the following line would be thrown away as well.
         */
        if (ch == EOF) {
            return DEFAULT;
        }
        if (ch != '\n' && skip_rest_of_line(fp) == EOF) {
            return DEFAULT;
        }
    }
}

/*
 * Read the threshold from SYSCONFIG_KVM.
 *
 * Returns DEFAULT when the file cannot be opened or holds no well-formed
 * THRESHOLD setting.
 */
int get_threshold(void)
{
    FILE *fp = fopen(SYSCONFIG_KVM, "r");
    int val;

    if (!fp) {
        return DEFAULT;
    }

    val = get_threshold_from_file(fp);
    fclose(fp);
    return val;
}

/* Return the noun matching count: "guest" for exactly one, else "guests". */
const char *guest(int count)
{
    return (count == 1 ? "guest" : "guests");
}

/* Write the "<count> guest(s) now active" message to syslog at LOG_INFO. */
void emit_count_message(int count)
{
    openlog(FACILITY, LOG_CONS, LOG_USER);
    syslog(LOG_INFO, COUNT_MSG, count, guest(count));
    closelog();
}

/*
 * Convert a decimal string to int.
 *
 * Trailing characters after the number are ignored.  Returns 0 and stores
 * the value in *out on success; returns -1 when no digits are present or
 * the value does not fit in an int.
 */
static int parse_count(const char *text, int *out)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || errno == ERANGE || value < INT_MIN || value > INT_MAX) {
        return -1;
    }

    *out = (int)value;
    return 0;
}

/*
 * Usage: udev-kvm-check <count> <event>
 *
 * For the "create" event the count is logged when the threshold is 0 or the
 * count exceeds the threshold.  For any other event the count is logged when
 * it is greater than or equal to the threshold.
 *
 * Exits with status 1 when fewer than two arguments are given or <count> is
 * not a valid integer.
 */
int main(int argc, char **argv)
{
    int count, threshold;

    if (argc < 3) {
        exit(1);
    }

    if (parse_count(argv[1], &count) != 0) {
        exit(1);
    }
    threshold = get_threshold();

    if (!strcmp(argv[2], "create")) {
        if (threshold == 0 || count > threshold) {
            emit_count_message(count);
        }
    } else {
        if (count >= threshold) {
            emit_count_message(count);
        }
    }

    return 0;
}

/*
 * NOTE(review): in the enclosing patch, udev-kvm-check.c is followed by the
 * hunk below; it is kept verbatim so that it is not lost.
 *
 * diff --git a/vhost.conf b/vhost.conf
 * new file mode 100644
 * index 0000000..68d6d7f
 * --- /dev/null
 * +++ b/vhost.conf
 * @@ -0,0 +1,3 @@
 * +# Increase default vhost memory map limit to match
 * +# KVM's memory slot limit
 * +options vhost max_mem_regions=509
 */