* Wed Jan 24 2024 Miroslav Rezanina <mrezanin@redhat.com> - 8.2.0-3
- kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch [RHEL-19738] - kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Switch-to-dma_map-unmap-API.patch [RHEL-19302 RHEL-21057] - kvm-vfio-common-Introduce-vfio_container_init-destroy-he.patch [RHEL-19302 RHEL-21057] - kvm-vfio-common-Move-giommu_list-in-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-space-field-to-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-per-container-device-list-in-bas.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Convert-functions-to-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-vrdl_list-to-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-listener-to-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Move-iova_ranges-to-base-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Implement-attach-detach_device.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Introduce-spapr-backend-and-target-interf.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch [RHEL-19302 RHEL-21057] - kvm-backends-iommufd-Introduce-the-iommufd-object.patch [RHEL-19302 RHEL-21057] - kvm-util-char_dev-Add-open_cdev.patch [RHEL-19302 RHEL-21057] - kvm-vfio-common-return-early-if-space-isn-t-empty.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Implement-the-iommufd-backend.patch 
[RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Relax-assert-check-for-iommufd-backend.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch [RHEL-19302 RHEL-21057] - kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch [RHEL-19302 RHEL-21057] - kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch [RHEL-19302 RHEL-21057] - kvm-vfio-pci-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] - kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] - kvm-vfio-platform-Allow-the-selection-of-a-given-iommu-b.patch [RHEL-19302 RHEL-21057] - kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ap-Allow-the-selection-of-a-given-iommu-backend.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ap-Make-vfio-cdev-pre-openable-by-passing-a-fil.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ccw-Allow-the-selection-of-a-given-iommu-backen.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ccw-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch [RHEL-19302 RHEL-21057] - kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch [RHEL-19302 RHEL-21057] - kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch [RHEL-19302 RHEL-21057] - kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch [RHEL-19302 RHEL-21057] - kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch [RHEL-19302 RHEL-21057] - kvm-vfio-pci-Move-VFIODevice-initializations-in-vfio_ins.patch [RHEL-19302 RHEL-21057] - kvm-vfio-platform-Move-VFIODevice-initializations-in-vfi.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ap-Move-VFIODevice-initializations-in-vfio_ap_i.patch [RHEL-19302 RHEL-21057] - kvm-vfio-ccw-Move-VFIODevice-initializations-in-vfio_ccw.patch [RHEL-19302 RHEL-21057] - kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch [RHEL-19302 RHEL-21057] - 
kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch [RHEL-19302 RHEL-21057] - kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Initialize-VFIOIOMMUOps-under-vfio_in.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Intoduce-a-new-VFIOIOMMUClass-setup-h.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch [RHEL-19302 RHEL-21057] - kvm-vfio-spapr-Only-compile-sPAPR-IOMMU-support-when-nee.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Replace-basename-with-g_path_get_base.patch [RHEL-19302 RHEL-21057] - kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch [RHEL-19302 RHEL-21057] - kvm-vfio-iommufd-Remove-the-use-of-stat-to-check-file-ex.patch [RHEL-19302 RHEL-21057] - kvm-vfio-container-Rename-vfio_init_container-to-vfio_se.patch [RHEL-19302 RHEL-21057] - kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch [RHEL-19302 RHEL-21057] - kvm-backends-iommufd-Remove-check-on-number-of-backend-u.patch [RHEL-19302 RHEL-21057] - kvm-backends-iommufd-Remove-mutex.patch [RHEL-19302 RHEL-21057] - kvm-Compile-IOMMUFD-object-on-aarch64.patch [RHEL-19302 RHEL-21057] - kvm-Compile-IOMMUFD-on-s390x.patch [RHEL-19302 RHEL-21057] - kvm-Compile-IOMMUFD-on-x86_64.patch [RHEL-19302 RHEL-21057] - kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch [RHEL-18212] - kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch [RHEL-15965] - 
kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch [RHEL-15965] - kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch [RHEL-15965] - kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch [RHEL-15965] - kvm-virtio-blk-add-lock-to-protect-s-rq.patch [RHEL-15965] - kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch [RHEL-15965] - kvm-virtio-blk-don-t-lock-AioContext-in-the-submission-c.patch [RHEL-15965] - kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch [RHEL-15965] - kvm-virtio-scsi-don-t-lock-AioContext-around-virtio_queu.patch [RHEL-15965] - kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch [RHEL-15965] - kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch [RHEL-15965] - kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch [RHEL-15965] - kvm-scsi-assert-that-callbacks-run-in-the-correct-AioCon.patch [RHEL-15965] - kvm-tests-remove-aio_context_acquire-tests.patch [RHEL-15965] - kvm-aio-make-aio_context_acquire-aio_context_release-a-n.patch [RHEL-15965] - kvm-graph-lock-remove-AioContext-locking.patch [RHEL-15965] - kvm-block-remove-AioContext-locking.patch [RHEL-15965] - kvm-block-remove-bdrv_co_lock.patch [RHEL-15965] - kvm-scsi-remove-AioContext-locking.patch [RHEL-15965] - kvm-aio-wait-draw-equivalence-between-AIO_WAIT_WHILE-and.patch [RHEL-15965] - kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch [RHEL-15965] - kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch [RHEL-15965] - kvm-scsi-remove-outdated-AioContext-lock-comment.patch [RHEL-15965] - kvm-job-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] - kvm-block-remove-outdated-AioContext-locking-comments.patch [RHEL-15965] - kvm-block-coroutine-wrapper-use-qemu_get_current_aio_con.patch [RHEL-15965] - kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch [RHEL-21169] - kvm-s390x-pci-refresh-fh-before-disabling-aif.patch [RHEL-21169] - kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch 
[RHEL-21169] - kvm-include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch [RHEL-21570] - kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch [RHEL-7565] - kvm-spec-Enable-zstd.patch [RHEL-7361] - Resolves: RHEL-19738 (Enable properties allowing to disable high memory regions) - Resolves: RHEL-19302 (NVIDIA:Grace-Hopper Backport QEMU IOMMUFD Backend) - Resolves: RHEL-21057 (Request backport of 9353b6da430f90e47f352dbf6dc31120c8914da6) - Resolves: RHEL-18212 ([RHEL9][Secure-execution][s390x] The error message is not clear when boot up a SE guest with wrong encryption) - Resolves: RHEL-15965 ( [qemu-kvm] Remove AioContext lock (no response with QMP command block_resize)) - Resolves: RHEL-21169 ([s390x] VM fails to start with ISM passed through QEMU 8.2) - Resolves: RHEL-21570 (Critical performance degradation for input devices in virtio vnc session) - Resolves: RHEL-7565 (qemu crashed when migrate guest with blob resources enabled) - Resolves: RHEL-7361 ([qemu-kvm] Enable zstd support for qcow2 files)
This commit is contained in:
parent
25859dae3b
commit
9ef7cdf7ca
37
kvm-Compile-IOMMUFD-object-on-aarch64.patch
Normal file
37
kvm-Compile-IOMMUFD-object-on-aarch64.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
From 363d6aedc82314a70bdfbe9fa23b7e8fdda50138 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 11 Jan 2024 12:26:19 -0500
|
||||||
|
Subject: [PATCH 066/101] Compile IOMMUFD object on aarch64
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [65/67] 9358030fdd499c5fe122dee3bb4f114966fac9c2 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Upstream: RHEL only
|
||||||
|
|
||||||
|
Compiles the IOMMUFD object on aarch64 to be able to use
|
||||||
|
the IOMMUFD VFIO backend.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||||
|
index aec1831199..b0191d3c69 100644
|
||||||
|
--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||||
|
+++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak
|
||||||
|
@@ -39,3 +39,4 @@ CONFIG_PXB=y
|
||||||
|
CONFIG_VHOST_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_FS=y
|
||||||
|
+CONFIG_IOMMUFD=y
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
37
kvm-Compile-IOMMUFD-on-s390x.patch
Normal file
37
kvm-Compile-IOMMUFD-on-s390x.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
From c1e9ddf8d0ea6d358fcaa5cacd3a91920f36e73b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 11 Jan 2024 12:33:17 -0500
|
||||||
|
Subject: [PATCH 067/101] Compile IOMMUFD on s390x
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [66/67] d3004aafca2bb76d817ac99c3d65973b8fbd4557 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Upstream: RHEL only
|
||||||
|
|
||||||
|
Compiles the IOMMUFD object on s390x to be able to use
|
||||||
|
the IOMMUFD VFIO backend.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
configs/devices/s390x-softmmu/s390x-rh-devices.mak | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/configs/devices/s390x-softmmu/s390x-rh-devices.mak b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||||
|
index 69a799adbd..24cf6dbd03 100644
|
||||||
|
--- a/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||||
|
+++ b/configs/devices/s390x-softmmu/s390x-rh-devices.mak
|
||||||
|
@@ -16,3 +16,4 @@ CONFIG_WDT_DIAG288=y
|
||||||
|
CONFIG_VHOST_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_FS=y
|
||||||
|
+CONFIG_IOMMUFD=y
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
37
kvm-Compile-IOMMUFD-on-x86_64.patch
Normal file
37
kvm-Compile-IOMMUFD-on-x86_64.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
From be2c3d9bbee1bdec061c901f507bc999fa40a53e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 11 Jan 2024 12:34:44 -0500
|
||||||
|
Subject: [PATCH 068/101] Compile IOMMUFD on x86_64
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [67/67] 411d48a5cc7ce1f05be793fd9a89c143ce34c91a (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Upstream: RHEL only
|
||||||
|
|
||||||
|
Compiles the IOMMUFD object on x86_64 to be able to use
|
||||||
|
the IOMMUFD VFIO backend.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||||
|
index ce5be73633..ba41108e0c 100644
|
||||||
|
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||||
|
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
|
||||||
|
@@ -108,3 +108,4 @@ CONFIG_SGX=y
|
||||||
|
CONFIG_VHOST_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_VSOCK=y
|
||||||
|
CONFIG_VHOST_USER_FS=y
|
||||||
|
+CONFIG_IOMMUFD=y
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,60 @@
|
|||||||
|
From 6b5cfed21e20b372090046a934387255ff4bda58 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:01 -0500
|
||||||
|
Subject: [PATCH 084/101] aio: make aio_context_acquire()/aio_context_release()
|
||||||
|
a no-op
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [15/26] 723dcada900aaf08862e8221921be22506b561a8 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
aio_context_acquire()/aio_context_release() has been replaced by
|
||||||
|
fine-grained locking to protect state shared by multiple threads. The
|
||||||
|
AioContext lock still plays the role of balancing locking in
|
||||||
|
AIO_WAIT_WHILE() and many functions in QEMU either require that the
|
||||||
|
AioContext lock is held or not held for this reason. In other words, the
|
||||||
|
AioContext lock is purely there for consistency with itself and serves
|
||||||
|
no real purpose anymore.
|
||||||
|
|
||||||
|
Stop actually acquiring/releasing the lock in
|
||||||
|
aio_context_acquire()/aio_context_release() so that subsequent patches
|
||||||
|
can remove callers across the codebase incrementally.
|
||||||
|
|
||||||
|
I have performed "make check" and qemu-iotests stress tests across
|
||||||
|
x86-64, ppc64le, and aarch64 to confirm that there are no failures as a
|
||||||
|
result of eliminating the lock.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-5-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
util/async.c | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/util/async.c b/util/async.c
|
||||||
|
index 8f90ddc304..04ee83d220 100644
|
||||||
|
--- a/util/async.c
|
||||||
|
+++ b/util/async.c
|
||||||
|
@@ -725,12 +725,12 @@ void aio_context_unref(AioContext *ctx)
|
||||||
|
|
||||||
|
void aio_context_acquire(AioContext *ctx)
|
||||||
|
{
|
||||||
|
- qemu_rec_mutex_lock(&ctx->lock);
|
||||||
|
+ /* TODO remove this function */
|
||||||
|
}
|
||||||
|
|
||||||
|
void aio_context_release(AioContext *ctx)
|
||||||
|
{
|
||||||
|
- qemu_rec_mutex_unlock(&ctx->lock);
|
||||||
|
+ /* TODO remove this function */
|
||||||
|
}
|
||||||
|
|
||||||
|
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
102
kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch
Normal file
102
kvm-aio-remove-aio_context_acquire-aio_context_release-A.patch
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
From 14913d8970090c8914dc19dad14f3b9f91985ec3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:07 -0500
|
||||||
|
Subject: [PATCH 090/101] aio: remove
|
||||||
|
aio_context_acquire()/aio_context_release() API
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [21/26] 4b6d4afcac79d3248a6722b063b5fc777dc418df (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Delete these functions because nothing calls these functions anymore.
|
||||||
|
|
||||||
|
I introduced these APIs in commit 98563fc3ec44 ("aio: add
|
||||||
|
aio_context_acquire() and aio_context_release()") in 2014. It's with a
|
||||||
|
sigh of relief that I delete these APIs almost 10 years later.
|
||||||
|
|
||||||
|
Thanks to Paolo Bonzini's vision for multi-queue QEMU, we got an
|
||||||
|
understanding of where the code needed to go in order to remove the
|
||||||
|
limitations that the original dataplane and the IOThread/AioContext
|
||||||
|
approach that followed it had.
|
||||||
|
|
||||||
|
Emanuele Giuseppe Esposito had the splendid determination to convert
|
||||||
|
large parts of the codebase so that they no longer needed the AioContext
|
||||||
|
lock. This was a painstaking process, both in the actual code changes
|
||||||
|
required and the iterations of code review that Emanuele eked out of
|
||||||
|
Kevin and me over many months.
|
||||||
|
|
||||||
|
Kevin Wolf tackled multitudes of graph locking conversions to protect
|
||||||
|
in-flight I/O from run-time changes to the block graph as well as the
|
||||||
|
clang Thread Safety Analysis annotations that allow the compiler to
|
||||||
|
check whether the graph lock is being used correctly.
|
||||||
|
|
||||||
|
And me, well, I'm just here to add some pizzazz to the QEMU multi-queue
|
||||||
|
block layer :). Thank you to everyone who helped with this effort,
|
||||||
|
including Eric Blake, code reviewer extraordinaire, and others who I've
|
||||||
|
forgotten to mention.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-11-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
include/block/aio.h | 17 -----------------
|
||||||
|
util/async.c | 10 ----------
|
||||||
|
2 files changed, 27 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/block/aio.h b/include/block/aio.h
|
||||||
|
index f08b358077..af05512a7d 100644
|
||||||
|
--- a/include/block/aio.h
|
||||||
|
+++ b/include/block/aio.h
|
||||||
|
@@ -278,23 +278,6 @@ void aio_context_ref(AioContext *ctx);
|
||||||
|
*/
|
||||||
|
void aio_context_unref(AioContext *ctx);
|
||||||
|
|
||||||
|
-/* Take ownership of the AioContext. If the AioContext will be shared between
|
||||||
|
- * threads, and a thread does not want to be interrupted, it will have to
|
||||||
|
- * take ownership around calls to aio_poll(). Otherwise, aio_poll()
|
||||||
|
- * automatically takes care of calling aio_context_acquire and
|
||||||
|
- * aio_context_release.
|
||||||
|
- *
|
||||||
|
- * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
|
||||||
|
- * thread still has to call those to avoid being interrupted by the guest.
|
||||||
|
- *
|
||||||
|
- * Bottom halves, timers and callbacks can be created or removed without
|
||||||
|
- * acquiring the AioContext.
|
||||||
|
- */
|
||||||
|
-void aio_context_acquire(AioContext *ctx);
|
||||||
|
-
|
||||||
|
-/* Relinquish ownership of the AioContext. */
|
||||||
|
-void aio_context_release(AioContext *ctx);
|
||||||
|
-
|
||||||
|
/**
|
||||||
|
* aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
|
||||||
|
* run only once and as soon as possible.
|
||||||
|
diff --git a/util/async.c b/util/async.c
|
||||||
|
index dfd44ef612..460529057c 100644
|
||||||
|
--- a/util/async.c
|
||||||
|
+++ b/util/async.c
|
||||||
|
@@ -719,16 +719,6 @@ void aio_context_unref(AioContext *ctx)
|
||||||
|
g_source_unref(&ctx->source);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void aio_context_acquire(AioContext *ctx)
|
||||||
|
-{
|
||||||
|
- /* TODO remove this function */
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-void aio_context_release(AioContext *ctx)
|
||||||
|
-{
|
||||||
|
- /* TODO remove this function */
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
|
||||||
|
|
||||||
|
AioContext *qemu_get_current_aio_context(void)
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,81 @@
|
|||||||
|
From e1e2f3972065c4b5d6fcf37e0e1c4fb92a0d5260 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:06 -0500
|
||||||
|
Subject: [PATCH 089/101] aio-wait: draw equivalence between AIO_WAIT_WHILE()
|
||||||
|
and AIO_WAIT_WHILE_UNLOCKED()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [20/26] 20e49777869714c99769263103f1b0c2c370cfcd (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Now that the AioContext lock no longer exists, AIO_WAIT_WHILE() and
|
||||||
|
AIO_WAIT_WHILE_UNLOCKED() are equivalent.
|
||||||
|
|
||||||
|
A future patch will get rid of AIO_WAIT_WHILE_UNLOCKED().
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-10-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
include/block/aio-wait.h | 16 ++++------------
|
||||||
|
1 file changed, 4 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
|
||||||
|
index 5449b6d742..157f105916 100644
|
||||||
|
--- a/include/block/aio-wait.h
|
||||||
|
+++ b/include/block/aio-wait.h
|
||||||
|
@@ -63,9 +63,6 @@ extern AioWait global_aio_wait;
|
||||||
|
* @ctx: the aio context, or NULL if multiple aio contexts (for which the
|
||||||
|
* caller does not hold a lock) are involved in the polling condition.
|
||||||
|
* @cond: wait while this conditional expression is true
|
||||||
|
- * @unlock: whether to unlock and then lock again @ctx. This applies
|
||||||
|
- * only when waiting for another AioContext from the main loop.
|
||||||
|
- * Otherwise it's ignored.
|
||||||
|
*
|
||||||
|
* Wait while a condition is true. Use this to implement synchronous
|
||||||
|
* operations that require event loop activity.
|
||||||
|
@@ -78,7 +75,7 @@ extern AioWait global_aio_wait;
|
||||||
|
* wait on conditions between two IOThreads since that could lead to deadlock,
|
||||||
|
* go via the main loop instead.
|
||||||
|
*/
|
||||||
|
-#define AIO_WAIT_WHILE_INTERNAL(ctx, cond, unlock) ({ \
|
||||||
|
+#define AIO_WAIT_WHILE_INTERNAL(ctx, cond) ({ \
|
||||||
|
bool waited_ = false; \
|
||||||
|
AioWait *wait_ = &global_aio_wait; \
|
||||||
|
AioContext *ctx_ = (ctx); \
|
||||||
|
@@ -95,13 +92,7 @@ extern AioWait global_aio_wait;
|
||||||
|
assert(qemu_get_current_aio_context() == \
|
||||||
|
qemu_get_aio_context()); \
|
||||||
|
while ((cond)) { \
|
||||||
|
- if (unlock && ctx_) { \
|
||||||
|
- aio_context_release(ctx_); \
|
||||||
|
- } \
|
||||||
|
aio_poll(qemu_get_aio_context(), true); \
|
||||||
|
- if (unlock && ctx_) { \
|
||||||
|
- aio_context_acquire(ctx_); \
|
||||||
|
- } \
|
||||||
|
waited_ = true; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
@@ -109,10 +100,11 @@ extern AioWait global_aio_wait;
|
||||||
|
waited_; })
|
||||||
|
|
||||||
|
#define AIO_WAIT_WHILE(ctx, cond) \
|
||||||
|
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, true)
|
||||||
|
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||||
|
|
||||||
|
+/* TODO replace this with AIO_WAIT_WHILE() in a future patch */
|
||||||
|
#define AIO_WAIT_WHILE_UNLOCKED(ctx, cond) \
|
||||||
|
- AIO_WAIT_WHILE_INTERNAL(ctx, cond, false)
|
||||||
|
+ AIO_WAIT_WHILE_INTERNAL(ctx, cond)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* aio_wait_kick:
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
476
kvm-backends-iommufd-Introduce-the-iommufd-object.patch
Normal file
476
kvm-backends-iommufd-Introduce-the-iommufd-object.patch
Normal file
@ -0,0 +1,476 @@
|
|||||||
|
From 0d8255c98b3ef6f603ff0279592d3e91de26de0e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:00 +0800
|
||||||
|
Subject: [PATCH 021/101] backends/iommufd: Introduce the iommufd object
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [20/67] 8a56344ab4a2126f248bfa492ccddd19265f39be (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Introduce an iommufd object which allows the interaction
|
||||||
|
with the host /dev/iommu device.
|
||||||
|
|
||||||
|
The /dev/iommu device may already have been opened outside of qemu,
|
||||||
|
in which case the fd can be passed directly along with the
|
||||||
|
iommufd object.
|
||||||
|
|
||||||
|
This allows the iommufd object to be shared across several
|
||||||
|
subsystems (VFIO, VDPA, ...). For example, libvirt would open
|
||||||
|
the /dev/iommu once.
|
||||||
|
|
||||||
|
If no fd is passed along with the iommufd object, the /dev/iommu
|
||||||
|
is opened by the qemu code.
|
||||||
|
|
||||||
|
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 6e6d8ac62b5b38dc9d4b69ffdf073f0a0b43b7be)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
MAINTAINERS | 8 ++
|
||||||
|
backends/Kconfig | 4 +
|
||||||
|
backends/iommufd.c | 245 +++++++++++++++++++++++++++++++++++++++
|
||||||
|
backends/meson.build | 1 +
|
||||||
|
backends/trace-events | 10 ++
|
||||||
|
include/sysemu/iommufd.h | 38 ++++++
|
||||||
|
qapi/qom.json | 19 +++
|
||||||
|
qemu-options.hx | 12 ++
|
||||||
|
8 files changed, 337 insertions(+)
|
||||||
|
create mode 100644 backends/iommufd.c
|
||||||
|
create mode 100644 include/sysemu/iommufd.h
|
||||||
|
|
||||||
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||||
|
index 695e0bd34f..a5a446914a 100644
|
||||||
|
--- a/MAINTAINERS
|
||||||
|
+++ b/MAINTAINERS
|
||||||
|
@@ -2167,6 +2167,14 @@ F: hw/vfio/ap.c
|
||||||
|
F: docs/system/s390x/vfio-ap.rst
|
||||||
|
L: qemu-s390x@nongnu.org
|
||||||
|
|
||||||
|
+iommufd
|
||||||
|
+M: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+M: Eric Auger <eric.auger@redhat.com>
|
||||||
|
+M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
+S: Supported
|
||||||
|
+F: backends/iommufd.c
|
||||||
|
+F: include/sysemu/iommufd.h
|
||||||
|
+
|
||||||
|
vhost
|
||||||
|
M: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
S: Supported
|
||||||
|
diff --git a/backends/Kconfig b/backends/Kconfig
|
||||||
|
index f35abc1609..2cb23f62fa 100644
|
||||||
|
--- a/backends/Kconfig
|
||||||
|
+++ b/backends/Kconfig
|
||||||
|
@@ -1 +1,5 @@
|
||||||
|
source tpm/Kconfig
|
||||||
|
+
|
||||||
|
+config IOMMUFD
|
||||||
|
+ bool
|
||||||
|
+ depends on VFIO
|
||||||
|
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..ba58a0eb0d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/backends/iommufd.c
|
||||||
|
@@ -0,0 +1,245 @@
|
||||||
|
+/*
|
||||||
|
+ * iommufd container backend
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ * Copyright Red Hat, Inc. 2023
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ * Eric Auger <eric.auger@redhat.com>
|
||||||
|
+ *
|
||||||
|
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "qemu/osdep.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
+#include "qapi/error.h"
|
||||||
|
+#include "qapi/qmp/qerror.h"
|
||||||
|
+#include "qemu/module.h"
|
||||||
|
+#include "qom/object_interfaces.h"
|
||||||
|
+#include "qemu/error-report.h"
|
||||||
|
+#include "monitor/monitor.h"
|
||||||
|
+#include "trace.h"
|
||||||
|
+#include <sys/ioctl.h>
|
||||||
|
+#include <linux/iommufd.h>
|
||||||
|
+
|
||||||
|
+static void iommufd_backend_init(Object *obj)
|
||||||
|
+{
|
||||||
|
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||||
|
+
|
||||||
|
+ be->fd = -1;
|
||||||
|
+ be->users = 0;
|
||||||
|
+ be->owned = true;
|
||||||
|
+ qemu_mutex_init(&be->lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_backend_finalize(Object *obj)
|
||||||
|
+{
|
||||||
|
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||||
|
+
|
||||||
|
+ if (be->owned) {
|
||||||
|
+ close(be->fd);
|
||||||
|
+ be->fd = -1;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
|
||||||
|
+ int fd = -1;
|
||||||
|
+
|
||||||
|
+ fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||||
|
+ if (fd == -1) {
|
||||||
|
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ qemu_mutex_lock(&be->lock);
|
||||||
|
+ be->fd = fd;
|
||||||
|
+ be->owned = false;
|
||||||
|
+ qemu_mutex_unlock(&be->lock);
|
||||||
|
+ trace_iommu_backend_set_fd(be->fd);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
|
||||||
|
+{
|
||||||
|
+ IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
|
||||||
|
+
|
||||||
|
+ return !be->users;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_backend_class_init(ObjectClass *oc, void *data)
|
||||||
|
+{
|
||||||
|
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
|
||||||
|
+
|
||||||
|
+ ucc->can_be_deleted = iommufd_backend_can_be_deleted;
|
||||||
|
+
|
||||||
|
+ object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||||
|
+{
|
||||||
|
+ int fd, ret = 0;
|
||||||
|
+
|
||||||
|
+ qemu_mutex_lock(&be->lock);
|
||||||
|
+ if (be->users == UINT32_MAX) {
|
||||||
|
+ error_setg(errp, "too many connections");
|
||||||
|
+ ret = -E2BIG;
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
+ if (be->owned && !be->users) {
|
||||||
|
+ fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||||
|
+ if (fd < 0) {
|
||||||
|
+ error_setg_errno(errp, errno, "/dev/iommu opening failed");
|
||||||
|
+ ret = fd;
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
+ be->fd = fd;
|
||||||
|
+ }
|
||||||
|
+ be->users++;
|
||||||
|
+out:
|
||||||
|
+ trace_iommufd_backend_connect(be->fd, be->owned,
|
||||||
|
+ be->users, ret);
|
||||||
|
+ qemu_mutex_unlock(&be->lock);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||||
|
+{
|
||||||
|
+ qemu_mutex_lock(&be->lock);
|
||||||
|
+ if (!be->users) {
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
+ be->users--;
|
||||||
|
+ if (!be->users && be->owned) {
|
||||||
|
+ close(be->fd);
|
||||||
|
+ be->fd = -1;
|
||||||
|
+ }
|
||||||
|
+out:
|
||||||
|
+ trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||||
|
+ qemu_mutex_unlock(&be->lock);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||||
|
+ Error **errp)
|
||||||
|
+{
|
||||||
|
+ int ret, fd = be->fd;
|
||||||
|
+ struct iommu_ioas_alloc alloc_data = {
|
||||||
|
+ .size = sizeof(alloc_data),
|
||||||
|
+ .flags = 0,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, errno, "Failed to allocate ioas");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ *ioas_id = alloc_data.out_ioas_id;
|
||||||
|
+ trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
|
||||||
|
+{
|
||||||
|
+ int ret, fd = be->fd;
|
||||||
|
+ struct iommu_destroy des = {
|
||||||
|
+ .size = sizeof(des),
|
||||||
|
+ .id = id,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_DESTROY, &des);
|
||||||
|
+ trace_iommufd_backend_free_id(fd, id, ret);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_report("Failed to free id: %u %m", id);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||||
|
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
+{
|
||||||
|
+ int ret, fd = be->fd;
|
||||||
|
+ struct iommu_ioas_map map = {
|
||||||
|
+ .size = sizeof(map),
|
||||||
|
+ .flags = IOMMU_IOAS_MAP_READABLE |
|
||||||
|
+ IOMMU_IOAS_MAP_FIXED_IOVA,
|
||||||
|
+ .ioas_id = ioas_id,
|
||||||
|
+ .__reserved = 0,
|
||||||
|
+ .user_va = (uintptr_t)vaddr,
|
||||||
|
+ .iova = iova,
|
||||||
|
+ .length = size,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ if (!readonly) {
|
||||||
|
+ map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
|
||||||
|
+ trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
|
||||||
|
+ vaddr, readonly, ret);
|
||||||
|
+ if (ret) {
|
||||||
|
+ ret = -errno;
|
||||||
|
+
|
||||||
|
+ /* TODO: Not support mapping hardware PCI BAR region for now. */
|
||||||
|
+ if (errno == EFAULT) {
|
||||||
|
+ warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
|
||||||
|
+ } else {
|
||||||
|
+ error_report("IOMMU_IOAS_MAP failed: %m");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||||
|
+ hwaddr iova, ram_addr_t size)
|
||||||
|
+{
|
||||||
|
+ int ret, fd = be->fd;
|
||||||
|
+ struct iommu_ioas_unmap unmap = {
|
||||||
|
+ .size = sizeof(unmap),
|
||||||
|
+ .ioas_id = ioas_id,
|
||||||
|
+ .iova = iova,
|
||||||
|
+ .length = size,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
|
||||||
|
+ /*
|
||||||
|
+ * IOMMUFD takes mapping as some kind of object, unmapping
|
||||||
|
+ * nonexistent mapping is treated as deleting a nonexistent
|
||||||
|
+ * object and return ENOENT. This is different from legacy
|
||||||
|
+ * backend which allows it. vIOMMU may trigger a lot of
|
||||||
|
+ * redundant unmapping, to avoid flooding the log, treat them
|
||||||
|
+ * as success for IOMMUFD just like legacy backend.
|
||||||
|
+ */
|
||||||
|
+ if (ret && errno == ENOENT) {
|
||||||
|
+ trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
|
||||||
|
+ ret = 0;
|
||||||
|
+ } else {
|
||||||
|
+ trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ ret = -errno;
|
||||||
|
+ error_report("IOMMU_IOAS_UNMAP failed: %m");
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static const TypeInfo iommufd_backend_info = {
|
||||||
|
+ .name = TYPE_IOMMUFD_BACKEND,
|
||||||
|
+ .parent = TYPE_OBJECT,
|
||||||
|
+ .instance_size = sizeof(IOMMUFDBackend),
|
||||||
|
+ .instance_init = iommufd_backend_init,
|
||||||
|
+ .instance_finalize = iommufd_backend_finalize,
|
||||||
|
+ .class_size = sizeof(IOMMUFDBackendClass),
|
||||||
|
+ .class_init = iommufd_backend_class_init,
|
||||||
|
+ .interfaces = (InterfaceInfo[]) {
|
||||||
|
+ { TYPE_USER_CREATABLE },
|
||||||
|
+ { }
|
||||||
|
+ }
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static void register_types(void)
|
||||||
|
+{
|
||||||
|
+ type_register_static(&iommufd_backend_info);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+type_init(register_types);
|
||||||
|
diff --git a/backends/meson.build b/backends/meson.build
|
||||||
|
index 914c7c4afb..9a5cea480d 100644
|
||||||
|
--- a/backends/meson.build
|
||||||
|
+++ b/backends/meson.build
|
||||||
|
@@ -20,6 +20,7 @@ if have_vhost_user
|
||||||
|
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
|
||||||
|
endif
|
||||||
|
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
|
||||||
|
+system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
|
||||||
|
if have_vhost_user_crypto
|
||||||
|
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
|
||||||
|
endif
|
||||||
|
diff --git a/backends/trace-events b/backends/trace-events
|
||||||
|
index 652eb76a57..d45c6e31a6 100644
|
||||||
|
--- a/backends/trace-events
|
||||||
|
+++ b/backends/trace-events
|
||||||
|
@@ -5,3 +5,13 @@ dbus_vmstate_pre_save(void)
|
||||||
|
dbus_vmstate_post_load(int version_id) "version_id: %d"
|
||||||
|
dbus_vmstate_loading(const char *id) "id: %s"
|
||||||
|
dbus_vmstate_saving(const char *id) "id: %s"
|
||||||
|
+
|
||||||
|
+# iommufd.c
|
||||||
|
+iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
|
||||||
|
+iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
|
||||||
|
+iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
|
||||||
|
+iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
|
||||||
|
+iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||||
|
+iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
|
||||||
|
+iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
|
||||||
|
+iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
|
||||||
|
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..9c5524b0ed
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/sysemu/iommufd.h
|
||||||
|
@@ -0,0 +1,38 @@
|
||||||
|
+#ifndef SYSEMU_IOMMUFD_H
|
||||||
|
+#define SYSEMU_IOMMUFD_H
|
||||||
|
+
|
||||||
|
+#include "qom/object.h"
|
||||||
|
+#include "qemu/thread.h"
|
||||||
|
+#include "exec/hwaddr.h"
|
||||||
|
+#include "exec/cpu-common.h"
|
||||||
|
+
|
||||||
|
+#define TYPE_IOMMUFD_BACKEND "iommufd"
|
||||||
|
+OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
|
||||||
|
+
|
||||||
|
+struct IOMMUFDBackendClass {
|
||||||
|
+ ObjectClass parent_class;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct IOMMUFDBackend {
|
||||||
|
+ Object parent;
|
||||||
|
+
|
||||||
|
+ /*< protected >*/
|
||||||
|
+ int fd; /* /dev/iommu file descriptor */
|
||||||
|
+ bool owned; /* is the /dev/iommu opened internally */
|
||||||
|
+ QemuMutex lock;
|
||||||
|
+ uint32_t users;
|
||||||
|
+
|
||||||
|
+ /*< public >*/
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp);
|
||||||
|
+void iommufd_backend_disconnect(IOMMUFDBackend *be);
|
||||||
|
+
|
||||||
|
+int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||||
|
+ Error **errp);
|
||||||
|
+void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id);
|
||||||
|
+int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
|
||||||
|
+ ram_addr_t size, void *vaddr, bool readonly);
|
||||||
|
+int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
|
||||||
|
+ hwaddr iova, ram_addr_t size);
|
||||||
|
+#endif
|
||||||
|
diff --git a/qapi/qom.json b/qapi/qom.json
|
||||||
|
index c53ef978ff..95516ba325 100644
|
||||||
|
--- a/qapi/qom.json
|
||||||
|
+++ b/qapi/qom.json
|
||||||
|
@@ -794,6 +794,23 @@
|
||||||
|
{ 'struct': 'VfioUserServerProperties',
|
||||||
|
'data': { 'socket': 'SocketAddress', 'device': 'str' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @IOMMUFDProperties:
|
||||||
|
+#
|
||||||
|
+# Properties for iommufd objects.
|
||||||
|
+#
|
||||||
|
+# @fd: file descriptor name previously passed via 'getfd' command,
|
||||||
|
+# which represents a pre-opened /dev/iommu. This allows the
|
||||||
|
+# iommufd object to be shared across several subsystems
|
||||||
|
+# (VFIO, VDPA, ...), and the file descriptor to be shared
|
||||||
|
+# with other processes, e.g. DPDK. (default: QEMU opens
|
||||||
|
+# /dev/iommu by itself)
|
||||||
|
+#
|
||||||
|
+# Since: 9.0
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'IOMMUFDProperties',
|
||||||
|
+ 'data': { '*fd': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @RngProperties:
|
||||||
|
#
|
||||||
|
@@ -934,6 +951,7 @@
|
||||||
|
'input-barrier',
|
||||||
|
{ 'name': 'input-linux',
|
||||||
|
'if': 'CONFIG_LINUX' },
|
||||||
|
+ 'iommufd',
|
||||||
|
'iothread',
|
||||||
|
'main-loop',
|
||||||
|
{ 'name': 'memory-backend-epc',
|
||||||
|
@@ -1003,6 +1021,7 @@
|
||||||
|
'input-barrier': 'InputBarrierProperties',
|
||||||
|
'input-linux': { 'type': 'InputLinuxProperties',
|
||||||
|
'if': 'CONFIG_LINUX' },
|
||||||
|
+ 'iommufd': 'IOMMUFDProperties',
|
||||||
|
'iothread': 'IothreadProperties',
|
||||||
|
'main-loop': 'MainLoopProperties',
|
||||||
|
'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
|
||||||
|
diff --git a/qemu-options.hx b/qemu-options.hx
|
||||||
|
index 557118cb1f..0814f43066 100644
|
||||||
|
--- a/qemu-options.hx
|
||||||
|
+++ b/qemu-options.hx
|
||||||
|
@@ -5224,6 +5224,18 @@ SRST
|
||||||
|
|
||||||
|
The ``share`` boolean option is on by default with memfd.
|
||||||
|
|
||||||
|
+ ``-object iommufd,id=id[,fd=fd]``
|
||||||
|
+ Creates an iommufd backend which allows control of DMA mapping
|
||||||
|
+ through the ``/dev/iommu`` device.
|
||||||
|
+
|
||||||
|
+ The ``id`` parameter is a unique ID which frontends (such as
|
||||||
|
+ vfio-pci or vdpa) will use to connect with the iommufd backend.
|
||||||
|
+
|
||||||
|
+ The ``fd`` parameter is an optional pre-opened file descriptor
|
||||||
|
+ resulting from ``/dev/iommu`` opening. Usually the iommufd is shared
|
||||||
|
+ across all subsystems, bringing the benefit of centralized
|
||||||
|
+ reference counting.
|
||||||
|
+
|
||||||
|
``-object rng-builtin,id=id``
|
||||||
|
Creates a random number generator backend which obtains entropy
|
||||||
|
from QEMU builtin functions. The ``id`` parameter is a unique ID
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,47 @@
|
|||||||
|
From da9a24793e876f6f2727d57f939d882be26a47b8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Fri, 22 Dec 2023 08:55:23 +0100
|
||||||
|
Subject: [PATCH 064/101] backends/iommufd: Remove check on number of backend
|
||||||
|
users
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [63/67] ac4d4589d1f2de5ac3f0adfd8d1f27dbf6bbfdee (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
QOM already has a ref count on objects and it will assert much
|
||||||
|
earlier, when INT_MAX is reached.
|
||||||
|
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit c2ab3a6f7411c895e538e8350fee8948ac07c1a0)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
backends/iommufd.c | 5 -----
|
||||||
|
1 file changed, 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||||
|
index ba58a0eb0d..393c0d9a37 100644
|
||||||
|
--- a/backends/iommufd.c
|
||||||
|
+++ b/backends/iommufd.c
|
||||||
|
@@ -80,11 +80,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||||
|
int fd, ret = 0;
|
||||||
|
|
||||||
|
qemu_mutex_lock(&be->lock);
|
||||||
|
- if (be->users == UINT32_MAX) {
|
||||||
|
- error_setg(errp, "too many connections");
|
||||||
|
- ret = -E2BIG;
|
||||||
|
- goto out;
|
||||||
|
- }
|
||||||
|
if (be->owned && !be->users) {
|
||||||
|
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||||
|
if (fd < 0) {
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
112
kvm-backends-iommufd-Remove-mutex.patch
Normal file
112
kvm-backends-iommufd-Remove-mutex.patch
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
From 92aff3cc1a412de01e9563802fa48848eae5283f Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Thu, 21 Dec 2023 16:58:41 +0100
|
||||||
|
Subject: [PATCH 065/101] backends/iommufd: Remove mutex
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [64/67] 65518432b18f18ceadafe1b0698cdaa962e84f61 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Coverity reports a concurrent data access violation because be->users
|
||||||
|
is being accessed in iommufd_backend_can_be_deleted() without holding
|
||||||
|
the mutex.
|
||||||
|
|
||||||
|
However, these routines are called from the QEMU main thread when a
|
||||||
|
device is created. In this case, the code paths should be protected by
|
||||||
|
the BQL lock and it should be safe to drop the IOMMUFD backend mutex.
|
||||||
|
Simply remove it.
|
||||||
|
|
||||||
|
Fixes: CID 1531550
|
||||||
|
Fixes: CID 1531549
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 19368b1905b4b917e915526fcbd5bfa3f7439451)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
backends/iommufd.c | 7 -------
|
||||||
|
include/sysemu/iommufd.h | 2 --
|
||||||
|
2 files changed, 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/backends/iommufd.c b/backends/iommufd.c
|
||||||
|
index 393c0d9a37..1ef683c7b0 100644
|
||||||
|
--- a/backends/iommufd.c
|
||||||
|
+++ b/backends/iommufd.c
|
||||||
|
@@ -29,7 +29,6 @@ static void iommufd_backend_init(Object *obj)
|
||||||
|
be->fd = -1;
|
||||||
|
be->users = 0;
|
||||||
|
be->owned = true;
|
||||||
|
- qemu_mutex_init(&be->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void iommufd_backend_finalize(Object *obj)
|
||||||
|
@@ -52,10 +51,8 @@ static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
- qemu_mutex_lock(&be->lock);
|
||||||
|
be->fd = fd;
|
||||||
|
be->owned = false;
|
||||||
|
- qemu_mutex_unlock(&be->lock);
|
||||||
|
trace_iommu_backend_set_fd(be->fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -79,7 +76,6 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||||
|
{
|
||||||
|
int fd, ret = 0;
|
||||||
|
|
||||||
|
- qemu_mutex_lock(&be->lock);
|
||||||
|
if (be->owned && !be->users) {
|
||||||
|
fd = qemu_open_old("/dev/iommu", O_RDWR);
|
||||||
|
if (fd < 0) {
|
||||||
|
@@ -93,13 +89,11 @@ int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
|
||||||
|
out:
|
||||||
|
trace_iommufd_backend_connect(be->fd, be->owned,
|
||||||
|
be->users, ret);
|
||||||
|
- qemu_mutex_unlock(&be->lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||||
|
{
|
||||||
|
- qemu_mutex_lock(&be->lock);
|
||||||
|
if (!be->users) {
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
@@ -110,7 +104,6 @@ void iommufd_backend_disconnect(IOMMUFDBackend *be)
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
trace_iommufd_backend_disconnect(be->fd, be->users);
|
||||||
|
- qemu_mutex_unlock(&be->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
|
||||||
|
diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
|
||||||
|
index 9c5524b0ed..9af27ebd6c 100644
|
||||||
|
--- a/include/sysemu/iommufd.h
|
||||||
|
+++ b/include/sysemu/iommufd.h
|
||||||
|
@@ -2,7 +2,6 @@
|
||||||
|
#define SYSEMU_IOMMUFD_H
|
||||||
|
|
||||||
|
#include "qom/object.h"
|
||||||
|
-#include "qemu/thread.h"
|
||||||
|
#include "exec/hwaddr.h"
|
||||||
|
#include "exec/cpu-common.h"
|
||||||
|
|
||||||
|
@@ -19,7 +18,6 @@ struct IOMMUFDBackend {
|
||||||
|
/*< protected >*/
|
||||||
|
int fd; /* /dev/iommu file descriptor */
|
||||||
|
bool owned; /* is the /dev/iommu opened internally */
|
||||||
|
- QemuMutex lock;
|
||||||
|
uint32_t users;
|
||||||
|
|
||||||
|
/*< public >*/
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,69 @@
|
|||||||
|
From b1a68aebadecd7d339cf5eaffeda15099c998528 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 12 Sep 2023 19:10:37 -0400
|
||||||
|
Subject: [PATCH 095/101] block-coroutine-wrapper: use
|
||||||
|
qemu_get_current_aio_context()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [26/26] cde767bcdc626e90721792e3889952057a548ac5 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Use qemu_get_current_aio_context() in mixed wrappers and coroutine
|
||||||
|
wrappers so that code runs in the caller's AioContext instead of moving
|
||||||
|
to the BlockDriverState's AioContext. This change is necessary for the
|
||||||
|
multi-queue block layer where any thread can call into the block layer.
|
||||||
|
|
||||||
|
Most wrappers are IO_CODE where it's safe to use the current AioContext
|
||||||
|
nowadays. BlockDrivers and the core block layer use their own locks and
|
||||||
|
no longer depend on the AioContext lock for thread-safety.
|
||||||
|
|
||||||
|
The bdrv_create() wrapper invokes GLOBAL_STATE code. Using the current
|
||||||
|
AioContext is safe because this code is only called with the BQL held
|
||||||
|
from the main loop thread.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20230912231037.826804-6-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
scripts/block-coroutine-wrapper.py | 6 ++----
|
||||||
|
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/scripts/block-coroutine-wrapper.py b/scripts/block-coroutine-wrapper.py
|
||||||
|
index c9c09fcacd..dbbde99e39 100644
|
||||||
|
--- a/scripts/block-coroutine-wrapper.py
|
||||||
|
+++ b/scripts/block-coroutine-wrapper.py
|
||||||
|
@@ -92,8 +92,6 @@ def __init__(self, wrapper_type: str, return_type: str, name: str,
|
||||||
|
f"{self.name}")
|
||||||
|
self.target_name = f'{subsystem}_{subname}'
|
||||||
|
|
||||||
|
- self.ctx = self.gen_ctx()
|
||||||
|
-
|
||||||
|
self.get_result = 's->ret = '
|
||||||
|
self.ret = 'return s.ret;'
|
||||||
|
self.co_ret = 'return '
|
||||||
|
@@ -167,7 +165,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
|
||||||
|
{func.co_ret}{name}({ func.gen_list('{name}') });
|
||||||
|
}} else {{
|
||||||
|
{struct_name} s = {{
|
||||||
|
- .poll_state.ctx = {func.ctx},
|
||||||
|
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||||
|
.poll_state.in_progress = true,
|
||||||
|
|
||||||
|
{ func.gen_block(' .{name} = {name},') }
|
||||||
|
@@ -191,7 +189,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
|
||||||
|
{func.return_type} {func.name}({ func.gen_list('{decl}') })
|
||||||
|
{{
|
||||||
|
{struct_name} s = {{
|
||||||
|
- .poll_state.ctx = {func.ctx},
|
||||||
|
+ .poll_state.ctx = qemu_get_current_aio_context(),
|
||||||
|
.poll_state.in_progress = true,
|
||||||
|
|
||||||
|
{ func.gen_block(' .{name} = {name},') }
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
217
kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch
Normal file
217
kvm-block-file-posix-set-up-Linux-AIO-and-io_uring-in-th.patch
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
From 25cce5df341861e8ba8ec57722558e2dee3ce56a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 14 Sep 2023 10:00:58 -0400
|
||||||
|
Subject: [PATCH 073/101] block/file-posix: set up Linux AIO and io_uring in
|
||||||
|
the current thread
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [4/26] 74c7daf805daefe706378308c3afeb28d861164b (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The file-posix block driver currently only sets up Linux AIO and
|
||||||
|
io_uring in the BDS's AioContext. In the multi-queue block layer we must
|
||||||
|
be able to submit I/O requests in AioContexts that do not have Linux AIO
|
||||||
|
and io_uring set up yet since any thread can call into the block driver.
|
||||||
|
|
||||||
|
Set up Linux AIO and io_uring for the current AioContext during request
|
||||||
|
submission. We lose the ability to return an error from
|
||||||
|
.bdrv_file_open() when Linux AIO and io_uring setup fails (e.g. due to
|
||||||
|
resource limits). Instead the user only gets warnings and we fall back
|
||||||
|
to aio=threads. This is still better than a fatal error after startup.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20230914140101.1065008-2-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
block/file-posix.c | 103 ++++++++++++++++++++++-----------------------
|
||||||
|
1 file changed, 51 insertions(+), 52 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/block/file-posix.c b/block/file-posix.c
|
||||||
|
index b862406c71..35684f7e21 100644
|
||||||
|
--- a/block/file-posix.c
|
||||||
|
+++ b/block/file-posix.c
|
||||||
|
@@ -712,17 +712,11 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||||
|
|
||||||
|
#ifdef CONFIG_LINUX_AIO
|
||||||
|
/* Currently Linux does AIO only for files opened with O_DIRECT */
|
||||||
|
- if (s->use_linux_aio) {
|
||||||
|
- if (!(s->open_flags & O_DIRECT)) {
|
||||||
|
- error_setg(errp, "aio=native was specified, but it requires "
|
||||||
|
- "cache.direct=on, which was not specified.");
|
||||||
|
- ret = -EINVAL;
|
||||||
|
- goto fail;
|
||||||
|
- }
|
||||||
|
- if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
|
||||||
|
- error_prepend(errp, "Unable to use native AIO: ");
|
||||||
|
- goto fail;
|
||||||
|
- }
|
||||||
|
+ if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
|
||||||
|
+ error_setg(errp, "aio=native was specified, but it requires "
|
||||||
|
+ "cache.direct=on, which was not specified.");
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto fail;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
if (s->use_linux_aio) {
|
||||||
|
@@ -733,14 +727,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
|
||||||
|
}
|
||||||
|
#endif /* !defined(CONFIG_LINUX_AIO) */
|
||||||
|
|
||||||
|
-#ifdef CONFIG_LINUX_IO_URING
|
||||||
|
- if (s->use_linux_io_uring) {
|
||||||
|
- if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
|
||||||
|
- error_prepend(errp, "Unable to use io_uring: ");
|
||||||
|
- goto fail;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-#else
|
||||||
|
+#ifndef CONFIG_LINUX_IO_URING
|
||||||
|
if (s->use_linux_io_uring) {
|
||||||
|
error_setg(errp, "aio=io_uring was specified, but is not supported "
|
||||||
|
"in this build.");
|
||||||
|
@@ -2444,6 +2431,48 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
+#ifdef CONFIG_LINUX_IO_URING
|
||||||
|
+static inline bool raw_check_linux_io_uring(BDRVRawState *s)
|
||||||
|
+{
|
||||||
|
+ Error *local_err = NULL;
|
||||||
|
+ AioContext *ctx;
|
||||||
|
+
|
||||||
|
+ if (!s->use_linux_io_uring) {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ctx = qemu_get_current_aio_context();
|
||||||
|
+ if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
|
||||||
|
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||||
|
+ "falling back to thread pool: ");
|
||||||
|
+ s->use_linux_io_uring = false;
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifdef CONFIG_LINUX_AIO
|
||||||
|
+static inline bool raw_check_linux_aio(BDRVRawState *s)
|
||||||
|
+{
|
||||||
|
+ Error *local_err = NULL;
|
||||||
|
+ AioContext *ctx;
|
||||||
|
+
|
||||||
|
+ if (!s->use_linux_aio) {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ctx = qemu_get_current_aio_context();
|
||||||
|
+ if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
|
||||||
|
+ error_reportf_err(local_err, "Unable to use Linux AIO, "
|
||||||
|
+ "falling back to thread pool: ");
|
||||||
|
+ s->use_linux_aio = false;
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||||
|
uint64_t bytes, QEMUIOVector *qiov, int type)
|
||||||
|
{
|
||||||
|
@@ -2474,13 +2503,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
|
||||||
|
if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
|
||||||
|
type |= QEMU_AIO_MISALIGNED;
|
||||||
|
#ifdef CONFIG_LINUX_IO_URING
|
||||||
|
- } else if (s->use_linux_io_uring) {
|
||||||
|
+ } else if (raw_check_linux_io_uring(s)) {
|
||||||
|
assert(qiov->size == bytes);
|
||||||
|
ret = luring_co_submit(bs, s->fd, offset, qiov, type);
|
||||||
|
goto out;
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_LINUX_AIO
|
||||||
|
- } else if (s->use_linux_aio) {
|
||||||
|
+ } else if (raw_check_linux_aio(s)) {
|
||||||
|
assert(qiov->size == bytes);
|
||||||
|
ret = laio_co_submit(s->fd, offset, qiov, type,
|
||||||
|
s->aio_max_batch);
|
||||||
|
@@ -2567,39 +2596,13 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef CONFIG_LINUX_IO_URING
|
||||||
|
- if (s->use_linux_io_uring) {
|
||||||
|
+ if (raw_check_linux_io_uring(s)) {
|
||||||
|
return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void raw_aio_attach_aio_context(BlockDriverState *bs,
|
||||||
|
- AioContext *new_context)
|
||||||
|
-{
|
||||||
|
- BDRVRawState __attribute__((unused)) *s = bs->opaque;
|
||||||
|
-#ifdef CONFIG_LINUX_AIO
|
||||||
|
- if (s->use_linux_aio) {
|
||||||
|
- Error *local_err = NULL;
|
||||||
|
- if (!aio_setup_linux_aio(new_context, &local_err)) {
|
||||||
|
- error_reportf_err(local_err, "Unable to use native AIO, "
|
||||||
|
- "falling back to thread pool: ");
|
||||||
|
- s->use_linux_aio = false;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-#endif
|
||||||
|
-#ifdef CONFIG_LINUX_IO_URING
|
||||||
|
- if (s->use_linux_io_uring) {
|
||||||
|
- Error *local_err = NULL;
|
||||||
|
- if (!aio_setup_linux_io_uring(new_context, &local_err)) {
|
||||||
|
- error_reportf_err(local_err, "Unable to use linux io_uring, "
|
||||||
|
- "falling back to thread pool: ");
|
||||||
|
- s->use_linux_io_uring = false;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-#endif
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void raw_close(BlockDriverState *bs)
|
||||||
|
{
|
||||||
|
BDRVRawState *s = bs->opaque;
|
||||||
|
@@ -3896,7 +3899,6 @@ BlockDriver bdrv_file = {
|
||||||
|
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||||
|
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||||
|
.bdrv_refresh_limits = raw_refresh_limits,
|
||||||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||||
|
|
||||||
|
.bdrv_co_truncate = raw_co_truncate,
|
||||||
|
.bdrv_co_getlength = raw_co_getlength,
|
||||||
|
@@ -4266,7 +4268,6 @@ static BlockDriver bdrv_host_device = {
|
||||||
|
.bdrv_co_copy_range_from = raw_co_copy_range_from,
|
||||||
|
.bdrv_co_copy_range_to = raw_co_copy_range_to,
|
||||||
|
.bdrv_refresh_limits = raw_refresh_limits,
|
||||||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||||
|
|
||||||
|
.bdrv_co_truncate = raw_co_truncate,
|
||||||
|
.bdrv_co_getlength = raw_co_getlength,
|
||||||
|
@@ -4402,7 +4403,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||||
|
.bdrv_co_pwritev = raw_co_pwritev,
|
||||||
|
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||||
|
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||||
|
|
||||||
|
.bdrv_co_truncate = raw_co_truncate,
|
||||||
|
.bdrv_co_getlength = raw_co_getlength,
|
||||||
|
@@ -4528,7 +4528,6 @@ static BlockDriver bdrv_host_cdrom = {
|
||||||
|
.bdrv_co_pwritev = raw_co_pwritev,
|
||||||
|
.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
|
||||||
|
.bdrv_refresh_limits = cdrom_refresh_limits,
|
||||||
|
- .bdrv_attach_aio_context = raw_aio_attach_aio_context,
|
||||||
|
|
||||||
|
.bdrv_co_truncate = raw_co_truncate,
|
||||||
|
.bdrv_co_getlength = raw_co_getlength,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
4438
kvm-block-remove-AioContext-locking.patch
Normal file
4438
kvm-block-remove-AioContext-locking.patch
Normal file
File diff suppressed because it is too large
Load Diff
97
kvm-block-remove-bdrv_co_lock.patch
Normal file
97
kvm-block-remove-bdrv_co_lock.patch
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
From d0514c7d5d6cc1aa140119c95d5ea2c1591b01e9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:04 -0500
|
||||||
|
Subject: [PATCH 087/101] block: remove bdrv_co_lock()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [18/26] a303f861ea5e84d8e89fd51e530fd0cb2da17b89 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The bdrv_co_lock() and bdrv_co_unlock() functions are already no-ops.
|
||||||
|
Remove them.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-8-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
block.c | 10 ----------
|
||||||
|
blockdev.c | 5 -----
|
||||||
|
include/block/block-global-state.h | 14 --------------
|
||||||
|
3 files changed, 29 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/block.c b/block.c
|
||||||
|
index 91ace5d2d5..434b7f4d72 100644
|
||||||
|
--- a/block.c
|
||||||
|
+++ b/block.c
|
||||||
|
@@ -7431,16 +7431,6 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
|
||||||
|
bdrv_dec_in_flight(bs);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
|
||||||
|
-{
|
||||||
|
- /* TODO removed in next patch */
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
|
||||||
|
-{
|
||||||
|
- /* TODO removed in next patch */
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
|
||||||
|
{
|
||||||
|
GLOBAL_STATE_CODE();
|
||||||
|
diff --git a/blockdev.c b/blockdev.c
|
||||||
|
index 5d8b3a23eb..3a5e7222ec 100644
|
||||||
|
--- a/blockdev.c
|
||||||
|
+++ b/blockdev.c
|
||||||
|
@@ -2264,18 +2264,13 @@ void coroutine_fn qmp_block_resize(const char *device, const char *node_name,
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- bdrv_co_lock(bs);
|
||||||
|
bdrv_drained_begin(bs);
|
||||||
|
- bdrv_co_unlock(bs);
|
||||||
|
|
||||||
|
old_ctx = bdrv_co_enter(bs);
|
||||||
|
blk_co_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
|
||||||
|
bdrv_co_leave(bs, old_ctx);
|
||||||
|
|
||||||
|
- bdrv_co_lock(bs);
|
||||||
|
bdrv_drained_end(bs);
|
||||||
|
- bdrv_co_unlock(bs);
|
||||||
|
-
|
||||||
|
blk_co_unref(blk);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
|
||||||
|
index 0327f1c605..4ec0b217f0 100644
|
||||||
|
--- a/include/block/block-global-state.h
|
||||||
|
+++ b/include/block/block-global-state.h
|
||||||
|
@@ -267,20 +267,6 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
|
||||||
|
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
|
||||||
|
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
|
||||||
|
|
||||||
|
-/**
|
||||||
|
- * Locks the AioContext of @bs if it's not the current AioContext. This avoids
|
||||||
|
- * double locking which could lead to deadlocks: This is a coroutine_fn, so we
|
||||||
|
- * know we already own the lock of the current AioContext.
|
||||||
|
- *
|
||||||
|
- * May only be called in the main thread.
|
||||||
|
- */
|
||||||
|
-void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
|
||||||
|
-
|
||||||
|
-/**
|
||||||
|
- * Unlocks the AioContext of @bs if it's not the current AioContext.
|
||||||
|
- */
|
||||||
|
-void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
|
||||||
|
-
|
||||||
|
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
|
||||||
|
GHashTable *visited, Transaction *tran,
|
||||||
|
Error **errp);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
411
kvm-block-remove-outdated-AioContext-locking-comments.patch
Normal file
411
kvm-block-remove-outdated-AioContext-locking-comments.patch
Normal file
@ -0,0 +1,411 @@
|
|||||||
|
From dc4eb64185957a01948217814478abc450ce5f26 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:11 -0500
|
||||||
|
Subject: [PATCH 094/101] block: remove outdated AioContext locking comments
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [25/26] 395e18fb40d28d4bc961acee1a00da7f60748076 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The AioContext lock no longer exists.
|
||||||
|
|
||||||
|
There is one noteworthy change:
|
||||||
|
|
||||||
|
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||||
|
- * requires the caller to be either in the main thread and hold
|
||||||
|
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||||
|
- * home thread that runs the bs AioContext. Calling them from
|
||||||
|
- * another thread in another AioContext would cause deadlocks.
|
||||||
|
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||||
|
+ * the caller to be either in the main thread or directly in the home thread
|
||||||
|
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||||
|
+ * AioContext would cause deadlocks.
|
||||||
|
|
||||||
|
I am not sure whether deadlocks are still possible. Maybe they have just
|
||||||
|
moved to the fine-grained locks that have replaced the AioContext. Since
|
||||||
|
I am not sure if the deadlocks are gone, I have kept the substance
|
||||||
|
unchanged and just removed mention of the AioContext.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-15-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
block.c | 73 ++++++----------------------
|
||||||
|
block/block-backend.c | 8 ---
|
||||||
|
block/export/vhost-user-blk-server.c | 4 --
|
||||||
|
include/block/block-common.h | 3 --
|
||||||
|
include/block/block-io.h | 9 ++--
|
||||||
|
include/block/block_int-common.h | 2 -
|
||||||
|
tests/qemu-iotests/202 | 2 +-
|
||||||
|
tests/qemu-iotests/203 | 3 +-
|
||||||
|
8 files changed, 22 insertions(+), 82 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/block.c b/block.c
|
||||||
|
index 434b7f4d72..a097772238 100644
|
||||||
|
--- a/block.c
|
||||||
|
+++ b/block.c
|
||||||
|
@@ -1616,11 +1616,6 @@ out:
|
||||||
|
g_free(gen_node_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/*
|
||||||
|
- * The caller must always hold @bs AioContext lock, because this function calls
|
||||||
|
- * bdrv_refresh_total_sectors() which polls when called from non-coroutine
|
||||||
|
- * context.
|
||||||
|
- */
|
||||||
|
static int no_coroutine_fn GRAPH_UNLOCKED
|
||||||
|
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
|
||||||
|
QDict *options, int open_flags, Error **errp)
|
||||||
|
@@ -2901,7 +2896,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
|
||||||
|
* Replaces the node that a BdrvChild points to without updating permissions.
|
||||||
|
*
|
||||||
|
* If @new_bs is non-NULL, the parent of @child must already be drained through
|
||||||
|
- * @child and the caller must hold the AioContext lock for @new_bs.
|
||||||
|
+ * @child.
|
||||||
|
*/
|
||||||
|
static void GRAPH_WRLOCK
|
||||||
|
bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
|
||||||
|
@@ -3041,9 +3036,8 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
|
||||||
|
*
|
||||||
|
* Returns new created child.
|
||||||
|
*
|
||||||
|
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||||
|
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||||
|
+ * function.
|
||||||
|
*/
|
||||||
|
static BdrvChild * GRAPH_WRLOCK
|
||||||
|
bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||||
|
@@ -3142,9 +3136,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
|
||||||
|
/*
|
||||||
|
* Function doesn't update permissions, caller is responsible for this.
|
||||||
|
*
|
||||||
|
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||||
|
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||||
|
+ * function.
|
||||||
|
*
|
||||||
|
* After calling this function, the transaction @tran may only be completed
|
||||||
|
* while holding a writer lock for the graph.
|
||||||
|
@@ -3184,9 +3177,6 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
|
||||||
|
*
|
||||||
|
* On failure NULL is returned, errp is set and the reference to
|
||||||
|
* child_bs is also dropped.
|
||||||
|
- *
|
||||||
|
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
|
||||||
|
- * (unless @child_bs is already in @ctx).
|
||||||
|
*/
|
||||||
|
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
|
||||||
|
const char *child_name,
|
||||||
|
@@ -3226,9 +3216,6 @@ out:
|
||||||
|
*
|
||||||
|
* On failure NULL is returned, errp is set and the reference to
|
||||||
|
* child_bs is also dropped.
|
||||||
|
- *
|
||||||
|
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
|
||||||
|
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
|
||||||
|
*/
|
||||||
|
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
|
||||||
|
BlockDriverState *child_bs,
|
||||||
|
@@ -3418,9 +3405,8 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
|
||||||
|
*
|
||||||
|
* Function doesn't update permissions, caller is responsible for this.
|
||||||
|
*
|
||||||
|
- * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
|
||||||
|
- * @child_bs can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * Both @parent_bs and @child_bs can move to a different AioContext in this
|
||||||
|
+ * function.
|
||||||
|
*
|
||||||
|
* After calling this function, the transaction @tran may only be completed
|
||||||
|
* while holding a writer lock for the graph.
|
||||||
|
@@ -3513,9 +3499,8 @@ out:
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
- * The caller must hold the AioContext lock for @backing_hd. Both @bs and
|
||||||
|
- * @backing_hd can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * Both @bs and @backing_hd can move to a different AioContext in this
|
||||||
|
+ * function.
|
||||||
|
*
|
||||||
|
* If a backing child is already present (i.e. we're detaching a node), that
|
||||||
|
* child node must be drained.
|
||||||
|
@@ -3574,8 +3559,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
|
||||||
|
* itself, all options starting with "${bdref_key}." are considered part of the
|
||||||
|
* BlockdevRef.
|
||||||
|
*
|
||||||
|
- * The caller must hold the main AioContext lock.
|
||||||
|
- *
|
||||||
|
* TODO Can this be unified with bdrv_open_image()?
|
||||||
|
*/
|
||||||
|
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
|
||||||
|
@@ -3745,9 +3728,7 @@ done:
|
||||||
|
*
|
||||||
|
* The BlockdevRef will be removed from the options QDict.
|
||||||
|
*
|
||||||
|
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||||
|
- * @parent can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * @parent can move to a different AioContext in this function.
|
||||||
|
*/
|
||||||
|
BdrvChild *bdrv_open_child(const char *filename,
|
||||||
|
QDict *options, const char *bdref_key,
|
||||||
|
@@ -3778,9 +3759,7 @@ BdrvChild *bdrv_open_child(const char *filename,
|
||||||
|
/*
|
||||||
|
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
|
||||||
|
*
|
||||||
|
- * The caller must hold the lock of the main AioContext and no other AioContext.
|
||||||
|
- * @parent can move to a different AioContext in this function. Callers must
|
||||||
|
- * make sure that their AioContext locking is still correct after this.
|
||||||
|
+ * @parent can move to a different AioContext in this function.
|
||||||
|
*/
|
||||||
|
int bdrv_open_file_child(const char *filename,
|
||||||
|
QDict *options, const char *bdref_key,
|
||||||
|
@@ -3923,8 +3902,6 @@ out:
|
||||||
|
* The reference parameter may be used to specify an existing block device which
|
||||||
|
* should be opened. If specified, neither options nor a filename may be given,
|
||||||
|
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
|
||||||
|
- *
|
||||||
|
- * The caller must always hold the main AioContext lock.
|
||||||
|
*/
|
||||||
|
static BlockDriverState * no_coroutine_fn
|
||||||
|
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
|
||||||
|
@@ -4217,7 +4194,6 @@ close_and_fail:
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* The caller must always hold the main AioContext lock. */
|
||||||
|
BlockDriverState *bdrv_open(const char *filename, const char *reference,
|
||||||
|
QDict *options, int flags, Error **errp)
|
||||||
|
{
|
||||||
|
@@ -4665,10 +4641,7 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
|
||||||
|
*
|
||||||
|
* Return 0 on success, otherwise return < 0 and set @errp.
|
||||||
|
*
|
||||||
|
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||||
|
* @reopen_state->bs can move to a different AioContext in this function.
|
||||||
|
- * Callers must make sure that their AioContext locking is still correct after
|
||||||
|
- * this.
|
||||||
|
*/
|
||||||
|
static int GRAPH_UNLOCKED
|
||||||
|
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
|
||||||
|
@@ -4801,8 +4774,6 @@ out_rdlock:
|
||||||
|
* It is the responsibility of the caller to then call the abort() or
|
||||||
|
* commit() for any other BDS that have been left in a prepare() state
|
||||||
|
*
|
||||||
|
- * The caller must hold the AioContext lock of @reopen_state->bs.
|
||||||
|
- *
|
||||||
|
* After calling this function, the transaction @change_child_tran may only be
|
||||||
|
* completed while holding a writer lock for the graph.
|
||||||
|
*/
|
||||||
|
@@ -5437,8 +5408,6 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
|
||||||
|
* child.
|
||||||
|
*
|
||||||
|
* This function does not create any image files.
|
||||||
|
- *
|
||||||
|
- * The caller must hold the AioContext lock for @bs_top.
|
||||||
|
*/
|
||||||
|
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
|
||||||
|
Error **errp)
|
||||||
|
@@ -5545,9 +5514,8 @@ static void bdrv_delete(BlockDriverState *bs)
|
||||||
|
* after the call (even on failure), so if the caller intends to reuse the
|
||||||
|
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
|
||||||
|
*
|
||||||
|
- * The caller holds the AioContext lock for @bs. It must make sure that @bs
|
||||||
|
- * stays in the same AioContext, i.e. @options must not refer to nodes in a
|
||||||
|
- * different AioContext.
|
||||||
|
+ * The caller must make sure that @bs stays in the same AioContext, i.e.
|
||||||
|
+ * @options must not refer to nodes in a different AioContext.
|
||||||
|
*/
|
||||||
|
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
|
||||||
|
int flags, Error **errp)
|
||||||
|
@@ -7565,10 +7533,6 @@ static TransactionActionDrv set_aio_context = {
|
||||||
|
*
|
||||||
|
* Must be called from the main AioContext.
|
||||||
|
*
|
||||||
|
- * The caller must own the AioContext lock for the old AioContext of bs, but it
|
||||||
|
- * must not own the AioContext lock for new_context (unless new_context is the
|
||||||
|
- * same as the current context of bs).
|
||||||
|
- *
|
||||||
|
* @visited will accumulate all visited BdrvChild objects. The caller is
|
||||||
|
* responsible for freeing the list afterwards.
|
||||||
|
*/
|
||||||
|
@@ -7621,13 +7585,6 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||||
|
*
|
||||||
|
* If ignore_child is not NULL, that child (and its subgraph) will not
|
||||||
|
* be touched.
|
||||||
|
- *
|
||||||
|
- * This function still requires the caller to take the bs current
|
||||||
|
- * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
|
||||||
|
- * assumes the lock is always held if bs is in another AioContext.
|
||||||
|
- * For the same reason, it temporarily also holds the new AioContext, since
|
||||||
|
- * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
|
||||||
|
- * Therefore the new AioContext lock must not be taken by the caller.
|
||||||
|
*/
|
||||||
|
int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||||
|
BdrvChild *ignore_child, Error **errp)
|
||||||
|
@@ -7653,8 +7610,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Linear phase: go through all callbacks collected in the transaction.
|
||||||
|
- * Run all callbacks collected in the recursion to switch all nodes
|
||||||
|
- * AioContext lock (transaction commit), or undo all changes done in the
|
||||||
|
+ * Run all callbacks collected in the recursion to switch every node's
|
||||||
|
+ * AioContext (transaction commit), or undo all changes done in the
|
||||||
|
* recursion (transaction abort).
|
||||||
|
*/
|
||||||
|
|
||||||
|
diff --git a/block/block-backend.c b/block/block-backend.c
|
||||||
|
index f412bed274..209eb07528 100644
|
||||||
|
--- a/block/block-backend.c
|
||||||
|
+++ b/block/block-backend.c
|
||||||
|
@@ -390,8 +390,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
|
||||||
|
* Both sets of permissions can be changed later using blk_set_perm().
|
||||||
|
*
|
||||||
|
* Return the new BlockBackend on success, null on failure.
|
||||||
|
- *
|
||||||
|
- * Callers must hold the AioContext lock of @bs.
|
||||||
|
*/
|
||||||
|
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||||
|
uint64_t shared_perm, Error **errp)
|
||||||
|
@@ -416,8 +414,6 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
|
||||||
|
* Just as with bdrv_open(), after having called this function the reference to
|
||||||
|
* @options belongs to the block layer (even on failure).
|
||||||
|
*
|
||||||
|
- * Called without holding an AioContext lock.
|
||||||
|
- *
|
||||||
|
* TODO: Remove @filename and @flags; it should be possible to specify a whole
|
||||||
|
* BDS tree just by specifying the @options QDict (or @reference,
|
||||||
|
* alternatively). At the time of adding this function, this is not possible,
|
||||||
|
@@ -872,8 +868,6 @@ BlockBackend *blk_by_public(BlockBackendPublic *public)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disassociates the currently associated BlockDriverState from @blk.
|
||||||
|
- *
|
||||||
|
- * The caller must hold the AioContext lock for the BlockBackend.
|
||||||
|
*/
|
||||||
|
void blk_remove_bs(BlockBackend *blk)
|
||||||
|
{
|
||||||
|
@@ -915,8 +909,6 @@ void blk_remove_bs(BlockBackend *blk)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Associates a new BlockDriverState with @blk.
|
||||||
|
- *
|
||||||
|
- * Callers must hold the AioContext lock of @bs.
|
||||||
|
*/
|
||||||
|
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
|
||||||
|
{
|
||||||
|
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
|
||||||
|
index 16f48388d3..50c358e8cd 100644
|
||||||
|
--- a/block/export/vhost-user-blk-server.c
|
||||||
|
+++ b/block/export/vhost-user-blk-server.c
|
||||||
|
@@ -278,7 +278,6 @@ static void vu_blk_exp_resize(void *opaque)
|
||||||
|
vu_config_change_msg(&vexp->vu_server.vu_dev);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Called with vexp->export.ctx acquired */
|
||||||
|
static void vu_blk_drained_begin(void *opaque)
|
||||||
|
{
|
||||||
|
VuBlkExport *vexp = opaque;
|
||||||
|
@@ -287,7 +286,6 @@ static void vu_blk_drained_begin(void *opaque)
|
||||||
|
vhost_user_server_detach_aio_context(&vexp->vu_server);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Called with vexp->export.blk AioContext acquired */
|
||||||
|
static void vu_blk_drained_end(void *opaque)
|
||||||
|
{
|
||||||
|
VuBlkExport *vexp = opaque;
|
||||||
|
@@ -300,8 +298,6 @@ static void vu_blk_drained_end(void *opaque)
|
||||||
|
* Ensures that bdrv_drained_begin() waits until in-flight requests complete
|
||||||
|
* and the server->co_trip coroutine has terminated. It will be restarted in
|
||||||
|
* vhost_user_server_attach_aio_context().
|
||||||
|
- *
|
||||||
|
- * Called with vexp->export.ctx acquired.
|
||||||
|
*/
|
||||||
|
static bool vu_blk_drained_poll(void *opaque)
|
||||||
|
{
|
||||||
|
diff --git a/include/block/block-common.h b/include/block/block-common.h
|
||||||
|
index d7599564db..a846023a09 100644
|
||||||
|
--- a/include/block/block-common.h
|
||||||
|
+++ b/include/block/block-common.h
|
||||||
|
@@ -70,9 +70,6 @@
|
||||||
|
* automatically takes the graph rdlock when calling the wrapped function. In
|
||||||
|
* the same way, no_co_wrapper_bdrv_wrlock functions automatically take the
|
||||||
|
* graph wrlock.
|
||||||
|
- *
|
||||||
|
- * If the first parameter of the function is a BlockDriverState, BdrvChild or
|
||||||
|
- * BlockBackend pointer, the AioContext lock for it is taken in the wrapper.
|
||||||
|
*/
|
||||||
|
#define no_co_wrapper
|
||||||
|
#define no_co_wrapper_bdrv_rdlock
|
||||||
|
diff --git a/include/block/block-io.h b/include/block/block-io.h
|
||||||
|
index 8eb39a858b..b49e0537dd 100644
|
||||||
|
--- a/include/block/block-io.h
|
||||||
|
+++ b/include/block/block-io.h
|
||||||
|
@@ -332,11 +332,10 @@ bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
|
||||||
|
* "I/O or GS" API functions. These functions can run without
|
||||||
|
* the BQL, but only in one specific iothread/main loop.
|
||||||
|
*
|
||||||
|
- * More specifically, these functions use BDRV_POLL_WHILE(bs), which
|
||||||
|
- * requires the caller to be either in the main thread and hold
|
||||||
|
- * the BlockdriverState (bs) AioContext lock, or directly in the
|
||||||
|
- * home thread that runs the bs AioContext. Calling them from
|
||||||
|
- * another thread in another AioContext would cause deadlocks.
|
||||||
|
+ * More specifically, these functions use BDRV_POLL_WHILE(bs), which requires
|
||||||
|
+ * the caller to be either in the main thread or directly in the home thread
|
||||||
|
+ * that runs the bs AioContext. Calling them from another thread in another
|
||||||
|
+ * AioContext would cause deadlocks.
|
||||||
|
*
|
||||||
|
* Therefore, these functions are not proper I/O, because they
|
||||||
|
* can't run in *any* iothreads, but only in a specific one.
|
||||||
|
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
|
||||||
|
index 4e31d161c5..151279d481 100644
|
||||||
|
--- a/include/block/block_int-common.h
|
||||||
|
+++ b/include/block/block_int-common.h
|
||||||
|
@@ -1192,8 +1192,6 @@ struct BlockDriverState {
|
||||||
|
/* The error object in use for blocking operations on backing_hd */
|
||||||
|
Error *backing_blocker;
|
||||||
|
|
||||||
|
- /* Protected by AioContext lock */
|
||||||
|
-
|
||||||
|
/*
|
||||||
|
* If we are reading a disk image, give its size in sectors.
|
||||||
|
* Generally read-only; it is written to by load_snapshot and
|
||||||
|
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
|
||||||
|
index b784dcd791..13304242e5 100755
|
||||||
|
--- a/tests/qemu-iotests/202
|
||||||
|
+++ b/tests/qemu-iotests/202
|
||||||
|
@@ -21,7 +21,7 @@
|
||||||
|
# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
|
||||||
|
# single IOThread completes successfully. This particular command triggered a
|
||||||
|
# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
|
||||||
|
-# against regressions.
|
||||||
|
+# against regressions even though the AioContext lock no longer exists.
|
||||||
|
|
||||||
|
import iotests
|
||||||
|
|
||||||
|
diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
|
||||||
|
index ab80fd0e44..1ba878522b 100755
|
||||||
|
--- a/tests/qemu-iotests/203
|
||||||
|
+++ b/tests/qemu-iotests/203
|
||||||
|
@@ -21,7 +21,8 @@
|
||||||
|
# Check that QMP 'migrate' with multiple drives on a single IOThread completes
|
||||||
|
# successfully. This particular command triggered a hang in the source QEMU
|
||||||
|
# process due to recursive AioContext locking in bdrv_invalidate_all() and
|
||||||
|
-# BDRV_POLL_WHILE().
|
||||||
|
+# BDRV_POLL_WHILE(). Protect against regressions even though the AioContext
|
||||||
|
+# lock no longer exists.
|
||||||
|
|
||||||
|
import iotests
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
75
kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch
Normal file
75
kvm-dma-helpers-don-t-lock-AioContext-in-dma_blk_cb.patch
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
From ac9dc8ea241ef6d3a0447d696620d4d4053b71bf Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Mon, 4 Dec 2023 11:42:59 -0500
|
||||||
|
Subject: [PATCH 080/101] dma-helpers: don't lock AioContext in dma_blk_cb()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [11/26] a8580463ba6aee4ca248c0b947b9e72bd9e87aab (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Commit abfcd2760b3e ("dma-helpers: prevent dma_blk_cb() vs
|
||||||
|
dma_aio_cancel() race") acquired the AioContext lock inside dma_blk_cb()
|
||||||
|
to avoid a race with scsi_device_purge_requests() running in the main
|
||||||
|
loop thread.
|
||||||
|
|
||||||
|
The SCSI code no longer calls dma_aio_cancel() from the main loop thread
|
||||||
|
while I/O is running in the IOThread AioContext. Therefore it is no
|
||||||
|
longer necessary to take this lock to protect DMAAIOCB fields. The
|
||||||
|
->cb() function also does not require the lock because blk_aio_*() and
|
||||||
|
friends do not need the AioContext lock.
|
||||||
|
|
||||||
|
Both hw/ide/core.c and hw/ide/macio.c also call dma_blk_io() but don't
|
||||||
|
rely on it taking the AioContext lock, so this change is safe.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231204164259.1515217-5-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
system/dma-helpers.c | 7 ++-----
|
||||||
|
1 file changed, 2 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
|
||||||
|
index 36211acc7e..528117f256 100644
|
||||||
|
--- a/system/dma-helpers.c
|
||||||
|
+++ b/system/dma-helpers.c
|
||||||
|
@@ -119,13 +119,12 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||||
|
|
||||||
|
trace_dma_blk_cb(dbs, ret);
|
||||||
|
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
dbs->acb = NULL;
|
||||||
|
dbs->offset += dbs->iov.size;
|
||||||
|
|
||||||
|
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
||||||
|
dma_complete(dbs, ret);
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
dma_blk_unmap(dbs);
|
||||||
|
|
||||||
|
@@ -168,7 +167,7 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||||
|
trace_dma_map_wait(dbs);
|
||||||
|
dbs->bh = aio_bh_new(ctx, reschedule_dma, dbs);
|
||||||
|
cpu_register_map_client(dbs->bh);
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
||||||
|
@@ -179,8 +178,6 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||||
|
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
||||||
|
dma_blk_cb, dbs, dbs->io_func_opaque);
|
||||||
|
assert(dbs->acb);
|
||||||
|
-out:
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void dma_aio_cancel(BlockAIOCB *acb)
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
228
kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch
Normal file
228
kvm-docs-devel-Add-VFIO-iommufd-backend-documentation.patch
Normal file
@ -0,0 +1,228 @@
|
|||||||
|
From 71aa0219f7c84cbf175eb2a091d48d5fd5daa40b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:26 +0800
|
||||||
|
Subject: [PATCH 047/101] docs/devel: Add VFIO iommufd backend documentation
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [46/67] 6cf49d00e87788f894d690a985bb6798eae24505 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 98dad2b01931f6064c6c4b48ca3c2a1d9f542cd8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
MAINTAINERS | 1 +
|
||||||
|
docs/devel/index-internals.rst | 1 +
|
||||||
|
docs/devel/vfio-iommufd.rst | 166 +++++++++++++++++++++++++++++++++
|
||||||
|
3 files changed, 168 insertions(+)
|
||||||
|
create mode 100644 docs/devel/vfio-iommufd.rst
|
||||||
|
|
||||||
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||||
|
index ca70bb4e64..0ddb20a35f 100644
|
||||||
|
--- a/MAINTAINERS
|
||||||
|
+++ b/MAINTAINERS
|
||||||
|
@@ -2176,6 +2176,7 @@ F: backends/iommufd.c
|
||||||
|
F: include/sysemu/iommufd.h
|
||||||
|
F: include/qemu/chardev_open.h
|
||||||
|
F: util/chardev_open.c
|
||||||
|
+F: docs/devel/vfio-iommufd.rst
|
||||||
|
|
||||||
|
vhost
|
||||||
|
M: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst
|
||||||
|
index 6f81df92bc..3def4a138b 100644
|
||||||
|
--- a/docs/devel/index-internals.rst
|
||||||
|
+++ b/docs/devel/index-internals.rst
|
||||||
|
@@ -18,5 +18,6 @@ Details about QEMU's various subsystems including how to add features to them.
|
||||||
|
s390-dasd-ipl
|
||||||
|
tracing
|
||||||
|
vfio-migration
|
||||||
|
+ vfio-iommufd
|
||||||
|
writing-monitor-commands
|
||||||
|
virtio-backends
|
||||||
|
diff --git a/docs/devel/vfio-iommufd.rst b/docs/devel/vfio-iommufd.rst
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..3d1c11f175
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/docs/devel/vfio-iommufd.rst
|
||||||
|
@@ -0,0 +1,166 @@
|
||||||
|
+===============================
|
||||||
|
+IOMMUFD BACKEND usage with VFIO
|
||||||
|
+===============================
|
||||||
|
+
|
||||||
|
+(Same meaning for backend/container/BE)
|
||||||
|
+
|
||||||
|
+With the introduction of iommufd, the Linux kernel provides a generic
|
||||||
|
+interface for user space drivers to propagate their DMA mappings to kernel
|
||||||
|
+for assigned devices. While the legacy kernel interface is group-centric,
|
||||||
|
+the new iommufd interface is device-centric, relying on device fd and iommufd.
|
||||||
|
+
|
||||||
|
+To support both interfaces in the QEMU VFIO device, introduce a base container
|
||||||
|
+to abstract the common part of VFIO legacy and iommufd container. So that the
|
||||||
|
+generic VFIO code can use either container.
|
||||||
|
+
|
||||||
|
+The base container implements generic functions such as memory_listener and
|
||||||
|
+address space management whereas the derived container implements callbacks
|
||||||
|
+specific to either legacy or iommufd. Each container has its own way to setup
|
||||||
|
+secure context and DMA management interface. The diagram below shows what it
|
||||||
|
+looks like with both containers.
|
||||||
|
+
|
||||||
|
+::
|
||||||
|
+
|
||||||
|
+ VFIO AddressSpace/Memory
|
||||||
|
+ +-------+ +----------+ +-----+ +-----+
|
||||||
|
+ | pci | | platform | | ap | | ccw |
|
||||||
|
+ +---+---+ +----+-----+ +--+--+ +--+--+ +----------------------+
|
||||||
|
+ | | | | | AddressSpace |
|
||||||
|
+ | | | | +------------+---------+
|
||||||
|
+ +---V-----------V-----------V--------V----+ /
|
||||||
|
+ | VFIOAddressSpace | <------------+
|
||||||
|
+ | | | MemoryListener
|
||||||
|
+ | VFIOContainerBase list |
|
||||||
|
+ +-------+----------------------------+----+
|
||||||
|
+ | |
|
||||||
|
+ | |
|
||||||
|
+ +-------V------+ +--------V----------+
|
||||||
|
+ | iommufd | | vfio legacy |
|
||||||
|
+ | container | | container |
|
||||||
|
+ +-------+------+ +--------+----------+
|
||||||
|
+ | |
|
||||||
|
+ | /dev/iommu | /dev/vfio/vfio
|
||||||
|
+ | /dev/vfio/devices/vfioX | /dev/vfio/$group_id
|
||||||
|
+ Userspace | |
|
||||||
|
+ ============+============================+===========================
|
||||||
|
+ Kernel | device fd |
|
||||||
|
+ +---------------+ | group/container fd
|
||||||
|
+ | (BIND_IOMMUFD | | (SET_CONTAINER/SET_IOMMU)
|
||||||
|
+ | ATTACH_IOAS) | | device fd
|
||||||
|
+ | | |
|
||||||
|
+ | +-------V------------V-----------------+
|
||||||
|
+ iommufd | | vfio |
|
||||||
|
+ (map/unmap | +---------+--------------------+-------+
|
||||||
|
+ ioas_copy) | | | map/unmap
|
||||||
|
+ | | |
|
||||||
|
+ +------V------+ +-----V------+ +------V--------+
|
||||||
|
+ | iommufd core | | device | | vfio iommu |
|
||||||
|
+ +-------------+ +------------+ +---------------+
|
||||||
|
+
|
||||||
|
+* Secure Context setup
|
||||||
|
+
|
||||||
|
+ - iommufd BE: uses device fd and iommufd to setup secure context
|
||||||
|
+ (bind_iommufd, attach_ioas)
|
||||||
|
+ - vfio legacy BE: uses group fd and container fd to setup secure context
|
||||||
|
+ (set_container, set_iommu)
|
||||||
|
+
|
||||||
|
+* Device access
|
||||||
|
+
|
||||||
|
+ - iommufd BE: device fd is opened through ``/dev/vfio/devices/vfioX``
|
||||||
|
+ - vfio legacy BE: device fd is retrieved from group fd ioctl
|
||||||
|
+
|
||||||
|
+* DMA Mapping flow
|
||||||
|
+
|
||||||
|
+ 1. VFIOAddressSpace receives MemoryRegion add/del via MemoryListener
|
||||||
|
+ 2. VFIO populates DMA map/unmap via the container BEs
|
||||||
|
+ * iommufd BE: uses iommufd
|
||||||
|
+ * vfio legacy BE: uses container fd
|
||||||
|
+
|
||||||
|
+Example configuration
|
||||||
|
+=====================
|
||||||
|
+
|
||||||
|
+Step 1: configure the host device
|
||||||
|
+---------------------------------
|
||||||
|
+
|
||||||
|
+It's exactly the same as for a VFIO device with the legacy VFIO container.
|
||||||
|
+
|
||||||
|
+Step 2: configure QEMU
|
||||||
|
+----------------------
|
||||||
|
+
|
||||||
|
+Interactions with the ``/dev/iommu`` are abstracted by a new iommufd
|
||||||
|
+object (compiled in with the ``CONFIG_IOMMUFD`` option).
|
||||||
|
+
|
||||||
|
+Any QEMU device (e.g. VFIO device) wishing to use ``/dev/iommu`` must
|
||||||
|
+be linked with an iommufd object. It gets a new optional property
|
||||||
|
+named ``iommufd`` which allows passing an iommufd object. Take ``vfio-pci``
|
||||||
|
+device for example:
|
||||||
|
+
|
||||||
|
+.. code-block:: bash
|
||||||
|
+
|
||||||
|
+ -object iommufd,id=iommufd0
|
||||||
|
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||||
|
+
|
||||||
|
+Note the ``/dev/iommu`` and VFIO cdev can be externally opened by a
|
||||||
|
+management layer. In that case the fd is passed to QEMU via the ``fd``
|
||||||
|
+property, which accepts either a number or a string naming the fd, for example:
|
||||||
|
+
|
||||||
|
+.. code-block:: bash
|
||||||
|
+
|
||||||
|
+ -object iommufd,id=iommufd0,fd=22
|
||||||
|
+ -device vfio-pci,iommufd=iommufd0,fd=23
|
||||||
|
+
|
||||||
|
+If the ``fd`` property is not passed, the fd is opened by QEMU.
|
||||||
|
+
|
||||||
|
+If no ``iommufd`` object is passed to the ``vfio-pci`` device, iommufd
|
||||||
|
+is not used and the user gets the behavior based on the legacy VFIO
|
||||||
|
+container:
|
||||||
|
+
|
||||||
|
+.. code-block:: bash
|
||||||
|
+
|
||||||
|
+ -device vfio-pci,host=0000:02:00.0
|
||||||
|
+
|
||||||
|
+Supported platform
|
||||||
|
+==================
|
||||||
|
+
|
||||||
|
+Supports x86, ARM and s390x currently.
|
||||||
|
+
|
||||||
|
+Caveats
|
||||||
|
+=======
|
||||||
|
+
|
||||||
|
+Dirty page sync
|
||||||
|
+---------------
|
||||||
|
+
|
||||||
|
+Dirty page sync with the iommufd backend is not supported yet, so live
|
||||||
|
+migration is disabled by default. It can be force-enabled as shown below,
|
||||||
|
+although this is inefficient.
|
||||||
|
+
|
||||||
|
+.. code-block:: bash
|
||||||
|
+
|
||||||
|
+ -object iommufd,id=iommufd0
|
||||||
|
+ -device vfio-pci,host=0000:02:00.0,iommufd=iommufd0,enable-migration=on
|
||||||
|
+
|
||||||
|
+P2P DMA
|
||||||
|
+-------
|
||||||
|
+
|
||||||
|
+PCI p2p DMA is unsupported as IOMMUFD doesn't support mapping hardware PCI
|
||||||
|
+BAR region yet. Below warning shows for assigned PCI device, it's not a bug.
|
||||||
|
+
|
||||||
|
+.. code-block:: none
|
||||||
|
+
|
||||||
|
+ qemu-system-x86_64: warning: IOMMU_IOAS_MAP failed: Bad address, PCI BAR?
|
||||||
|
+ qemu-system-x86_64: vfio_container_dma_map(0x560cb6cb1620, 0xe000000021000, 0x3000, 0x7f32ed55c000) = -14 (Bad address)
|
||||||
|
+
|
||||||
|
+FD passing with mdev
|
||||||
|
+--------------------
|
||||||
|
+
|
||||||
|
+``vfio-pci`` device checks sysfsdev property to decide if backend is a mdev.
|
||||||
|
+If FD passing is used, there is no way to know that and the mdev is treated
|
||||||
|
+like a real PCI device. There is an error as below if user wants to enable
|
||||||
|
+RAM discarding for mdev.
|
||||||
|
+
|
||||||
|
+.. code-block:: none
|
||||||
|
+
|
||||||
|
+ qemu-system-x86_64: -device vfio-pci,iommufd=iommufd0,x-balloon-allowed=on,fd=9: vfio VFIO_FD9: x-balloon-allowed only potentially compatible with mdev devices
|
||||||
|
+
|
||||||
|
+``vfio-ap`` and ``vfio-ccw`` devices don't have the same issue as their backend
|
||||||
|
+devices are always mdev and RAM discarding is force enabled.
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
98
kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch
Normal file
98
kvm-docs-remove-AioContext-lock-from-IOThread-docs.patch
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
From fc69df3a70bed5722643cc16828ca20beae3a20d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:08 -0500
|
||||||
|
Subject: [PATCH 091/101] docs: remove AioContext lock from IOThread docs
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [22/26] ab89cda483e74ded983d26e1c6e50217405e0a55 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Encourage the use of locking primitives and stop mentioning the
|
||||||
|
AioContext lock since it is being removed.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-12-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
docs/devel/multiple-iothreads.txt | 47 +++++++++++--------------------
|
||||||
|
1 file changed, 16 insertions(+), 31 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
|
||||||
|
index a3e949f6b3..4865196bde 100644
|
||||||
|
--- a/docs/devel/multiple-iothreads.txt
|
||||||
|
+++ b/docs/devel/multiple-iothreads.txt
|
||||||
|
@@ -88,27 +88,18 @@ loop, depending on which AioContext instance the caller passes in.
|
||||||
|
|
||||||
|
How to synchronize with an IOThread
|
||||||
|
-----------------------------------
|
||||||
|
-AioContext is not thread-safe so some rules must be followed when using file
|
||||||
|
-descriptors, event notifiers, timers, or BHs across threads:
|
||||||
|
+Variables that can be accessed by multiple threads require some form of
|
||||||
|
+synchronization such as qemu_mutex_lock(), rcu_read_lock(), etc.
|
||||||
|
|
||||||
|
-1. AioContext functions can always be called safely. They handle their
|
||||||
|
-own locking internally.
|
||||||
|
-
|
||||||
|
-2. Other threads wishing to access the AioContext must use
|
||||||
|
-aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
|
||||||
|
-context is acquired no other thread can access it or run event loop iterations
|
||||||
|
-in this AioContext.
|
||||||
|
-
|
||||||
|
-Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
|
||||||
|
-Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
|
||||||
|
-used in the block layer and can lead to hangs.
|
||||||
|
-
|
||||||
|
-There is currently no lock ordering rule if a thread needs to acquire multiple
|
||||||
|
-AioContexts simultaneously. Therefore, it is only safe for code holding the
|
||||||
|
-QEMU global mutex to acquire other AioContexts.
|
||||||
|
+AioContext functions like aio_set_fd_handler(), aio_set_event_notifier(),
|
||||||
|
+aio_bh_new(), and aio_timer_new() are thread-safe. They can be used to trigger
|
||||||
|
+activity in an IOThread.
|
||||||
|
|
||||||
|
Side note: the best way to schedule a function call across threads is to call
|
||||||
|
-aio_bh_schedule_oneshot(). No acquire/release or locking is needed.
|
||||||
|
+aio_bh_schedule_oneshot().
|
||||||
|
+
|
||||||
|
+The main loop thread can wait synchronously for a condition using
|
||||||
|
+AIO_WAIT_WHILE().
|
||||||
|
|
||||||
|
AioContext and the block layer
|
||||||
|
------------------------------
|
||||||
|
@@ -124,22 +115,16 @@ Block layer code must therefore expect to run in an IOThread and avoid using
|
||||||
|
old APIs that implicitly use the main loop. See the "How to program for
|
||||||
|
IOThreads" above for information on how to do that.
|
||||||
|
|
||||||
|
-If main loop code such as a QMP function wishes to access a BlockDriverState
|
||||||
|
-it must first call aio_context_acquire(bdrv_get_aio_context(bs)) to ensure
|
||||||
|
-that callbacks in the IOThread do not run in parallel.
|
||||||
|
-
|
||||||
|
Code running in the monitor typically needs to ensure that past
|
||||||
|
requests from the guest are completed. When a block device is running
|
||||||
|
in an IOThread, the IOThread can also process requests from the guest
|
||||||
|
(via ioeventfd). To achieve both objects, wrap the code between
|
||||||
|
bdrv_drained_begin() and bdrv_drained_end(), thus creating a "drained
|
||||||
|
-section". The functions must be called between aio_context_acquire()
|
||||||
|
-and aio_context_release(). You can freely release and re-acquire the
|
||||||
|
-AioContext within a drained section.
|
||||||
|
-
|
||||||
|
-Long-running jobs (usually in the form of coroutines) are best scheduled in
|
||||||
|
-the BlockDriverState's AioContext to avoid the need to acquire/release around
|
||||||
|
-each bdrv_*() call. The functions bdrv_add/remove_aio_context_notifier,
|
||||||
|
-or alternatively blk_add/remove_aio_context_notifier if you use BlockBackends,
|
||||||
|
-can be used to get a notification whenever bdrv_try_change_aio_context() moves a
|
||||||
|
+section".
|
||||||
|
+
|
||||||
|
+Long-running jobs (usually in the form of coroutines) are often scheduled in
|
||||||
|
+the BlockDriverState's AioContext. The functions
|
||||||
|
+bdrv_add/remove_aio_context_notifier, or alternatively
|
||||||
|
+blk_add/remove_aio_context_notifier if you use BlockBackends, can be used to
|
||||||
|
+get a notification whenever bdrv_try_change_aio_context() moves a
|
||||||
|
BlockDriverState to a different AioContext.
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
1190
kvm-graph-lock-remove-AioContext-locking.patch
Normal file
1190
kvm-graph-lock-remove-AioContext-locking.patch
Normal file
File diff suppressed because it is too large
Load Diff
42
kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch
Normal file
42
kvm-hw-arm-Activate-IOMMUFD-for-virt-machines.patch
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
From ceaee9c4372bbdc4196cb6808515047388f7aa26 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:18 +0800
|
||||||
|
Subject: [PATCH 039/101] hw/arm: Activate IOMMUFD for virt machines
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [38/67] 0a059ae661616e95eb8455e17f35774495cae8e7 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 0970238343af45a8b547695bfc22f18d4eb7da7e)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/arm/Kconfig | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
|
||||||
|
index 3ada335a24..660f49db49 100644
|
||||||
|
--- a/hw/arm/Kconfig
|
||||||
|
+++ b/hw/arm/Kconfig
|
||||||
|
@@ -8,6 +8,7 @@ config ARM_VIRT
|
||||||
|
imply TPM_TIS_SYSBUS
|
||||||
|
imply TPM_TIS_I2C
|
||||||
|
imply NVDIMM
|
||||||
|
+ imply IOMMUFD
|
||||||
|
select ARM_GIC
|
||||||
|
select ACPI
|
||||||
|
select ARM_SMMUV3
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,88 @@
|
|||||||
|
From e670722b9a6460d41497688d820d5a9a9b51d8e9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Gavin Shan <gshan@redhat.com>
|
||||||
|
Date: Tue, 9 Jan 2024 11:36:42 +1000
|
||||||
|
Subject: [PATCH 001/101] hw/arm/virt: Add properties to disable high memory
|
||||||
|
regions
|
||||||
|
|
||||||
|
RH-Author: Gavin Shan <gshan@redhat.com>
|
||||||
|
RH-MergeRequest: 210: hw/arm/virt: Add properties to disable high memory regions
|
||||||
|
RH-Jira: RHEL-19738
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-Commit: [1/1] 4097ba5133a67126e30b84202cb40df4e019c5f4
|
||||||
|
|
||||||
|
Upstream: RHEL-only
|
||||||
|
Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=57927352
|
||||||
|
|
||||||
|
There are 3 high memory regions for GICv3 or GICv4 redistributor, PCI
|
||||||
|
ECAM and PCI MMIO. Each of them has a property introduced by upstream
|
||||||
|
commit 6a48c64eec ("hw/arm/virt: Add properties to disable high memory
|
||||||
|
regions") so that the corresponding high memory region can be disabled.
|
||||||
|
|
||||||
|
It's notable that another property ("compact-highmem") introduced by
|
||||||
|
upstream commit f40408a9fe ("hw/arm/virt: Add 'compact-highmem' property")
|
||||||
|
so that the compact high memory region layout during assignment can be
|
||||||
|
disabled, compatible to the old machine types. However, we don't have
|
||||||
|
the compatibility issue since the compact high memory region layout is
|
||||||
|
always kept as disabled until RHEL9.2.0 machine type and onwards.
|
||||||
|
|
||||||
|
Expose those 3 properties: "highmem-redists", "highmem-ecam" and
|
||||||
|
"highmem-mmio". The property "compact-highmem" is kept as hidden.
|
||||||
|
|
||||||
|
Signed-off-by: Gavin Shan <gshan@redhat.com>
|
||||||
|
---
|
||||||
|
hw/arm/virt.c | 24 +++++++++++++++++++++++-
|
||||||
|
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
|
||||||
|
index 5cab00b4cd..60f117f0d2 100644
|
||||||
|
--- a/hw/arm/virt.c
|
||||||
|
+++ b/hw/arm/virt.c
|
||||||
|
@@ -2456,6 +2456,7 @@ static void virt_set_compact_highmem(Object *obj, bool value, Error **errp)
|
||||||
|
|
||||||
|
vms->highmem_compact = value;
|
||||||
|
}
|
||||||
|
+#endif /* disabled for RHEL */
|
||||||
|
|
||||||
|
static bool virt_get_highmem_redists(Object *obj, Error **errp)
|
||||||
|
{
|
||||||
|
@@ -2498,7 +2499,6 @@ static void virt_set_highmem_mmio(Object *obj, bool value, Error **errp)
|
||||||
|
|
||||||
|
vms->highmem_mmio = value;
|
||||||
|
}
|
||||||
|
-#endif /* disabled for RHEL */
|
||||||
|
|
||||||
|
static bool virt_get_its(Object *obj, Error **errp)
|
||||||
|
{
|
||||||
|
@@ -3521,6 +3521,28 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
|
||||||
|
"Set on/off to enable/disable using "
|
||||||
|
"physical address space above 32 bits");
|
||||||
|
|
||||||
|
+ object_class_property_add_bool(oc, "highmem-redists",
|
||||||
|
+ virt_get_highmem_redists,
|
||||||
|
+ virt_set_highmem_redists);
|
||||||
|
+ object_class_property_set_description(oc, "highmem-redists",
|
||||||
|
+ "Set on/off to enable/disable high "
|
||||||
|
+ "memory region for GICv3 or GICv4 "
|
||||||
|
+ "redistributor");
|
||||||
|
+
|
||||||
|
+ object_class_property_add_bool(oc, "highmem-ecam",
|
||||||
|
+ virt_get_highmem_ecam,
|
||||||
|
+ virt_set_highmem_ecam);
|
||||||
|
+ object_class_property_set_description(oc, "highmem-ecam",
|
||||||
|
+ "Set on/off to enable/disable high "
|
||||||
|
+ "memory region for PCI ECAM");
|
||||||
|
+
|
||||||
|
+ object_class_property_add_bool(oc, "highmem-mmio",
|
||||||
|
+ virt_get_highmem_mmio,
|
||||||
|
+ virt_set_highmem_mmio);
|
||||||
|
+ object_class_property_set_description(oc, "highmem-mmio",
|
||||||
|
+ "Set on/off to enable/disable high "
|
||||||
|
+ "memory region for PCI MMIO");
|
||||||
|
+
|
||||||
|
object_class_property_add_str(oc, "gic-version", virt_get_gic_version,
|
||||||
|
virt_set_gic_version);
|
||||||
|
object_class_property_set_description(oc, "gic-version",
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
41
kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch
Normal file
41
kvm-hw-i386-Activate-IOMMUFD-for-q35-machines.patch
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
From 7a6be312c11911bdd2ce82566be22a3e014947c2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:20 +0800
|
||||||
|
Subject: [PATCH 041/101] hw/i386: Activate IOMMUFD for q35 machines
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [40/67] b15764ab24fd57389a8d219736613484acd7d29e (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 64ad06f6eba66c514477f490bcba409439a480d8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/i386/Kconfig | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
|
||||||
|
index 55850791df..a1846be6f7 100644
|
||||||
|
--- a/hw/i386/Kconfig
|
||||||
|
+++ b/hw/i386/Kconfig
|
||||||
|
@@ -95,6 +95,7 @@ config Q35
|
||||||
|
imply E1000E_PCI_EXPRESS
|
||||||
|
imply VMPORT
|
||||||
|
imply VMMOUSE
|
||||||
|
+ imply IOMMUFD
|
||||||
|
select PC_PCI
|
||||||
|
select PC_ACPI
|
||||||
|
select PCI_EXPRESS_Q35
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
116
kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch
Normal file
116
kvm-hw-ppc-Kconfig-Imply-VFIO_PCI.patch
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
From 84f378c41832602dcf9bad6167b1f532c7c53e37 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 15:03:55 +0100
|
||||||
|
Subject: [PATCH 048/101] hw/ppc/Kconfig: Imply VFIO_PCI
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [47/67] c1a40cdab9bf62b16cb428d57a20b3e0eaa6de38 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
When the legacy and iommufd backends were introduced, a set of common
|
||||||
|
vfio-pci routines were exported in pci.c for both backends to use :
|
||||||
|
|
||||||
|
vfio_pci_pre_reset
|
||||||
|
vfio_pci_get_pci_hot_reset_info
|
||||||
|
vfio_pci_host_match
|
||||||
|
vfio_pci_post_reset
|
||||||
|
|
||||||
|
This introduced a build failure on PPC when --without-default-devices
|
||||||
|
is used because VFIO is always selected in ppc/Kconfig but VFIO_PCI is
|
||||||
|
not.
|
||||||
|
|
||||||
|
Use an 'imply VFIO_PCI' in ppc/Kconfig and bypass compilation of the
|
||||||
|
VFIO EEH hooks routines defined in hw/ppc/spapr_pci_vfio.c with
|
||||||
|
CONFIG_VFIO_PCI.
|
||||||
|
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 4278df9d1d2383b738338c857406357660f11e42)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/ppc/Kconfig | 2 +-
|
||||||
|
hw/ppc/spapr_pci_vfio.c | 36 ++++++++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
|
||||||
|
index 56f0475a8e..44263a58c4 100644
|
||||||
|
--- a/hw/ppc/Kconfig
|
||||||
|
+++ b/hw/ppc/Kconfig
|
||||||
|
@@ -3,11 +3,11 @@ config PSERIES
|
||||||
|
imply PCI_DEVICES
|
||||||
|
imply TEST_DEVICES
|
||||||
|
imply VIRTIO_VGA
|
||||||
|
+ imply VFIO_PCI if LINUX # needed by spapr_pci_vfio.c
|
||||||
|
select NVDIMM
|
||||||
|
select DIMM
|
||||||
|
select PCI
|
||||||
|
select SPAPR_VSCSI
|
||||||
|
- select VFIO if LINUX # needed by spapr_pci_vfio.c
|
||||||
|
select XICS
|
||||||
|
select XIVE
|
||||||
|
select MSI_NONBROKEN
|
||||||
|
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
|
||||||
|
index d1d07bec46..76b2a3487b 100644
|
||||||
|
--- a/hw/ppc/spapr_pci_vfio.c
|
||||||
|
+++ b/hw/ppc/spapr_pci_vfio.c
|
||||||
|
@@ -26,10 +26,12 @@
|
||||||
|
#include "hw/pci/pci_device.h"
|
||||||
|
#include "hw/vfio/vfio-common.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Interfaces for IBM EEH (Enhanced Error Handling)
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_VFIO_PCI
|
||||||
|
static bool vfio_eeh_container_ok(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
@@ -314,3 +316,37 @@ int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||||
|
|
||||||
|
return RTAS_OUT_SUCCESS;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+#else
|
||||||
|
+
|
||||||
|
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
|
||||||
|
+{
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void spapr_phb_vfio_reset(DeviceState *qdev)
|
||||||
|
+{
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
|
||||||
|
+ unsigned int addr, int option)
|
||||||
|
+{
|
||||||
|
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
|
||||||
|
+{
|
||||||
|
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
|
||||||
|
+{
|
||||||
|
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
|
||||||
|
+{
|
||||||
|
+ return RTAS_OUT_NOT_SUPPORTED;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#endif /* CONFIG_VFIO_PCI */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
73
kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch
Normal file
73
kvm-hw-vfio-fix-iteration-over-global-VFIODevice-list.patch
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
From 8f27893a37e55a31180bb66cd9eae7199911881b Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Volker=20R=C3=BCmelin?= <vr_qemu@t-online.de>
|
||||||
|
Date: Fri, 29 Dec 2023 21:38:54 +0100
|
||||||
|
Subject: [PATCH 060/101] hw/vfio: fix iteration over global VFIODevice list
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [59/67] f926e1233c8c5ad418e8794b1a103371c9dc5eb0 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Commit 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||||
|
introduced a global VFIODevice list, but forgot to update the list
|
||||||
|
element field name when iterating over the new list. Change the code
|
||||||
|
to use the correct list element field.
|
||||||
|
|
||||||
|
Fixes: 3d779abafe ("vfio/common: Introduce a global VFIODevice list")
|
||||||
|
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2061
|
||||||
|
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
(cherry picked from commit 9353b6da430f90e47f352dbf6dc31120c8914da6)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 8 ++++----
|
||||||
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 0d4d8b8416..0b3352f2a9 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -73,7 +73,7 @@ bool vfio_mig_active(void)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||||
|
if (vbasedev->migration_blocker) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -94,7 +94,7 @@ static bool vfio_multiple_devices_migration_is_supported(void)
|
||||||
|
unsigned int device_num = 0;
|
||||||
|
bool all_support_p2p = true;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||||
|
if (vbasedev->migration) {
|
||||||
|
device_num++;
|
||||||
|
|
||||||
|
@@ -1366,13 +1366,13 @@ void vfio_reset_handler(void *opaque)
|
||||||
|
{
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||||
|
if (vbasedev->dev->realized) {
|
||||||
|
vbasedev->ops->vfio_compute_needs_reset(vbasedev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) {
|
||||||
|
if (vbasedev->dev->realized && vbasedev->needs_reset) {
|
||||||
|
vbasedev->ops->vfio_hot_reset_multi(vbasedev);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,54 @@
|
|||||||
|
From 51b8f29cddb73eb02f91af5f52a205fdd3af6583 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Thomas Huth <thuth@redhat.com>
|
||||||
|
Date: Wed, 17 Jan 2024 21:08:59 +0100
|
||||||
|
Subject: [PATCH 099/101] include/ui/rect.h: fix qemu_rect_init()
|
||||||
|
mis-assignment
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||||
|
RH-MergeRequest: 216: Fix regression in QEMU's virtio-gpu VNC sessions
|
||||||
|
RH-Jira: RHEL-21570
|
||||||
|
RH-Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Commit: [1/1] a9d487be04e2c1847b80c479b5cc790af81e3428 (thuth/qemu-kvm-cs9)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-21570
|
||||||
|
|
||||||
|
commit 9d5b42beb6978dc6219d5dc029c9d453c6b8d503
|
||||||
|
Author: Elen Avan <elen.avan@bk.ru>
|
||||||
|
Date: Fri Dec 22 22:17:21 2023 +0300
|
||||||
|
|
||||||
|
include/ui/rect.h: fix qemu_rect_init() mis-assignment
|
||||||
|
|
||||||
|
Signed-off-by: Elen Avan <elen.avan@bk.ru>
|
||||||
|
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051
|
||||||
|
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050
|
||||||
|
Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test"
|
||||||
|
Cc: qemu-stable@nongnu.org
|
||||||
|
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||||
|
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
|
||||||
|
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
---
|
||||||
|
include/ui/rect.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/ui/rect.h b/include/ui/rect.h
|
||||||
|
index 94898f92d0..68f05d78a8 100644
|
||||||
|
--- a/include/ui/rect.h
|
||||||
|
+++ b/include/ui/rect.h
|
||||||
|
@@ -19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect,
|
||||||
|
uint16_t width, uint16_t height)
|
||||||
|
{
|
||||||
|
rect->x = x;
|
||||||
|
- rect->y = x;
|
||||||
|
+ rect->y = y;
|
||||||
|
rect->width = width;
|
||||||
|
rect->height = height;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
105
kvm-job-remove-outdated-AioContext-locking-comments.patch
Normal file
105
kvm-job-remove-outdated-AioContext-locking-comments.patch
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
From 4ab25b33831fa207500179bd30f29388d81e4cce Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:10 -0500
|
||||||
|
Subject: [PATCH 093/101] job: remove outdated AioContext locking comments
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [24/26] 15ff2928be82d6905c22619458487fbb72d6044a (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The AioContext lock no longer exists.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-14-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
include/qemu/job.h | 20 --------------------
|
||||||
|
1 file changed, 20 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/qemu/job.h b/include/qemu/job.h
|
||||||
|
index e502787dd8..9ea98b5927 100644
|
||||||
|
--- a/include/qemu/job.h
|
||||||
|
+++ b/include/qemu/job.h
|
||||||
|
@@ -67,8 +67,6 @@ typedef struct Job {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The completion function that will be called when the job completes.
|
||||||
|
- * Called with AioContext lock held, since many callback implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
BlockCompletionFunc *cb;
|
||||||
|
|
||||||
|
@@ -264,9 +262,6 @@ struct JobDriver {
|
||||||
|
*
|
||||||
|
* This callback will not be invoked if the job has already failed.
|
||||||
|
* If it fails, abort and then clean will be called.
|
||||||
|
- *
|
||||||
|
- * Called with AioContext lock held, since many callbacs implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
int (*prepare)(Job *job);
|
||||||
|
|
||||||
|
@@ -277,9 +272,6 @@ struct JobDriver {
|
||||||
|
*
|
||||||
|
* All jobs will complete with a call to either .commit() or .abort() but
|
||||||
|
* never both.
|
||||||
|
- *
|
||||||
|
- * Called with AioContext lock held, since many callback implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
void (*commit)(Job *job);
|
||||||
|
|
||||||
|
@@ -290,9 +282,6 @@ struct JobDriver {
|
||||||
|
*
|
||||||
|
* All jobs will complete with a call to either .commit() or .abort() but
|
||||||
|
* never both.
|
||||||
|
- *
|
||||||
|
- * Called with AioContext lock held, since many callback implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
void (*abort)(Job *job);
|
||||||
|
|
||||||
|
@@ -301,9 +290,6 @@ struct JobDriver {
|
||||||
|
* .commit() or .abort(). Regardless of which callback is invoked after
|
||||||
|
* completion, .clean() will always be called, even if the job does not
|
||||||
|
* belong to a transaction group.
|
||||||
|
- *
|
||||||
|
- * Called with AioContext lock held, since many callbacs implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
void (*clean)(Job *job);
|
||||||
|
|
||||||
|
@@ -318,17 +304,12 @@ struct JobDriver {
|
||||||
|
* READY).
|
||||||
|
* (If the callback is NULL, the job is assumed to terminate
|
||||||
|
* without I/O.)
|
||||||
|
- *
|
||||||
|
- * Called with AioContext lock held, since many callback implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
bool (*cancel)(Job *job, bool force);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the job is freed.
|
||||||
|
- * Called with AioContext lock held, since many callback implementations
|
||||||
|
- * use bdrv_* functions that require to hold the lock.
|
||||||
|
*/
|
||||||
|
void (*free)(Job *job);
|
||||||
|
};
|
||||||
|
@@ -424,7 +405,6 @@ void job_ref_locked(Job *job);
|
||||||
|
* Release a reference that was previously acquired with job_ref_locked() or
|
||||||
|
* job_create(). If it's the last reference to the object, it will be freed.
|
||||||
|
*
|
||||||
|
- * Takes AioContext lock internally to invoke a job->driver callback.
|
||||||
|
* Called with job lock held.
|
||||||
|
*/
|
||||||
|
void job_unref_locked(Job *job);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
42
kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch
Normal file
42
kvm-kconfig-Activate-IOMMUFD-for-s390x-machines.patch
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
From 9c2eb4ab03903bc084c53ac29b60b8d2121c9fed Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:19 +0800
|
||||||
|
Subject: [PATCH 040/101] kconfig: Activate IOMMUFD for s390x machines
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [39/67] cf0ebe770b8db5916dd35247618c0a325dc1eaab (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 73e2df669335047b542b67d37ade060a6ae40dd8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/s390x/Kconfig | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/hw/s390x/Kconfig b/hw/s390x/Kconfig
|
||||||
|
index 4c068d7960..26ad104485 100644
|
||||||
|
--- a/hw/s390x/Kconfig
|
||||||
|
+++ b/hw/s390x/Kconfig
|
||||||
|
@@ -6,6 +6,7 @@ config S390_CCW_VIRTIO
|
||||||
|
imply VFIO_CCW
|
||||||
|
imply WDT_DIAG288
|
||||||
|
imply PCIE_DEVICES
|
||||||
|
+ imply IOMMUFD
|
||||||
|
select PCI_EXPRESS
|
||||||
|
select S390_FLIC
|
||||||
|
select S390_FLIC_KVM if KVM
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
53
kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch
Normal file
53
kvm-nbd-server-avoid-per-NBDRequest-nbd_client_get-put.patch
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
From cd7788a857a6099206c4063e3ef69cb9e4aebcbc Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 21 Dec 2023 14:24:50 -0500
|
||||||
|
Subject: [PATCH 070/101] nbd/server: avoid per-NBDRequest nbd_client_get/put()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [1/26] 5acb090ac4adf4260cd9e9c5605a27012b2a33aa (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
nbd_trip() processes a single NBD request from start to finish and holds
|
||||||
|
an NBDClient reference throughout. NBDRequest does not outlive the scope
|
||||||
|
of nbd_trip(). Therefore it is unnecessary to ref/unref NBDClient for
|
||||||
|
each NBDRequest.
|
||||||
|
|
||||||
|
Removing these nbd_client_get()/nbd_client_put() calls will make
|
||||||
|
thread-safety easier in the commits that follow.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
|
||||||
|
Message-ID: <20231221192452.1785567-5-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
nbd/server.c | 3 ---
|
||||||
|
1 file changed, 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/nbd/server.c b/nbd/server.c
|
||||||
|
index 895cf0a752..0b09ccc8dc 100644
|
||||||
|
--- a/nbd/server.c
|
||||||
|
+++ b/nbd/server.c
|
||||||
|
@@ -1557,7 +1557,6 @@ static NBDRequestData *nbd_request_get(NBDClient *client)
|
||||||
|
client->nb_requests++;
|
||||||
|
|
||||||
|
req = g_new0(NBDRequestData, 1);
|
||||||
|
- nbd_client_get(client);
|
||||||
|
req->client = client;
|
||||||
|
return req;
|
||||||
|
}
|
||||||
|
@@ -1578,8 +1577,6 @@ static void nbd_request_put(NBDRequestData *req)
|
||||||
|
}
|
||||||
|
|
||||||
|
nbd_client_receive_next_request(client);
|
||||||
|
-
|
||||||
|
- nbd_client_put(client);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void blk_aio_attached(AioContext *ctx, void *opaque)
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
373
kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch
Normal file
373
kvm-nbd-server-introduce-NBDClient-lock-to-protect-field.patch
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
From bb0a6afff7f23a3ddb460dc1b2e70c06565f8a3f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 21 Dec 2023 14:24:52 -0500
|
||||||
|
Subject: [PATCH 072/101] nbd/server: introduce NBDClient->lock to protect
|
||||||
|
fields
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [3/26] 49b64adaaf8b1c30f339d1ecc8ea89fb9db63f1c (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
NBDClient has a number of fields that are accessed by both the export
|
||||||
|
AioContext and the main loop thread. When the AioContext lock is removed
|
||||||
|
these fields will need another form of protection.
|
||||||
|
|
||||||
|
Add NBDClient->lock and protect fields that are accessed by both
|
||||||
|
threads. Also add assertions where possible and otherwise add doc
|
||||||
|
comments stating assumptions about which thread runs and which lock is held.
|
||||||
|
|
||||||
|
Note this patch moves the client->recv_coroutine assertion from
|
||||||
|
nbd_co_receive_request() to nbd_trip() where client->lock is held.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20231221192452.1785567-7-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
nbd/server.c | 144 +++++++++++++++++++++++++++++++++++++++------------
|
||||||
|
1 file changed, 111 insertions(+), 33 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/nbd/server.c b/nbd/server.c
|
||||||
|
index e91e2e0903..941832f178 100644
|
||||||
|
--- a/nbd/server.c
|
||||||
|
+++ b/nbd/server.c
|
||||||
|
@@ -125,23 +125,25 @@ struct NBDClient {
|
||||||
|
int refcount; /* atomic */
|
||||||
|
void (*close_fn)(NBDClient *client, bool negotiated);
|
||||||
|
|
||||||
|
+ QemuMutex lock;
|
||||||
|
+
|
||||||
|
NBDExport *exp;
|
||||||
|
QCryptoTLSCreds *tlscreds;
|
||||||
|
char *tlsauthz;
|
||||||
|
QIOChannelSocket *sioc; /* The underlying data channel */
|
||||||
|
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
|
||||||
|
|
||||||
|
- Coroutine *recv_coroutine;
|
||||||
|
+ Coroutine *recv_coroutine; /* protected by lock */
|
||||||
|
|
||||||
|
CoMutex send_lock;
|
||||||
|
Coroutine *send_coroutine;
|
||||||
|
|
||||||
|
- bool read_yielding;
|
||||||
|
- bool quiescing;
|
||||||
|
+ bool read_yielding; /* protected by lock */
|
||||||
|
+ bool quiescing; /* protected by lock */
|
||||||
|
|
||||||
|
QTAILQ_ENTRY(NBDClient) next;
|
||||||
|
- int nb_requests;
|
||||||
|
- bool closing;
|
||||||
|
+ int nb_requests; /* protected by lock */
|
||||||
|
+ bool closing; /* protected by lock */
|
||||||
|
|
||||||
|
uint32_t check_align; /* If non-zero, check for aligned client requests */
|
||||||
|
|
||||||
|
@@ -1415,11 +1417,18 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t size, Error **errp)
|
||||||
|
|
||||||
|
len = qio_channel_readv(client->ioc, &iov, 1, errp);
|
||||||
|
if (len == QIO_CHANNEL_ERR_BLOCK) {
|
||||||
|
- client->read_yielding = true;
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ client->read_yielding = true;
|
||||||
|
+
|
||||||
|
+ /* Prompt main loop thread to re-run nbd_drained_poll() */
|
||||||
|
+ aio_wait_kick();
|
||||||
|
+ }
|
||||||
|
qio_channel_yield(client->ioc, G_IO_IN);
|
||||||
|
- client->read_yielding = false;
|
||||||
|
- if (client->quiescing) {
|
||||||
|
- return -EAGAIN;
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ client->read_yielding = false;
|
||||||
|
+ if (client->quiescing) {
|
||||||
|
+ return -EAGAIN;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
} else if (len < 0) {
|
||||||
|
@@ -1528,6 +1537,7 @@ void nbd_client_put(NBDClient *client)
|
||||||
|
blk_exp_unref(&client->exp->common);
|
||||||
|
}
|
||||||
|
g_free(client->contexts.bitmaps);
|
||||||
|
+ qemu_mutex_destroy(&client->lock);
|
||||||
|
g_free(client);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -1561,11 +1571,13 @@ static void client_close(NBDClient *client, bool negotiated)
|
||||||
|
{
|
||||||
|
assert(qemu_in_main_thread());
|
||||||
|
|
||||||
|
- if (client->closing) {
|
||||||
|
- return;
|
||||||
|
- }
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ if (client->closing) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- client->closing = true;
|
||||||
|
+ client->closing = true;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* Force requests to finish. They will drop their own references,
|
||||||
|
* then we'll close the socket and free the NBDClient.
|
||||||
|
@@ -1579,6 +1591,7 @@ static void client_close(NBDClient *client, bool negotiated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Runs in export AioContext with client->lock held */
|
||||||
|
static NBDRequestData *nbd_request_get(NBDClient *client)
|
||||||
|
{
|
||||||
|
NBDRequestData *req;
|
||||||
|
@@ -1591,6 +1604,7 @@ static NBDRequestData *nbd_request_get(NBDClient *client)
|
||||||
|
return req;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Runs in export AioContext with client->lock held */
|
||||||
|
static void nbd_request_put(NBDRequestData *req)
|
||||||
|
{
|
||||||
|
NBDClient *client = req->client;
|
||||||
|
@@ -1614,14 +1628,18 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
|
||||||
|
NBDExport *exp = opaque;
|
||||||
|
NBDClient *client;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
trace_nbd_blk_aio_attached(exp->name, ctx);
|
||||||
|
|
||||||
|
exp->common.ctx = ctx;
|
||||||
|
|
||||||
|
QTAILQ_FOREACH(client, &exp->clients, next) {
|
||||||
|
- assert(client->nb_requests == 0);
|
||||||
|
- assert(client->recv_coroutine == NULL);
|
||||||
|
- assert(client->send_coroutine == NULL);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ assert(client->nb_requests == 0);
|
||||||
|
+ assert(client->recv_coroutine == NULL);
|
||||||
|
+ assert(client->send_coroutine == NULL);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1629,6 +1647,8 @@ static void blk_aio_detach(void *opaque)
|
||||||
|
{
|
||||||
|
NBDExport *exp = opaque;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
|
||||||
|
|
||||||
|
exp->common.ctx = NULL;
|
||||||
|
@@ -1639,8 +1659,12 @@ static void nbd_drained_begin(void *opaque)
|
||||||
|
NBDExport *exp = opaque;
|
||||||
|
NBDClient *client;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
QTAILQ_FOREACH(client, &exp->clients, next) {
|
||||||
|
- client->quiescing = true;
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ client->quiescing = true;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1649,28 +1673,48 @@ static void nbd_drained_end(void *opaque)
|
||||||
|
NBDExport *exp = opaque;
|
||||||
|
NBDClient *client;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
QTAILQ_FOREACH(client, &exp->clients, next) {
|
||||||
|
- client->quiescing = false;
|
||||||
|
- nbd_client_receive_next_request(client);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ client->quiescing = false;
|
||||||
|
+ nbd_client_receive_next_request(client);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Runs in export AioContext */
|
||||||
|
+static void nbd_wake_read_bh(void *opaque)
|
||||||
|
+{
|
||||||
|
+ NBDClient *client = opaque;
|
||||||
|
+ qio_channel_wake_read(client->ioc);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static bool nbd_drained_poll(void *opaque)
|
||||||
|
{
|
||||||
|
NBDExport *exp = opaque;
|
||||||
|
NBDClient *client;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
QTAILQ_FOREACH(client, &exp->clients, next) {
|
||||||
|
- if (client->nb_requests != 0) {
|
||||||
|
- /*
|
||||||
|
- * If there's a coroutine waiting for a request on nbd_read_eof()
|
||||||
|
- * enter it here so we don't depend on the client to wake it up.
|
||||||
|
- */
|
||||||
|
- if (client->recv_coroutine != NULL && client->read_yielding) {
|
||||||
|
- qio_channel_wake_read(client->ioc);
|
||||||
|
- }
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ if (client->nb_requests != 0) {
|
||||||
|
+ /*
|
||||||
|
+ * If there's a coroutine waiting for a request on nbd_read_eof()
|
||||||
|
+ * enter it here so we don't depend on the client to wake it up.
|
||||||
|
+ *
|
||||||
|
+ * Schedule a BH in the export AioContext to avoid missing the
|
||||||
|
+ * wake up due to the race between qio_channel_wake_read() and
|
||||||
|
+ * qio_channel_yield().
|
||||||
|
+ */
|
||||||
|
+ if (client->recv_coroutine != NULL && client->read_yielding) {
|
||||||
|
+ aio_bh_schedule_oneshot(nbd_export_aio_context(client->exp),
|
||||||
|
+ nbd_wake_read_bh, client);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- return true;
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1681,6 +1725,8 @@ static void nbd_eject_notifier(Notifier *n, void *data)
|
||||||
|
{
|
||||||
|
NBDExport *exp = container_of(n, NBDExport, eject_notifier);
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
blk_exp_request_shutdown(&exp->common);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2566,7 +2612,6 @@ static int coroutine_fn nbd_co_receive_request(NBDRequestData *req,
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
g_assert(qemu_in_coroutine());
|
||||||
|
- assert(client->recv_coroutine == qemu_coroutine_self());
|
||||||
|
ret = nbd_receive_request(client, request, errp);
|
||||||
|
if (ret < 0) {
|
||||||
|
return ret;
|
||||||
|
@@ -2975,6 +3020,9 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
*/
|
||||||
|
|
||||||
|
trace_nbd_trip();
|
||||||
|
+
|
||||||
|
+ qemu_mutex_lock(&client->lock);
|
||||||
|
+
|
||||||
|
if (client->closing) {
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
@@ -2990,7 +3038,21 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
}
|
||||||
|
|
||||||
|
req = nbd_request_get(client);
|
||||||
|
- ret = nbd_co_receive_request(req, &request, &local_err);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has
|
||||||
|
+ * set client->quiescing but by the time we get back nbd_drained_end() may
|
||||||
|
+ * have already cleared client->quiescing. In that case we try again
|
||||||
|
+ * because nothing else will spawn an nbd_trip() coroutine until we set
|
||||||
|
+ * client->recv_coroutine = NULL further down.
|
||||||
|
+ */
|
||||||
|
+ do {
|
||||||
|
+ assert(client->recv_coroutine == qemu_coroutine_self());
|
||||||
|
+ qemu_mutex_unlock(&client->lock);
|
||||||
|
+ ret = nbd_co_receive_request(req, &request, &local_err);
|
||||||
|
+ qemu_mutex_lock(&client->lock);
|
||||||
|
+ } while (ret == -EAGAIN && !client->quiescing);
|
||||||
|
+
|
||||||
|
client->recv_coroutine = NULL;
|
||||||
|
|
||||||
|
if (client->closing) {
|
||||||
|
@@ -3002,15 +3064,16 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == -EAGAIN) {
|
||||||
|
- assert(client->quiescing);
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
nbd_client_receive_next_request(client);
|
||||||
|
+
|
||||||
|
if (ret == -EIO) {
|
||||||
|
goto disconnect;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ qemu_mutex_unlock(&client->lock);
|
||||||
|
qio_channel_set_cork(client->ioc, true);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
@@ -3030,6 +3093,10 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
g_free(request.contexts->bitmaps);
|
||||||
|
g_free(request.contexts);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ qio_channel_set_cork(client->ioc, false);
|
||||||
|
+ qemu_mutex_lock(&client->lock);
|
||||||
|
+
|
||||||
|
if (ret < 0) {
|
||||||
|
error_prepend(&local_err, "Failed to send reply: ");
|
||||||
|
goto disconnect;
|
||||||
|
@@ -3044,11 +3111,13 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
goto disconnect;
|
||||||
|
}
|
||||||
|
|
||||||
|
- qio_channel_set_cork(client->ioc, false);
|
||||||
|
done:
|
||||||
|
if (req) {
|
||||||
|
nbd_request_put(req);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ qemu_mutex_unlock(&client->lock);
|
||||||
|
+
|
||||||
|
if (!nbd_client_put_nonzero(client)) {
|
||||||
|
aio_co_reschedule_self(qemu_get_aio_context());
|
||||||
|
nbd_client_put(client);
|
||||||
|
@@ -3059,13 +3128,19 @@ disconnect:
|
||||||
|
if (local_err) {
|
||||||
|
error_reportf_err(local_err, "Disconnect client, due to: ");
|
||||||
|
}
|
||||||
|
+
|
||||||
|
nbd_request_put(req);
|
||||||
|
+ qemu_mutex_unlock(&client->lock);
|
||||||
|
|
||||||
|
aio_co_reschedule_self(qemu_get_aio_context());
|
||||||
|
client_close(client, true);
|
||||||
|
nbd_client_put(client);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * Runs in export AioContext and main loop thread. Caller must hold
|
||||||
|
+ * client->lock.
|
||||||
|
+ */
|
||||||
|
static void nbd_client_receive_next_request(NBDClient *client)
|
||||||
|
{
|
||||||
|
if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS &&
|
||||||
|
@@ -3091,7 +3166,9 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- nbd_client_receive_next_request(client);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&client->lock) {
|
||||||
|
+ nbd_client_receive_next_request(client);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -3108,6 +3185,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
|
||||||
|
Coroutine *co;
|
||||||
|
|
||||||
|
client = g_new0(NBDClient, 1);
|
||||||
|
+ qemu_mutex_init(&client->lock);
|
||||||
|
client->refcount = 1;
|
||||||
|
client->tlscreds = tlscreds;
|
||||||
|
if (tlscreds) {
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
176
kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch
Normal file
176
kvm-nbd-server-only-traverse-NBDExport-clients-from-main.patch
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
From 8b60d72532b6511b41d82d591fb4f509314ef15f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 21 Dec 2023 14:24:51 -0500
|
||||||
|
Subject: [PATCH 071/101] nbd/server: only traverse NBDExport->clients from
|
||||||
|
main loop thread
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [2/26] e7794a3a5c363c7508ee505c4ba03d9ef8862ca9 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The NBD clients list is currently accessed from both the export
|
||||||
|
AioContext and the main loop thread. When the AioContext lock is removed
|
||||||
|
there will be nothing protecting the clients list.
|
||||||
|
|
||||||
|
Adding a lock around the clients list is tricky because NBDClient
|
||||||
|
structs are refcounted and may be freed from the export AioContext or
|
||||||
|
the main loop thread. nbd_export_request_shutdown() -> client_close() ->
|
||||||
|
nbd_client_put() is also tricky because the list lock would be held
|
||||||
|
while indirectly dropping references to NBDClients.
|
||||||
|
|
||||||
|
A simpler approach is to only allow nbd_client_put() and client_close()
|
||||||
|
calls from the main loop thread. Then the NBD clients list is only
|
||||||
|
accessed from the main loop thread and no fancy locking is needed.
|
||||||
|
|
||||||
|
nbd_trip() just needs to reschedule itself in the main loop AioContext
|
||||||
|
before calling nbd_client_put() and client_close(). This costs more CPU
|
||||||
|
cycles per NBD request so add nbd_client_put_nonzero() to optimize the
|
||||||
|
common case where more references to NBDClient remain.
|
||||||
|
|
||||||
|
Note that nbd_client_get() can still be called from either thread, so
|
||||||
|
make NBDClient->refcount atomic.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20231221192452.1785567-6-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
nbd/server.c | 61 +++++++++++++++++++++++++++++++++++++++++++---------
|
||||||
|
1 file changed, 51 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/nbd/server.c b/nbd/server.c
|
||||||
|
index 0b09ccc8dc..e91e2e0903 100644
|
||||||
|
--- a/nbd/server.c
|
||||||
|
+++ b/nbd/server.c
|
||||||
|
@@ -122,7 +122,7 @@ struct NBDMetaContexts {
|
||||||
|
};
|
||||||
|
|
||||||
|
struct NBDClient {
|
||||||
|
- int refcount;
|
||||||
|
+ int refcount; /* atomic */
|
||||||
|
void (*close_fn)(NBDClient *client, bool negotiated);
|
||||||
|
|
||||||
|
NBDExport *exp;
|
||||||
|
@@ -1501,14 +1501,17 @@ static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest *reque
|
||||||
|
|
||||||
|
#define MAX_NBD_REQUESTS 16
|
||||||
|
|
||||||
|
+/* Runs in export AioContext and main loop thread */
|
||||||
|
void nbd_client_get(NBDClient *client)
|
||||||
|
{
|
||||||
|
- client->refcount++;
|
||||||
|
+ qatomic_inc(&client->refcount);
|
||||||
|
}
|
||||||
|
|
||||||
|
void nbd_client_put(NBDClient *client)
|
||||||
|
{
|
||||||
|
- if (--client->refcount == 0) {
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
+ if (qatomic_fetch_dec(&client->refcount) == 1) {
|
||||||
|
/* The last reference should be dropped by client->close,
|
||||||
|
* which is called by client_close.
|
||||||
|
*/
|
||||||
|
@@ -1529,8 +1532,35 @@ void nbd_client_put(NBDClient *client)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * Tries to release the reference to @client, but only if other references
|
||||||
|
+ * remain. This is an optimization for the common case where we want to avoid
|
||||||
|
+ * the expense of scheduling nbd_client_put() in the main loop thread.
|
||||||
|
+ *
|
||||||
|
+ * Returns true upon success or false if the reference was not released because
|
||||||
|
+ * it is the last reference.
|
||||||
|
+ */
|
||||||
|
+static bool nbd_client_put_nonzero(NBDClient *client)
|
||||||
|
+{
|
||||||
|
+ int old = qatomic_read(&client->refcount);
|
||||||
|
+ int expected;
|
||||||
|
+
|
||||||
|
+ do {
|
||||||
|
+ if (old == 1) {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ expected = old;
|
||||||
|
+ old = qatomic_cmpxchg(&client->refcount, expected, expected - 1);
|
||||||
|
+ } while (old != expected);
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void client_close(NBDClient *client, bool negotiated)
|
||||||
|
{
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
if (client->closing) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -2933,15 +2963,20 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
|
||||||
|
static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
{
|
||||||
|
NBDClient *client = opaque;
|
||||||
|
- NBDRequestData *req;
|
||||||
|
+ NBDRequestData *req = NULL;
|
||||||
|
NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
|
||||||
|
int ret;
|
||||||
|
Error *local_err = NULL;
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Note that nbd_client_put() and client_close() must be called from the
|
||||||
|
+ * main loop thread. Use aio_co_reschedule_self() to switch AioContext
|
||||||
|
+ * before calling these functions.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
trace_nbd_trip();
|
||||||
|
if (client->closing) {
|
||||||
|
- nbd_client_put(client);
|
||||||
|
- return;
|
||||||
|
+ goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (client->quiescing) {
|
||||||
|
@@ -2949,10 +2984,9 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
* We're switching between AIO contexts. Don't attempt to receive a new
|
||||||
|
* request and kick the main context which may be waiting for us.
|
||||||
|
*/
|
||||||
|
- nbd_client_put(client);
|
||||||
|
client->recv_coroutine = NULL;
|
||||||
|
aio_wait_kick();
|
||||||
|
- return;
|
||||||
|
+ goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
req = nbd_request_get(client);
|
||||||
|
@@ -3012,8 +3046,13 @@ static coroutine_fn void nbd_trip(void *opaque)
|
||||||
|
|
||||||
|
qio_channel_set_cork(client->ioc, false);
|
||||||
|
done:
|
||||||
|
- nbd_request_put(req);
|
||||||
|
- nbd_client_put(client);
|
||||||
|
+ if (req) {
|
||||||
|
+ nbd_request_put(req);
|
||||||
|
+ }
|
||||||
|
+ if (!nbd_client_put_nonzero(client)) {
|
||||||
|
+ aio_co_reschedule_self(qemu_get_aio_context());
|
||||||
|
+ nbd_client_put(client);
|
||||||
|
+ }
|
||||||
|
return;
|
||||||
|
|
||||||
|
disconnect:
|
||||||
|
@@ -3021,6 +3060,8 @@ disconnect:
|
||||||
|
error_reportf_err(local_err, "Disconnect client, due to: ");
|
||||||
|
}
|
||||||
|
nbd_request_put(req);
|
||||||
|
+
|
||||||
|
+ aio_co_reschedule_self(qemu_get_aio_context());
|
||||||
|
client_close(client, true);
|
||||||
|
nbd_client_put(client);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
106
kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch
Normal file
106
kvm-s390x-pci-avoid-double-enable-disable-of-aif.patch
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
From 64b0180f5a52668f8ac4c444ba369231dbc4d5b9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Mon, 22 Jan 2024 09:25:53 +0100
|
||||||
|
Subject: [PATCH 096/101] s390x/pci: avoid double enable/disable of aif
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices
|
||||||
|
RH-Jira: RHEL-21169
|
||||||
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||||
|
RH-Commit: [1/3] ebdf8a474ea21486f5ec051683f17bae6d20f675 (clegoate/qemu-kvm-c9s)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-21169
|
||||||
|
|
||||||
|
commit 07b2c8e034d80ff92e202405c494d2ff80fcf848
|
||||||
|
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Date: Thu Jan 18 13:51:49 2024 -0500
|
||||||
|
|
||||||
|
s390x/pci: avoid double enable/disable of aif
|
||||||
|
|
||||||
|
Use a flag to keep track of whether AIF is currently enabled. This can be
|
||||||
|
used to avoid enabling/disabling AIF multiple times as well as to determine
|
||||||
|
whether or not it should be disabled during reset processing.
|
||||||
|
|
||||||
|
Fixes: d0bc7091c2 ("s390x/pci: enable adapter event notification for interpreted devices")
|
||||||
|
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Message-ID: <20240118185151.265329-2-mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
---
|
||||||
|
hw/s390x/s390-pci-kvm.c | 25 +++++++++++++++++++++++--
|
||||||
|
include/hw/s390x/s390-pci-bus.h | 1 +
|
||||||
|
2 files changed, 24 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
|
||||||
|
index ff41e4106d..1ee510436c 100644
|
||||||
|
--- a/hw/s390x/s390-pci-kvm.c
|
||||||
|
+++ b/hw/s390x/s390-pci-kvm.c
|
||||||
|
@@ -27,6 +27,7 @@ bool s390_pci_kvm_interp_allowed(void)
|
||||||
|
|
||||||
|
int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist)
|
||||||
|
{
|
||||||
|
+ int rc;
|
||||||
|
struct kvm_s390_zpci_op args = {
|
||||||
|
.fh = pbdev->fh,
|
||||||
|
.op = KVM_S390_ZPCIOP_REG_AEN,
|
||||||
|
@@ -38,15 +39,35 @@ int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist)
|
||||||
|
.u.reg_aen.flags = (assist) ? 0 : KVM_S390_ZPCIOP_REGAEN_HOST
|
||||||
|
};
|
||||||
|
|
||||||
|
- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||||
|
+ if (pbdev->aif) {
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||||
|
+ if (rc == 0) {
|
||||||
|
+ pbdev->aif = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
|
||||||
|
{
|
||||||
|
+ int rc;
|
||||||
|
+
|
||||||
|
struct kvm_s390_zpci_op args = {
|
||||||
|
.fh = pbdev->fh,
|
||||||
|
.op = KVM_S390_ZPCIOP_DEREG_AEN
|
||||||
|
};
|
||||||
|
|
||||||
|
- return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||||
|
+ if (!pbdev->aif) {
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||||
|
+ if (rc == 0) {
|
||||||
|
+ pbdev->aif = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return rc;
|
||||||
|
}
|
||||||
|
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||||
|
index b1bdbeaeb5..435e788867 100644
|
||||||
|
--- a/include/hw/s390x/s390-pci-bus.h
|
||||||
|
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||||
|
@@ -361,6 +361,7 @@ struct S390PCIBusDevice {
|
||||||
|
bool unplug_requested;
|
||||||
|
bool interp;
|
||||||
|
bool forwarding_assist;
|
||||||
|
+ bool aif;
|
||||||
|
QTAILQ_ENTRY(S390PCIBusDevice) link;
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
137
kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch
Normal file
137
kvm-s390x-pci-drive-ISM-reset-from-subsystem-reset.patch
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
From c885b17e09ab19a3e8d3b2e1765963811af6f764 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Mon, 22 Jan 2024 09:25:53 +0100
|
||||||
|
Subject: [PATCH 098/101] s390x/pci: drive ISM reset from subsystem reset
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices
|
||||||
|
RH-Jira: RHEL-21169
|
||||||
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||||
|
RH-Commit: [3/3] 426cf156a2c67e6dcd7483a769fa3741e2700504 (clegoate/qemu-kvm-c9s)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-21169
|
||||||
|
|
||||||
|
commit 68c691ca99a2538d6a53a70ce8a9ce06ee307ff1
|
||||||
|
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Date: Thu Jan 18 13:51:51 2024 -0500
|
||||||
|
|
||||||
|
s390x/pci: drive ISM reset from subsystem reset
|
||||||
|
|
||||||
|
ISM devices are sensitive to manipulation of the IOMMU, so the ISM device
|
||||||
|
needs to be reset before the vfio-pci device is reset (triggering a full
|
||||||
|
UNMAP). In order to ensure this occurs, trigger ISM device resets from
|
||||||
|
subsystem_reset before triggering the PCI bus reset (which will also
|
||||||
|
trigger vfio-pci reset). This only needs to be done for ISM devices
|
||||||
|
which were enabled for use by the guest.
|
||||||
|
Further, ensure that AIF is disabled as part of the reset event.
|
||||||
|
|
||||||
|
Fixes: ef1535901a ("s390x: do a subsystem reset before the unprotect on reboot")
|
||||||
|
Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset")
|
||||||
|
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Message-ID: <20240118185151.265329-4-mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
---
|
||||||
|
hw/s390x/s390-pci-bus.c | 26 +++++++++++++++++---------
|
||||||
|
hw/s390x/s390-virtio-ccw.c | 8 ++++++++
|
||||||
|
include/hw/s390x/s390-pci-bus.h | 1 +
|
||||||
|
3 files changed, 26 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
|
||||||
|
index 347580ebac..3e57d5faca 100644
|
||||||
|
--- a/hw/s390x/s390-pci-bus.c
|
||||||
|
+++ b/hw/s390x/s390-pci-bus.c
|
||||||
|
@@ -151,20 +151,12 @@ static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)
|
||||||
|
pci_device_reset(pbdev->pdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void s390_pci_reset_cb(void *opaque)
|
||||||
|
-{
|
||||||
|
- S390PCIBusDevice *pbdev = opaque;
|
||||||
|
-
|
||||||
|
- pci_device_reset(pbdev->pdev);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
|
||||||
|
{
|
||||||
|
HotplugHandler *hotplug_ctrl;
|
||||||
|
|
||||||
|
if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||||
|
notifier_remove(&pbdev->shutdown_notifier);
|
||||||
|
- qemu_unregister_reset(s390_pci_reset_cb, pbdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unplug the PCI device */
|
||||||
|
@@ -1132,7 +1124,6 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
|
if (pbdev->pft == ZPCI_PFT_ISM) {
|
||||||
|
pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
|
||||||
|
qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
|
||||||
|
- qemu_register_reset(s390_pci_reset_cb, pbdev);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pbdev->fh |= FH_SHM_EMUL;
|
||||||
|
@@ -1279,6 +1270,23 @@ static void s390_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
|
||||||
|
pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, s->bus_no, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
+void s390_pci_ism_reset(void)
|
||||||
|
+{
|
||||||
|
+ S390pciState *s = s390_get_phb();
|
||||||
|
+
|
||||||
|
+ S390PCIBusDevice *pbdev, *next;
|
||||||
|
+
|
||||||
|
+ /* Trigger reset event for each passthrough ISM device currently in-use */
|
||||||
|
+ QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) {
|
||||||
|
+ if (pbdev->interp && pbdev->pft == ZPCI_PFT_ISM &&
|
||||||
|
+ pbdev->fh & FH_MASK_ENABLE) {
|
||||||
|
+ s390_pci_kvm_aif_disable(pbdev);
|
||||||
|
+
|
||||||
|
+ pci_device_reset(pbdev->pdev);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void s390_pcihost_reset(DeviceState *dev)
|
||||||
|
{
|
||||||
|
S390pciState *s = S390_PCI_HOST_BRIDGE(dev);
|
||||||
|
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||||
|
index e26ce26f5a..24f4773179 100644
|
||||||
|
--- a/hw/s390x/s390-virtio-ccw.c
|
||||||
|
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||||
|
@@ -118,6 +118,14 @@ static void subsystem_reset(void)
|
||||||
|
DeviceState *dev;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * ISM firmware is sensitive to unexpected changes to the IOMMU, which can
|
||||||
|
+ * occur during reset of the vfio-pci device (unmap of entire aperture).
|
||||||
|
+ * Ensure any passthrough ISM devices are reset now, while CPUs are paused
|
||||||
|
+ * but before vfio-pci cleanup occurs.
|
||||||
|
+ */
|
||||||
|
+ s390_pci_ism_reset();
|
||||||
|
+
|
||||||
|
for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) {
|
||||||
|
dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL));
|
||||||
|
if (dev) {
|
||||||
|
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
||||||
|
index 435e788867..2c43ea123f 100644
|
||||||
|
--- a/include/hw/s390x/s390-pci-bus.h
|
||||||
|
+++ b/include/hw/s390x/s390-pci-bus.h
|
||||||
|
@@ -401,5 +401,6 @@ S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
|
||||||
|
const char *target);
|
||||||
|
S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
|
||||||
|
S390PCIBusDevice *pbdev);
|
||||||
|
+void s390_pci_ism_reset(void);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
71
kvm-s390x-pci-refresh-fh-before-disabling-aif.patch
Normal file
71
kvm-s390x-pci-refresh-fh-before-disabling-aif.patch
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
From 49078bdfd4c116da3e920632ec6f7041f1b38015 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Mon, 22 Jan 2024 09:25:53 +0100
|
||||||
|
Subject: [PATCH 097/101] s390x/pci: refresh fh before disabling aif
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-MergeRequest: 215: s390x: Fix reset ordering of passthrough ISM devices
|
||||||
|
RH-Jira: RHEL-21169
|
||||||
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||||
|
RH-Commit: [2/3] 3523067909c41818dfc769abdb93930833416c11 (clegoate/qemu-kvm-c9s)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-21169
|
||||||
|
|
||||||
|
commit 30e35258e25c75c9d799c34fd89afcafffb37084
|
||||||
|
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Date: Thu Jan 18 13:51:50 2024 -0500
|
||||||
|
|
||||||
|
s390x/pci: refresh fh before disabling aif
|
||||||
|
|
||||||
|
Typically we refresh the host fh during CLP enable, however it's possible
|
||||||
|
that the device goes through multiple reset events before the guest
|
||||||
|
performs another CLP enable. Let's handle this for now by refreshing the
|
||||||
|
host handle from vfio before disabling aif.
|
||||||
|
|
||||||
|
Fixes: 03451953c7 ("s390x/pci: reset ISM passthrough devices on shutdown and system reset")
|
||||||
|
Reported-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Message-ID: <20240118185151.265329-3-mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
---
|
||||||
|
hw/s390x/s390-pci-kvm.c | 9 +++++++++
|
||||||
|
1 file changed, 9 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c
|
||||||
|
index 1ee510436c..9eef4fc3ec 100644
|
||||||
|
--- a/hw/s390x/s390-pci-kvm.c
|
||||||
|
+++ b/hw/s390x/s390-pci-kvm.c
|
||||||
|
@@ -18,6 +18,7 @@
|
||||||
|
#include "hw/s390x/s390-pci-bus.h"
|
||||||
|
#include "hw/s390x/s390-pci-kvm.h"
|
||||||
|
#include "hw/s390x/s390-pci-inst.h"
|
||||||
|
+#include "hw/s390x/s390-pci-vfio.h"
|
||||||
|
#include "cpu_models.h"
|
||||||
|
|
||||||
|
bool s390_pci_kvm_interp_allowed(void)
|
||||||
|
@@ -64,6 +65,14 @@ int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev)
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * The device may have already been reset but we still want to relinquish
|
||||||
|
+ * the guest ISC, so always be sure to use an up-to-date host fh.
|
||||||
|
+ */
|
||||||
|
+ if (!s390_pci_get_host_fh(pbdev, &args.fh)) {
|
||||||
|
+ return -EPERM;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
rc = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
|
||||||
|
if (rc == 0) {
|
||||||
|
pbdev->aif = false;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@@ -0,0 +1,88 @@
|
|||||||
|
From cd08d22a0da022d99fe6cfddb7de680abf66c8be Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:19:59 -0500
|
||||||
|
Subject: [PATCH 082/101] scsi: assert that callbacks run in the correct
|
||||||
|
AioContext
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [13/26] d2fd5065c3b72d9d2f4e37efee39fe12eba0f0a9 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Since the removal of AioContext locking, the correctness of the code
|
||||||
|
relies on running requests from a single AioContext at any given time.
|
||||||
|
|
||||||
|
Add assertions that verify that callbacks are invoked in the correct
|
||||||
|
AioContext.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-3-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/scsi-disk.c | 14 ++++++++++++++
|
||||||
|
system/dma-helpers.c | 3 +++
|
||||||
|
2 files changed, 17 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||||
|
index 2c1bbb3530..a5048e0aaf 100644
|
||||||
|
--- a/hw/scsi/scsi-disk.c
|
||||||
|
+++ b/hw/scsi/scsi-disk.c
|
||||||
|
@@ -273,6 +273,10 @@ static void scsi_aio_complete(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
+ /* The request must only run in the BlockBackend's AioContext */
|
||||||
|
+ assert(blk_get_aio_context(s->qdev.conf.blk) ==
|
||||||
|
+ qemu_get_current_aio_context());
|
||||||
|
+
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -370,8 +374,13 @@ static void scsi_dma_complete(void *opaque, int ret)
|
||||||
|
|
||||||
|
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
|
||||||
|
{
|
||||||
|
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
uint32_t n;
|
||||||
|
|
||||||
|
+ /* The request must only run in the BlockBackend's AioContext */
|
||||||
|
+ assert(blk_get_aio_context(s->qdev.conf.blk) ==
|
||||||
|
+ qemu_get_current_aio_context());
|
||||||
|
+
|
||||||
|
assert(r->req.aiocb == NULL);
|
||||||
|
if (scsi_disk_req_check_error(r, ret, false)) {
|
||||||
|
goto done;
|
||||||
|
@@ -496,8 +505,13 @@ static void scsi_read_data(SCSIRequest *req)
|
||||||
|
|
||||||
|
static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
|
||||||
|
{
|
||||||
|
+ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
uint32_t n;
|
||||||
|
|
||||||
|
+ /* The request must only run in the BlockBackend's AioContext */
|
||||||
|
+ assert(blk_get_aio_context(s->qdev.conf.blk) ==
|
||||||
|
+ qemu_get_current_aio_context());
|
||||||
|
+
|
||||||
|
assert (r->req.aiocb == NULL);
|
||||||
|
if (scsi_disk_req_check_error(r, ret, false)) {
|
||||||
|
goto done;
|
||||||
|
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
|
||||||
|
index 528117f256..9b221cf94e 100644
|
||||||
|
--- a/system/dma-helpers.c
|
||||||
|
+++ b/system/dma-helpers.c
|
||||||
|
@@ -119,6 +119,9 @@ static void dma_blk_cb(void *opaque, int ret)
|
||||||
|
|
||||||
|
trace_dma_blk_cb(dbs, ret);
|
||||||
|
|
||||||
|
+ /* DMAAIOCB is not thread-safe and must be accessed only from dbs->ctx */
|
||||||
|
+ assert(ctx == qemu_get_current_aio_context());
|
||||||
|
+
|
||||||
|
dbs->acb = NULL;
|
||||||
|
dbs->offset += dbs->iov.size;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
245
kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch
Normal file
245
kvm-scsi-don-t-lock-AioContext-in-I-O-code-path.patch
Normal file
@@ -0,0 +1,245 @@
|
|||||||
|
From d1d384bd24a7aeb527f4abd8a0958146544ef9bb Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Mon, 4 Dec 2023 11:42:58 -0500
|
||||||
|
Subject: [PATCH 079/101] scsi: don't lock AioContext in I/O code path
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [10/26] b5814cec94af5c254e300646d8783672b085bac3 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
blk_aio_*() doesn't require the AioContext lock and the SCSI subsystem's
|
||||||
|
internal state also does not anymore.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Acked-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231204164259.1515217-4-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/scsi-disk.c | 23 -----------------------
|
||||||
|
hw/scsi/scsi-generic.c | 20 +++-----------------
|
||||||
|
2 files changed, 3 insertions(+), 40 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||||
|
index 6691f5edb8..2c1bbb3530 100644
|
||||||
|
--- a/hw/scsi/scsi-disk.c
|
||||||
|
+++ b/hw/scsi/scsi-disk.c
|
||||||
|
@@ -273,8 +273,6 @@ static void scsi_aio_complete(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -286,7 +284,6 @@ static void scsi_aio_complete(void *opaque, int ret)
|
||||||
|
scsi_req_complete(&r->req, GOOD);
|
||||||
|
|
||||||
|
done:
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
scsi_req_unref(&r->req);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -394,8 +391,6 @@ static void scsi_read_complete(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -406,7 +401,6 @@ static void scsi_read_complete(void *opaque, int ret)
|
||||||
|
trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
|
||||||
|
}
|
||||||
|
scsi_read_complete_noio(r, ret);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Actually issue a read to the block device. */
|
||||||
|
@@ -448,8 +442,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert (r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -459,7 +451,6 @@ static void scsi_do_read_cb(void *opaque, int ret)
|
||||||
|
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||||
|
}
|
||||||
|
scsi_do_read(opaque, ret);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read more data from scsi device into buffer. */
|
||||||
|
@@ -533,8 +524,6 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert (r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -544,7 +533,6 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||||
|
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||||
|
}
|
||||||
|
scsi_write_complete_noio(r, ret);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scsi_write_data(SCSIRequest *req)
|
||||||
|
@@ -1742,8 +1730,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = data->r;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -1754,7 +1740,6 @@ static void scsi_unmap_complete(void *opaque, int ret)
|
||||||
|
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
|
||||||
|
scsi_unmap_complete_noio(data, ret);
|
||||||
|
}
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scsi_disk_emulate_unmap(SCSIDiskReq *r, uint8_t *inbuf)
|
||||||
|
@@ -1822,8 +1807,6 @@ static void scsi_write_same_complete(void *opaque, int ret)
|
||||||
|
SCSIDiskReq *r = data->r;
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
@@ -1847,7 +1830,6 @@ static void scsi_write_same_complete(void *opaque, int ret)
|
||||||
|
data->sector << BDRV_SECTOR_BITS,
|
||||||
|
&data->qiov, 0,
|
||||||
|
scsi_write_same_complete, data);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1857,7 +1839,6 @@ done:
|
||||||
|
scsi_req_unref(&r->req);
|
||||||
|
qemu_vfree(data->iov.iov_base);
|
||||||
|
g_free(data);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->qdev.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scsi_disk_emulate_write_same(SCSIDiskReq *r, uint8_t *inbuf)
|
||||||
|
@@ -2810,7 +2791,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
|
||||||
|
{
|
||||||
|
SCSIBlockReq *req = (SCSIBlockReq *)opaque;
|
||||||
|
SCSIDiskReq *r = &req->req;
|
||||||
|
- SCSIDevice *s = r->req.dev;
|
||||||
|
sg_io_hdr_t *io_hdr = &req->io_header;
|
||||||
|
|
||||||
|
if (ret == 0) {
|
||||||
|
@@ -2827,13 +2807,10 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret > 0) {
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||||
|
if (scsi_handle_rw_error(r, ret, true)) {
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
scsi_req_unref(&r->req);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
|
||||||
|
/* Ignore error. */
|
||||||
|
ret = 0;
|
||||||
|
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
|
||||||
|
index 2417f0ad84..b7b04e1d63 100644
|
||||||
|
--- a/hw/scsi/scsi-generic.c
|
||||||
|
+++ b/hw/scsi/scsi-generic.c
|
||||||
|
@@ -109,15 +109,11 @@ done:
|
||||||
|
static void scsi_command_complete(void *opaque, int ret)
|
||||||
|
{
|
||||||
|
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
|
||||||
|
- SCSIDevice *s = r->req.dev;
|
||||||
|
-
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||||
|
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
scsi_command_complete_noio(r, ret);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int execute_command(BlockBackend *blk,
|
||||||
|
@@ -274,14 +270,12 @@ static void scsi_read_complete(void * opaque, int ret)
|
||||||
|
SCSIDevice *s = r->req.dev;
|
||||||
|
int len;
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
if (ret || r->req.io_canceled) {
|
||||||
|
scsi_command_complete_noio(r, ret);
|
||||||
|
- goto done;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = r->io_header.dxfer_len - r->io_header.resid;
|
||||||
|
@@ -320,7 +314,7 @@ static void scsi_read_complete(void * opaque, int ret)
|
||||||
|
r->io_header.status != GOOD ||
|
||||||
|
len == 0) {
|
||||||
|
scsi_command_complete_noio(r, 0);
|
||||||
|
- goto done;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Snoop READ CAPACITY output to set the blocksize. */
|
||||||
|
@@ -356,9 +350,6 @@ static void scsi_read_complete(void * opaque, int ret)
|
||||||
|
req_complete:
|
||||||
|
scsi_req_data(&r->req, len);
|
||||||
|
scsi_req_unref(&r->req);
|
||||||
|
-
|
||||||
|
-done:
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read more data from scsi device into buffer. */
|
||||||
|
@@ -391,14 +382,12 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||||
|
|
||||||
|
trace_scsi_generic_write_complete(ret);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||||
|
-
|
||||||
|
assert(r->req.aiocb != NULL);
|
||||||
|
r->req.aiocb = NULL;
|
||||||
|
|
||||||
|
if (ret || r->req.io_canceled) {
|
||||||
|
scsi_command_complete_noio(r, ret);
|
||||||
|
- goto done;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (r->req.cmd.buf[0] == MODE_SELECT && r->req.cmd.buf[4] == 12 &&
|
||||||
|
@@ -408,9 +397,6 @@ static void scsi_write_complete(void * opaque, int ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
scsi_command_complete_noio(r, ret);
|
||||||
|
-
|
||||||
|
-done:
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Write data to a scsi device. Returns nonzero on failure.
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
307
kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch
Normal file
307
kvm-scsi-only-access-SCSIDevice-requests-from-one-thread.patch
Normal file
@ -0,0 +1,307 @@
|
|||||||
|
From 42dd1357310bd1a68d6cacaa53cd5b1d1b02880d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Mon, 4 Dec 2023 11:42:56 -0500
|
||||||
|
Subject: [PATCH 077/101] scsi: only access SCSIDevice->requests from one
|
||||||
|
thread
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [8/26] 9df662e82a63e93d184b5763bebbe7e43bc9dabe (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Stop depending on the AioContext lock and instead access
|
||||||
|
SCSIDevice->requests from only one thread at a time:
|
||||||
|
- When the VM is running only the BlockBackend's AioContext may access
|
||||||
|
the requests list.
|
||||||
|
- When the VM is stopped only the main loop may access the requests
|
||||||
|
list.
|
||||||
|
|
||||||
|
These constraints protect the requests list without the need for locking
|
||||||
|
in the I/O code path.
|
||||||
|
|
||||||
|
Note that multiple IOThreads are not supported yet because the code
|
||||||
|
assumes all SCSIRequests are executed from a single AioContext. Leave
|
||||||
|
that as future work.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231204164259.1515217-2-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/scsi-bus.c | 181 ++++++++++++++++++++++++++++-------------
|
||||||
|
include/hw/scsi/scsi.h | 7 +-
|
||||||
|
2 files changed, 131 insertions(+), 57 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||||
|
index fc4b77fdb0..b649cdf555 100644
|
||||||
|
--- a/hw/scsi/scsi-bus.c
|
||||||
|
+++ b/hw/scsi/scsi-bus.c
|
||||||
|
@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
|
||||||
|
return d;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * Invoke @fn() for each enqueued request in device @s. Must be called from the
|
||||||
|
+ * main loop thread while the guest is stopped. This is only suitable for
|
||||||
|
+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
|
||||||
|
+ */
|
||||||
|
+static void scsi_device_for_each_req_sync(SCSIDevice *s,
|
||||||
|
+ void (*fn)(SCSIRequest *, void *),
|
||||||
|
+ void *opaque)
|
||||||
|
+{
|
||||||
|
+ SCSIRequest *req;
|
||||||
|
+ SCSIRequest *next_req;
|
||||||
|
+
|
||||||
|
+ assert(!runstate_is_running());
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
|
||||||
|
+ fn(req, opaque);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+typedef struct {
|
||||||
|
+ SCSIDevice *s;
|
||||||
|
+ void (*fn)(SCSIRequest *, void *);
|
||||||
|
+ void *fn_opaque;
|
||||||
|
+} SCSIDeviceForEachReqAsyncData;
|
||||||
|
+
|
||||||
|
+static void scsi_device_for_each_req_async_bh(void *opaque)
|
||||||
|
+{
|
||||||
|
+ g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
|
||||||
|
+ SCSIDevice *s = data->s;
|
||||||
|
+ AioContext *ctx;
|
||||||
|
+ SCSIRequest *req;
|
||||||
|
+ SCSIRequest *next;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * If the AioContext changed before this BH was called then reschedule into
|
||||||
|
+ * the new AioContext before accessing ->requests. This can happen when
|
||||||
|
+ * scsi_device_for_each_req_async() is called and then the AioContext is
|
||||||
|
+ * changed before BHs are run.
|
||||||
|
+ */
|
||||||
|
+ ctx = blk_get_aio_context(s->conf.blk);
|
||||||
|
+ if (ctx != qemu_get_current_aio_context()) {
|
||||||
|
+ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
|
||||||
|
+ g_steal_pointer(&data));
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
|
||||||
|
+ data->fn(req, data->fn_opaque);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Drop the reference taken by scsi_device_for_each_req_async() */
|
||||||
|
+ object_unref(OBJECT(s));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
|
||||||
|
+ * runs in the AioContext that is executing the request.
|
||||||
|
+ */
|
||||||
|
+static void scsi_device_for_each_req_async(SCSIDevice *s,
|
||||||
|
+ void (*fn)(SCSIRequest *, void *),
|
||||||
|
+ void *opaque)
|
||||||
|
+{
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
+ SCSIDeviceForEachReqAsyncData *data =
|
||||||
|
+ g_new(SCSIDeviceForEachReqAsyncData, 1);
|
||||||
|
+
|
||||||
|
+ data->s = s;
|
||||||
|
+ data->fn = fn;
|
||||||
|
+ data->fn_opaque = opaque;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Hold a reference to the SCSIDevice until
|
||||||
|
+ * scsi_device_for_each_req_async_bh() finishes.
|
||||||
|
+ */
|
||||||
|
+ object_ref(OBJECT(s));
|
||||||
|
+
|
||||||
|
+ aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
|
||||||
|
+ scsi_device_for_each_req_async_bh,
|
||||||
|
+ data);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void scsi_device_realize(SCSIDevice *s, Error **errp)
|
||||||
|
{
|
||||||
|
SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
|
||||||
|
@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
|
||||||
|
qbus_set_bus_hotplug_handler(BUS(bus));
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void scsi_dma_restart_bh(void *opaque)
|
||||||
|
+void scsi_req_retry(SCSIRequest *req)
|
||||||
|
{
|
||||||
|
- SCSIDevice *s = opaque;
|
||||||
|
- SCSIRequest *req, *next;
|
||||||
|
-
|
||||||
|
- qemu_bh_delete(s->bh);
|
||||||
|
- s->bh = NULL;
|
||||||
|
+ req->retry = true;
|
||||||
|
+}
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.blk));
|
||||||
|
- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
|
||||||
|
- scsi_req_ref(req);
|
||||||
|
- if (req->retry) {
|
||||||
|
- req->retry = false;
|
||||||
|
- switch (req->cmd.mode) {
|
||||||
|
+/* Called in the AioContext that is executing the request */
|
||||||
|
+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque)
|
||||||
|
+{
|
||||||
|
+ scsi_req_ref(req);
|
||||||
|
+ if (req->retry) {
|
||||||
|
+ req->retry = false;
|
||||||
|
+ switch (req->cmd.mode) {
|
||||||
|
case SCSI_XFER_FROM_DEV:
|
||||||
|
case SCSI_XFER_TO_DEV:
|
||||||
|
scsi_req_continue(req);
|
||||||
|
@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque)
|
||||||
|
scsi_req_dequeue(req);
|
||||||
|
scsi_req_enqueue(req);
|
||||||
|
break;
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
- scsi_req_unref(req);
|
||||||
|
}
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.blk));
|
||||||
|
- /* Drop the reference that was acquired in scsi_dma_restart_cb */
|
||||||
|
- object_unref(OBJECT(s));
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-void scsi_req_retry(SCSIRequest *req)
|
||||||
|
-{
|
||||||
|
- /* No need to save a reference, because scsi_dma_restart_bh just
|
||||||
|
- * looks at the request list. */
|
||||||
|
- req->retry = true;
|
||||||
|
+ scsi_req_unref(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
|
||||||
|
{
|
||||||
|
SCSIDevice *s = opaque;
|
||||||
|
|
||||||
|
+ assert(qemu_in_main_thread());
|
||||||
|
+
|
||||||
|
if (!running) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
- if (!s->bh) {
|
||||||
|
- AioContext *ctx = blk_get_aio_context(s->conf.blk);
|
||||||
|
- /* The reference is dropped in scsi_dma_restart_bh.*/
|
||||||
|
- object_ref(OBJECT(s));
|
||||||
|
- s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
|
||||||
|
- &DEVICE(s)->mem_reentrancy_guard);
|
||||||
|
- qemu_bh_schedule(s->bh);
|
||||||
|
- }
|
||||||
|
+
|
||||||
|
+ scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool scsi_bus_is_address_free(SCSIBus *bus,
|
||||||
|
@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
|
||||||
|
+{
|
||||||
|
+ scsi_req_cancel_async(req, NULL);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
|
||||||
|
{
|
||||||
|
- SCSIRequest *req;
|
||||||
|
+ scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
|
||||||
|
|
||||||
|
aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
|
||||||
|
- while (!QTAILQ_EMPTY(&sdev->requests)) {
|
||||||
|
- req = QTAILQ_FIRST(&sdev->requests);
|
||||||
|
- scsi_req_cancel_async(req, NULL);
|
||||||
|
- }
|
||||||
|
blk_drain(sdev->conf.blk);
|
||||||
|
aio_context_release(blk_get_aio_context(sdev->conf.blk));
|
||||||
|
scsi_device_set_ua(sdev, sense);
|
||||||
|
@@ -1737,31 +1804,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
|
||||||
|
|
||||||
|
/* SCSI request list. For simplicity, pv points to the whole device */
|
||||||
|
|
||||||
|
+static void put_scsi_req(SCSIRequest *req, void *opaque)
|
||||||
|
+{
|
||||||
|
+ QEMUFile *f = opaque;
|
||||||
|
+
|
||||||
|
+ assert(!req->io_canceled);
|
||||||
|
+ assert(req->status == -1 && req->host_status == -1);
|
||||||
|
+ assert(req->enqueued);
|
||||||
|
+
|
||||||
|
+ qemu_put_sbyte(f, req->retry ? 1 : 2);
|
||||||
|
+ qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
|
||||||
|
+ qemu_put_be32s(f, &req->tag);
|
||||||
|
+ qemu_put_be32s(f, &req->lun);
|
||||||
|
+ if (req->bus->info->save_request) {
|
||||||
|
+ req->bus->info->save_request(f, req);
|
||||||
|
+ }
|
||||||
|
+ if (req->ops->save_request) {
|
||||||
|
+ req->ops->save_request(f, req);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int put_scsi_requests(QEMUFile *f, void *pv, size_t size,
|
||||||
|
const VMStateField *field, JSONWriter *vmdesc)
|
||||||
|
{
|
||||||
|
SCSIDevice *s = pv;
|
||||||
|
- SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus);
|
||||||
|
- SCSIRequest *req;
|
||||||
|
|
||||||
|
- QTAILQ_FOREACH(req, &s->requests, next) {
|
||||||
|
- assert(!req->io_canceled);
|
||||||
|
- assert(req->status == -1 && req->host_status == -1);
|
||||||
|
- assert(req->enqueued);
|
||||||
|
-
|
||||||
|
- qemu_put_sbyte(f, req->retry ? 1 : 2);
|
||||||
|
- qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
|
||||||
|
- qemu_put_be32s(f, &req->tag);
|
||||||
|
- qemu_put_be32s(f, &req->lun);
|
||||||
|
- if (bus->info->save_request) {
|
||||||
|
- bus->info->save_request(f, req);
|
||||||
|
- }
|
||||||
|
- if (req->ops->save_request) {
|
||||||
|
- req->ops->save_request(f, req);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
+ scsi_device_for_each_req_sync(s, put_scsi_req, f);
|
||||||
|
qemu_put_sbyte(f, 0);
|
||||||
|
-
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
|
||||||
|
index 3692ca82f3..10c4e8288d 100644
|
||||||
|
--- a/include/hw/scsi/scsi.h
|
||||||
|
+++ b/include/hw/scsi/scsi.h
|
||||||
|
@@ -69,14 +69,19 @@ struct SCSIDevice
|
||||||
|
{
|
||||||
|
DeviceState qdev;
|
||||||
|
VMChangeStateEntry *vmsentry;
|
||||||
|
- QEMUBH *bh;
|
||||||
|
uint32_t id;
|
||||||
|
BlockConf conf;
|
||||||
|
SCSISense unit_attention;
|
||||||
|
bool sense_is_ua;
|
||||||
|
uint8_t sense[SCSI_SENSE_BUF_SIZE];
|
||||||
|
uint32_t sense_len;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * The requests list is only accessed from the AioContext that executes
|
||||||
|
+ * requests or from the main loop when IOThread processing is stopped.
|
||||||
|
+ */
|
||||||
|
QTAILQ_HEAD(, SCSIRequest) requests;
|
||||||
|
+
|
||||||
|
uint32_t channel;
|
||||||
|
uint32_t lun;
|
||||||
|
int blocksize;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
280
kvm-scsi-remove-AioContext-locking.patch
Normal file
280
kvm-scsi-remove-AioContext-locking.patch
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
From 61d605433a5edfcc7fe836fd399106ed1e1907bb Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:05 -0500
|
||||||
|
Subject: [PATCH 088/101] scsi: remove AioContext locking
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [19/26] 12a8e26670074a17dd2b0cfac06e0aea03b3068f (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The AioContext lock no longer has any effect. Remove it.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-9-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/scsi-bus.c | 2 --
|
||||||
|
hw/scsi/scsi-disk.c | 31 +++++--------------------------
|
||||||
|
hw/scsi/virtio-scsi.c | 18 ------------------
|
||||||
|
include/hw/virtio/virtio-scsi.h | 14 --------------
|
||||||
|
4 files changed, 5 insertions(+), 60 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
|
||||||
|
index b649cdf555..5b08cbf60a 100644
|
||||||
|
--- a/hw/scsi/scsi-bus.c
|
||||||
|
+++ b/hw/scsi/scsi-bus.c
|
||||||
|
@@ -1732,9 +1732,7 @@ void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
|
||||||
|
{
|
||||||
|
scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
|
||||||
|
blk_drain(sdev->conf.blk);
|
||||||
|
- aio_context_release(blk_get_aio_context(sdev->conf.blk));
|
||||||
|
scsi_device_set_ua(sdev, sense);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||||
|
index a5048e0aaf..61be3d395a 100644
|
||||||
|
--- a/hw/scsi/scsi-disk.c
|
||||||
|
+++ b/hw/scsi/scsi-disk.c
|
||||||
|
@@ -2339,14 +2339,10 @@ static void scsi_disk_reset(DeviceState *dev)
|
||||||
|
{
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev.qdev, dev);
|
||||||
|
uint64_t nb_sectors;
|
||||||
|
- AioContext *ctx;
|
||||||
|
|
||||||
|
scsi_device_purge_requests(&s->qdev, SENSE_CODE(RESET));
|
||||||
|
|
||||||
|
- ctx = blk_get_aio_context(s->qdev.conf.blk);
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
blk_get_geometry(s->qdev.conf.blk, &nb_sectors);
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
|
||||||
|
nb_sectors /= s->qdev.blocksize / BDRV_SECTOR_SIZE;
|
||||||
|
if (nb_sectors) {
|
||||||
|
@@ -2545,15 +2541,13 @@ static void scsi_unrealize(SCSIDevice *dev)
|
||||||
|
static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
{
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
|
||||||
|
- AioContext *ctx = NULL;
|
||||||
|
+
|
||||||
|
/* can happen for devices without drive. The error message for missing
|
||||||
|
* backend will be issued in scsi_realize
|
||||||
|
*/
|
||||||
|
if (s->qdev.conf.blk) {
|
||||||
|
- ctx = blk_get_aio_context(s->qdev.conf.blk);
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
if (!blkconf_blocksizes(&s->qdev.conf, errp)) {
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s->qdev.blocksize = s->qdev.conf.logical_block_size;
|
||||||
|
@@ -2562,16 +2556,11 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
s->product = g_strdup("QEMU HARDDISK");
|
||||||
|
}
|
||||||
|
scsi_realize(&s->qdev, errp);
|
||||||
|
-out:
|
||||||
|
- if (ctx) {
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
|
||||||
|
static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
{
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
|
||||||
|
- AioContext *ctx;
|
||||||
|
int ret;
|
||||||
|
uint32_t blocksize = 2048;
|
||||||
|
|
||||||
|
@@ -2587,8 +2576,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
blocksize = dev->conf.physical_block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
- ctx = blk_get_aio_context(dev->conf.blk);
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
s->qdev.blocksize = blocksize;
|
||||||
|
s->qdev.type = TYPE_ROM;
|
||||||
|
s->features |= 1 << SCSI_DISK_F_REMOVABLE;
|
||||||
|
@@ -2596,7 +2583,6 @@ static void scsi_cd_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
s->product = g_strdup("QEMU CD-ROM");
|
||||||
|
}
|
||||||
|
scsi_realize(&s->qdev, errp);
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -2727,7 +2713,6 @@ static int get_device_type(SCSIDiskState *s)
|
||||||
|
static void scsi_block_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
{
|
||||||
|
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
|
||||||
|
- AioContext *ctx;
|
||||||
|
int sg_version;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
@@ -2742,9 +2727,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
"be removed in a future version");
|
||||||
|
}
|
||||||
|
|
||||||
|
- ctx = blk_get_aio_context(s->qdev.conf.blk);
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
-
|
||||||
|
/* check we are using a driver managing SG_IO (version 3 and after) */
|
||||||
|
rc = blk_ioctl(s->qdev.conf.blk, SG_GET_VERSION_NUM, &sg_version);
|
||||||
|
if (rc < 0) {
|
||||||
|
@@ -2752,18 +2734,18 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
if (rc != -EPERM) {
|
||||||
|
error_append_hint(errp, "Is this a SCSI device?\n");
|
||||||
|
}
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
if (sg_version < 30000) {
|
||||||
|
error_setg(errp, "scsi generic interface too old");
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get device type from INQUIRY data */
|
||||||
|
rc = get_device_type(s);
|
||||||
|
if (rc < 0) {
|
||||||
|
error_setg(errp, "INQUIRY failed");
|
||||||
|
- goto out;
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make a guess for the block size, we'll fix it when the guest sends.
|
||||||
|
@@ -2783,9 +2765,6 @@ static void scsi_block_realize(SCSIDevice *dev, Error **errp)
|
||||||
|
|
||||||
|
scsi_realize(&s->qdev, errp);
|
||||||
|
scsi_generic_read_device_inquiry(&s->qdev);
|
||||||
|
-
|
||||||
|
-out:
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct SCSIBlockReq {
|
||||||
|
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
|
||||||
|
index 4f8d35facc..ca365a70e9 100644
|
||||||
|
--- a/hw/scsi/virtio-scsi.c
|
||||||
|
+++ b/hw/scsi/virtio-scsi.c
|
||||||
|
@@ -642,9 +642,7 @@ static void virtio_scsi_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_handle_ctrl_vq(s, vq);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
|
||||||
|
@@ -882,9 +880,7 @@ static void virtio_scsi_handle_cmd(VirtIODevice *vdev, VirtQueue *vq)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_handle_cmd_vq(s, vq);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_scsi_get_config(VirtIODevice *vdev,
|
||||||
|
@@ -1031,9 +1027,7 @@ static void virtio_scsi_handle_event(VirtIODevice *vdev, VirtQueue *vq)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_handle_event_vq(s, vq);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
|
||||||
|
@@ -1052,9 +1046,7 @@ static void virtio_scsi_change(SCSIBus *bus, SCSIDevice *dev, SCSISense sense)
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_push_event(s, &info);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1071,17 +1063,13 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
|
VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
|
||||||
|
VirtIOSCSI *s = VIRTIO_SCSI(vdev);
|
||||||
|
SCSIDevice *sd = SCSI_DEVICE(dev);
|
||||||
|
- AioContext *old_context;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (s->ctx && !s->dataplane_fenced) {
|
||||||
|
if (blk_op_is_blocked(sd->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
- old_context = blk_get_aio_context(sd->conf.blk);
|
||||||
|
- aio_context_acquire(old_context);
|
||||||
|
ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
|
||||||
|
- aio_context_release(old_context);
|
||||||
|
if (ret < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -1097,10 +1085,8 @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_push_event(s, &info);
|
||||||
|
scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1122,17 +1108,13 @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
||||||
|
qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
|
||||||
|
|
||||||
|
if (s->ctx) {
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
/* If other users keep the BlockBackend in the iothread, that's ok */
|
||||||
|
blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
virtio_scsi_push_event(s, &info);
|
||||||
|
scsi_bus_set_ua(&s->bus, SENSE_CODE(REPORTED_LUNS_CHANGED));
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
|
||||||
|
index da8cb928d9..7f0573b1bf 100644
|
||||||
|
--- a/include/hw/virtio/virtio-scsi.h
|
||||||
|
+++ b/include/hw/virtio/virtio-scsi.h
|
||||||
|
@@ -101,20 +101,6 @@ struct VirtIOSCSI {
|
||||||
|
uint32_t host_features;
|
||||||
|
};
|
||||||
|
|
||||||
|
-static inline void virtio_scsi_acquire(VirtIOSCSI *s)
|
||||||
|
-{
|
||||||
|
- if (s->ctx) {
|
||||||
|
- aio_context_acquire(s->ctx);
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static inline void virtio_scsi_release(VirtIOSCSI *s)
|
||||||
|
-{
|
||||||
|
- if (s->ctx) {
|
||||||
|
- aio_context_release(s->ctx);
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
void virtio_scsi_common_realize(DeviceState *dev,
|
||||||
|
VirtIOHandleOutput ctrl,
|
||||||
|
VirtIOHandleOutput evt,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
41
kvm-scsi-remove-outdated-AioContext-lock-comment.patch
Normal file
41
kvm-scsi-remove-outdated-AioContext-lock-comment.patch
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
From 9f5c6dbe907fe6227006ab51179eaa50a63559cb Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:09 -0500
|
||||||
|
Subject: [PATCH 092/101] scsi: remove outdated AioContext lock comment
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [23/26] 96e2e7d2e6a160ce4d695060f902d21030b3b1d8 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The SCSI subsystem no longer uses the AioContext lock. Request
|
||||||
|
processing runs exclusively in the BlockBackend's AioContext since
|
||||||
|
"scsi: only access SCSIDevice->requests from one thread" and hence the
|
||||||
|
lock is unnecessary.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-13-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/scsi-disk.c | 1 -
|
||||||
|
1 file changed, 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
|
||||||
|
index 61be3d395a..2e7e1e9a1c 100644
|
||||||
|
--- a/hw/scsi/scsi-disk.c
|
||||||
|
+++ b/hw/scsi/scsi-disk.c
|
||||||
|
@@ -355,7 +355,6 @@ done:
|
||||||
|
scsi_req_unref(&r->req);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Called with AioContext lock held */
|
||||||
|
static void scsi_dma_complete(void *opaque, int ret)
|
||||||
|
{
|
||||||
|
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
205
kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch
Normal file
205
kvm-target-s390x-kvm-pv-Provide-some-more-useful-informa.patch
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
From cc8d794932e26df7c7f3c8cc0c1f42da8d52f12b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Thomas Huth <thuth@redhat.com>
|
||||||
|
Date: Mon, 15 Jan 2024 10:26:52 +0100
|
||||||
|
Subject: [PATCH 069/101] target/s390x/kvm/pv: Provide some more useful
|
||||||
|
information if decryption fails
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Thomas Huth <thuth@redhat.com>
|
||||||
|
RH-MergeRequest: 213: s390x: Provide some more useful information if decryption of a PV image fails
|
||||||
|
RH-Jira: RHEL-18212
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
||||||
|
RH-Commit: [1/1] 4ffb61869f7df33e23d3e0ebf8c29e386e3f6cbc (thuth/qemu-kvm-cs9)
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-18212
|
||||||
|
|
||||||
|
commit 7af51621b16ae86646cc2dc9dee30de8176ff761
|
||||||
|
Author: Thomas Huth <thuth@redhat.com>
|
||||||
|
Date: Wed Jan 10 15:29:16 2024 +0100
|
||||||
|
|
||||||
|
target/s390x/kvm/pv: Provide some more useful information if decryption fails
|
||||||
|
|
||||||
|
It's a common scenario to copy guest images from one host to another
|
||||||
|
to run the guest on the other machine. This (of course) does not work
|
||||||
|
with "secure execution" guests since they are encrypted with one certain
|
||||||
|
host key. However, if you still (accidentally) do it, you only get a
|
||||||
|
very user-unfriendly error message that looks like this:
|
||||||
|
|
||||||
|
qemu-system-s390x: KVM PV command 2 (KVM_PV_SET_SEC_PARMS) failed:
|
||||||
|
header rc 108 rrc 5 IOCTL rc: -22
|
||||||
|
|
||||||
|
Let's provide at least a somewhat nicer hint to the users so that they
|
||||||
|
are able to figure out what might have gone wrong.
|
||||||
|
|
||||||
|
Buglink: https://issues.redhat.com/browse/RHEL-18212
|
||||||
|
Message-ID: <20240110142916.850605-1-thuth@redhat.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
|
||||||
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
||||||
|
---
|
||||||
|
hw/s390x/ipl.c | 5 ++---
|
||||||
|
hw/s390x/ipl.h | 2 +-
|
||||||
|
hw/s390x/s390-virtio-ccw.c | 5 ++++-
|
||||||
|
target/s390x/kvm/pv.c | 25 ++++++++++++++++++++-----
|
||||||
|
target/s390x/kvm/pv.h | 5 +++--
|
||||||
|
5 files changed, 30 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
|
||||||
|
index 515dcf51b5..b23a6a0ef3 100644
|
||||||
|
--- a/hw/s390x/ipl.c
|
||||||
|
+++ b/hw/s390x/ipl.c
|
||||||
|
@@ -703,7 +703,7 @@ static void s390_ipl_prepare_qipl(S390CPU *cpu)
|
||||||
|
cpu_physical_memory_unmap(addr, len, 1, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
-int s390_ipl_prepare_pv_header(void)
|
||||||
|
+int s390_ipl_prepare_pv_header(Error **errp)
|
||||||
|
{
|
||||||
|
IplParameterBlock *ipib = s390_ipl_get_iplb_pv();
|
||||||
|
IPLBlockPV *ipib_pv = &ipib->pv;
|
||||||
|
@@ -712,8 +712,7 @@ int s390_ipl_prepare_pv_header(void)
|
||||||
|
|
||||||
|
cpu_physical_memory_read(ipib_pv->pv_header_addr, hdr,
|
||||||
|
ipib_pv->pv_header_len);
|
||||||
|
- rc = s390_pv_set_sec_parms((uintptr_t)hdr,
|
||||||
|
- ipib_pv->pv_header_len);
|
||||||
|
+ rc = s390_pv_set_sec_parms((uintptr_t)hdr, ipib_pv->pv_header_len, errp);
|
||||||
|
g_free(hdr);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
|
||||||
|
index 7fc86e7905..57cd125769 100644
|
||||||
|
--- a/hw/s390x/ipl.h
|
||||||
|
+++ b/hw/s390x/ipl.h
|
||||||
|
@@ -107,7 +107,7 @@ typedef union IplParameterBlock IplParameterBlock;
|
||||||
|
|
||||||
|
int s390_ipl_set_loadparm(uint8_t *loadparm);
|
||||||
|
void s390_ipl_update_diag308(IplParameterBlock *iplb);
|
||||||
|
-int s390_ipl_prepare_pv_header(void);
|
||||||
|
+int s390_ipl_prepare_pv_header(Error **errp);
|
||||||
|
int s390_ipl_pv_unpack(void);
|
||||||
|
void s390_ipl_prepare_cpu(S390CPU *cpu);
|
||||||
|
IplParameterBlock *s390_ipl_get_iplb(void);
|
||||||
|
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
||||||
|
index 984891b82a..e26ce26f5a 100644
|
||||||
|
--- a/hw/s390x/s390-virtio-ccw.c
|
||||||
|
+++ b/hw/s390x/s390-virtio-ccw.c
|
||||||
|
@@ -391,7 +391,7 @@ static int s390_machine_protect(S390CcwMachineState *ms)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set SE header and unpack */
|
||||||
|
- rc = s390_ipl_prepare_pv_header();
|
||||||
|
+ rc = s390_ipl_prepare_pv_header(&local_err);
|
||||||
|
if (rc) {
|
||||||
|
goto out_err;
|
||||||
|
}
|
||||||
|
@@ -410,6 +410,9 @@ static int s390_machine_protect(S390CcwMachineState *ms)
|
||||||
|
return rc;
|
||||||
|
|
||||||
|
out_err:
|
||||||
|
+ if (local_err) {
|
||||||
|
+ error_report_err(local_err);
|
||||||
|
+ }
|
||||||
|
s390_machine_unprotect(ms);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c
|
||||||
|
index 6a69be7e5c..7ca7faec73 100644
|
||||||
|
--- a/target/s390x/kvm/pv.c
|
||||||
|
+++ b/target/s390x/kvm/pv.c
|
||||||
|
@@ -29,7 +29,8 @@ static bool info_valid;
|
||||||
|
static struct kvm_s390_pv_info_vm info_vm;
|
||||||
|
static struct kvm_s390_pv_info_dump info_dump;
|
||||||
|
|
||||||
|
-static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
|
||||||
|
+static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data,
|
||||||
|
+ int *pvrc)
|
||||||
|
{
|
||||||
|
struct kvm_pv_cmd pv_cmd = {
|
||||||
|
.cmd = cmd,
|
||||||
|
@@ -46,6 +47,9 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
|
||||||
|
"IOCTL rc: %d", cmd, cmdname, pv_cmd.rc, pv_cmd.rrc,
|
||||||
|
rc);
|
||||||
|
}
|
||||||
|
+ if (pvrc) {
|
||||||
|
+ *pvrc = pv_cmd.rc;
|
||||||
|
+ }
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -53,12 +57,13 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
|
||||||
|
* This macro lets us pass the command as a string to the function so
|
||||||
|
* we can print it on an error.
|
||||||
|
*/
|
||||||
|
-#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data)
|
||||||
|
+#define s390_pv_cmd(cmd, data) __s390_pv_cmd(cmd, #cmd, data, NULL)
|
||||||
|
+#define s390_pv_cmd_pvrc(cmd, data, pvrc) __s390_pv_cmd(cmd, #cmd, data, pvrc)
|
||||||
|
#define s390_pv_cmd_exit(cmd, data) \
|
||||||
|
{ \
|
||||||
|
int rc; \
|
||||||
|
\
|
||||||
|
- rc = __s390_pv_cmd(cmd, #cmd, data);\
|
||||||
|
+ rc = __s390_pv_cmd(cmd, #cmd, data, NULL); \
|
||||||
|
if (rc) { \
|
||||||
|
exit(1); \
|
||||||
|
} \
|
||||||
|
@@ -142,14 +147,24 @@ bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
|
||||||
|
+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp)
|
||||||
|
{
|
||||||
|
+ int ret, pvrc;
|
||||||
|
struct kvm_s390_pv_sec_parm args = {
|
||||||
|
.origin = origin,
|
||||||
|
.length = length,
|
||||||
|
};
|
||||||
|
|
||||||
|
- return s390_pv_cmd(KVM_PV_SET_SEC_PARMS, &args);
|
||||||
|
+ ret = s390_pv_cmd_pvrc(KVM_PV_SET_SEC_PARMS, &args, &pvrc);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg(errp, "Failed to set secure execution parameters");
|
||||||
|
+ if (pvrc == 0x108) {
|
||||||
|
+ error_append_hint(errp, "Please check whether the image is "
|
||||||
|
+ "correctly encrypted for this host\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h
|
||||||
|
index 7b935e2246..5877d28ff1 100644
|
||||||
|
--- a/target/s390x/kvm/pv.h
|
||||||
|
+++ b/target/s390x/kvm/pv.h
|
||||||
|
@@ -42,7 +42,7 @@ int s390_pv_query_info(void);
|
||||||
|
int s390_pv_vm_enable(void);
|
||||||
|
void s390_pv_vm_disable(void);
|
||||||
|
bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms);
|
||||||
|
-int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
|
||||||
|
+int s390_pv_set_sec_parms(uint64_t origin, uint64_t length, Error **errp);
|
||||||
|
int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak);
|
||||||
|
void s390_pv_prep_reset(void);
|
||||||
|
int s390_pv_verify(void);
|
||||||
|
@@ -62,7 +62,8 @@ static inline int s390_pv_query_info(void) { return 0; }
|
||||||
|
static inline int s390_pv_vm_enable(void) { return 0; }
|
||||||
|
static inline void s390_pv_vm_disable(void) {}
|
||||||
|
static inline bool s390_pv_vm_try_disable_async(S390CcwMachineState *ms) { return false; }
|
||||||
|
-static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; }
|
||||||
|
+static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length,
|
||||||
|
+ Error **errp) { return 0; }
|
||||||
|
static inline int s390_pv_unpack(uint64_t addr, uint64_t size, uint64_t tweak) { return 0; }
|
||||||
|
static inline void s390_pv_prep_reset(void) {}
|
||||||
|
static inline int s390_pv_verify(void) { return 0; }
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
125
kvm-tests-remove-aio_context_acquire-tests.patch
Normal file
125
kvm-tests-remove-aio_context_acquire-tests.patch
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
From 420bf75353286324822c3bbca3b52a7a56ed668c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:20:00 -0500
|
||||||
|
Subject: [PATCH 083/101] tests: remove aio_context_acquire() tests
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [14/26] f6421037c1523bc957f3be0f4ad05571ae012dba (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The aio_context_acquire() API is being removed. Drop the test case that
|
||||||
|
calls the API.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-4-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
tests/unit/test-aio.c | 67 +------------------------------------------
|
||||||
|
1 file changed, 1 insertion(+), 66 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tests/unit/test-aio.c b/tests/unit/test-aio.c
|
||||||
|
index 337b6e4ea7..e77d86be87 100644
|
||||||
|
--- a/tests/unit/test-aio.c
|
||||||
|
+++ b/tests/unit/test-aio.c
|
||||||
|
@@ -100,76 +100,12 @@ static void event_ready_cb(EventNotifier *e)
|
||||||
|
|
||||||
|
/* Tests using aio_*. */
|
||||||
|
|
||||||
|
-typedef struct {
|
||||||
|
- QemuMutex start_lock;
|
||||||
|
- EventNotifier notifier;
|
||||||
|
- bool thread_acquired;
|
||||||
|
-} AcquireTestData;
|
||||||
|
-
|
||||||
|
-static void *test_acquire_thread(void *opaque)
|
||||||
|
-{
|
||||||
|
- AcquireTestData *data = opaque;
|
||||||
|
-
|
||||||
|
- /* Wait for other thread to let us start */
|
||||||
|
- qemu_mutex_lock(&data->start_lock);
|
||||||
|
- qemu_mutex_unlock(&data->start_lock);
|
||||||
|
-
|
||||||
|
- /* event_notifier_set might be called either before or after
|
||||||
|
- * the main thread's call to poll(). The test case's outcome
|
||||||
|
- * should be the same in either case.
|
||||||
|
- */
|
||||||
|
- event_notifier_set(&data->notifier);
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
-
|
||||||
|
- data->thread_acquired = true; /* success, we got here */
|
||||||
|
-
|
||||||
|
- return NULL;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void set_event_notifier(AioContext *nctx, EventNotifier *notifier,
|
||||||
|
EventNotifierHandler *handler)
|
||||||
|
{
|
||||||
|
aio_set_event_notifier(nctx, notifier, handler, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void dummy_notifier_read(EventNotifier *n)
|
||||||
|
-{
|
||||||
|
- event_notifier_test_and_clear(n);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-static void test_acquire(void)
|
||||||
|
-{
|
||||||
|
- QemuThread thread;
|
||||||
|
- AcquireTestData data;
|
||||||
|
-
|
||||||
|
- /* Dummy event notifier ensures aio_poll() will block */
|
||||||
|
- event_notifier_init(&data.notifier, false);
|
||||||
|
- set_event_notifier(ctx, &data.notifier, dummy_notifier_read);
|
||||||
|
- g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
|
||||||
|
-
|
||||||
|
- qemu_mutex_init(&data.start_lock);
|
||||||
|
- qemu_mutex_lock(&data.start_lock);
|
||||||
|
- data.thread_acquired = false;
|
||||||
|
-
|
||||||
|
- qemu_thread_create(&thread, "test_acquire_thread",
|
||||||
|
- test_acquire_thread,
|
||||||
|
- &data, QEMU_THREAD_JOINABLE);
|
||||||
|
-
|
||||||
|
- /* Block in aio_poll(), let other thread kick us and acquire context */
|
||||||
|
- aio_context_acquire(ctx);
|
||||||
|
- qemu_mutex_unlock(&data.start_lock); /* let the thread run */
|
||||||
|
- g_assert(aio_poll(ctx, true));
|
||||||
|
- g_assert(!data.thread_acquired);
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
-
|
||||||
|
- qemu_thread_join(&thread);
|
||||||
|
- set_event_notifier(ctx, &data.notifier, NULL);
|
||||||
|
- event_notifier_cleanup(&data.notifier);
|
||||||
|
-
|
||||||
|
- g_assert(data.thread_acquired);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static void test_bh_schedule(void)
|
||||||
|
{
|
||||||
|
BHTestData data = { .n = 0 };
|
||||||
|
@@ -879,7 +815,7 @@ static void test_worker_thread_co_enter(void)
|
||||||
|
qemu_thread_get_self(&this_thread);
|
||||||
|
co = qemu_coroutine_create(co_check_current_thread, &this_thread);
|
||||||
|
|
||||||
|
- qemu_thread_create(&worker_thread, "test_acquire_thread",
|
||||||
|
+ qemu_thread_create(&worker_thread, "test_aio_co_enter",
|
||||||
|
test_aio_co_enter,
|
||||||
|
co, QEMU_THREAD_JOINABLE);
|
||||||
|
|
||||||
|
@@ -899,7 +835,6 @@ int main(int argc, char **argv)
|
||||||
|
while (g_main_context_iteration(NULL, false));
|
||||||
|
|
||||||
|
g_test_init(&argc, &argv, NULL);
|
||||||
|
- g_test_add_func("/aio/acquire", test_acquire);
|
||||||
|
g_test_add_func("/aio/bh/schedule", test_bh_schedule);
|
||||||
|
g_test_add_func("/aio/bh/schedule10", test_bh_schedule10);
|
||||||
|
g_test_add_func("/aio/bh/cancel", test_bh_cancel);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
175
kvm-util-char_dev-Add-open_cdev.patch
Normal file
175
kvm-util-char_dev-Add-open_cdev.patch
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
From de167878ec4ca159cc6def5134c91c5fe9b5ab96 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:01 +0800
|
||||||
|
Subject: [PATCH 022/101] util/char_dev: Add open_cdev()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [21/67] 72bf9ec3ccc9959626235bd270ec84caa4cee435 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
/dev/vfio/devices/vfioX may not exist. In that case it is still possible
|
||||||
|
to open /dev/char/$major:$minor instead. Add helper function to abstract
|
||||||
|
the cdev open.
|
||||||
|
|
||||||
|
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit d6b5c4c1b516a8176b74ec35a0af8cf89b04b6c1)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
MAINTAINERS | 2 +
|
||||||
|
include/qemu/chardev_open.h | 16 ++++++++
|
||||||
|
util/chardev_open.c | 81 +++++++++++++++++++++++++++++++++++++
|
||||||
|
util/meson.build | 1 +
|
||||||
|
4 files changed, 100 insertions(+)
|
||||||
|
create mode 100644 include/qemu/chardev_open.h
|
||||||
|
create mode 100644 util/chardev_open.c
|
||||||
|
|
||||||
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||||
|
index a5a446914a..ca70bb4e64 100644
|
||||||
|
--- a/MAINTAINERS
|
||||||
|
+++ b/MAINTAINERS
|
||||||
|
@@ -2174,6 +2174,8 @@ M: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
S: Supported
|
||||||
|
F: backends/iommufd.c
|
||||||
|
F: include/sysemu/iommufd.h
|
||||||
|
+F: include/qemu/chardev_open.h
|
||||||
|
+F: util/chardev_open.c
|
||||||
|
|
||||||
|
vhost
|
||||||
|
M: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
diff --git a/include/qemu/chardev_open.h b/include/qemu/chardev_open.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..64e8fcfdcb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/qemu/chardev_open.h
|
||||||
|
@@ -0,0 +1,16 @@
|
||||||
|
+/*
|
||||||
|
+ * QEMU Chardev Helper
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ *
|
||||||
|
+ * This work is licensed under the terms of the GNU GPL, version 2. See
|
||||||
|
+ * the COPYING file in the top-level directory.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#ifndef QEMU_CHARDEV_OPEN_H
|
||||||
|
+#define QEMU_CHARDEV_OPEN_H
|
||||||
|
+
|
||||||
|
+int open_cdev(const char *devpath, dev_t cdev);
|
||||||
|
+#endif
|
||||||
|
diff --git a/util/chardev_open.c b/util/chardev_open.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..f776429788
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/util/chardev_open.c
|
||||||
|
@@ -0,0 +1,81 @@
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ *
|
||||||
|
+ * This software is available to you under a choice of one of two
|
||||||
|
+ * licenses. You may choose to be licensed under the terms of the GNU
|
||||||
|
+ * General Public License (GPL) Version 2, available from the file
|
||||||
|
+ * COPYING in the main directory of this source tree, or the
|
||||||
|
+ * OpenIB.org BSD license below:
|
||||||
|
+ *
|
||||||
|
+ * Redistribution and use in source and binary forms, with or
|
||||||
|
+ * without modification, are permitted provided that the following
|
||||||
|
+ * conditions are met:
|
||||||
|
+ *
|
||||||
|
+ * - Redistributions of source code must retain the above
|
||||||
|
+ * copyright notice, this list of conditions and the following
|
||||||
|
+ * disclaimer.
|
||||||
|
+ *
|
||||||
|
+ * - Redistributions in binary form must reproduce the above
|
||||||
|
+ * copyright notice, this list of conditions and the following
|
||||||
|
+ * disclaimer in the documentation and/or other materials
|
||||||
|
+ * provided with the distribution.
|
||||||
|
+ *
|
||||||
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||||
|
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||||
|
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
+ * SOFTWARE.
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ *
|
||||||
|
+ * Copied from
|
||||||
|
+ * https://github.com/linux-rdma/rdma-core/blob/master/util/open_cdev.c
|
||||||
|
+ *
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "qemu/osdep.h"
|
||||||
|
+#include "qemu/chardev_open.h"
|
||||||
|
+
|
||||||
|
+static int open_cdev_internal(const char *path, dev_t cdev)
|
||||||
|
+{
|
||||||
|
+ struct stat st;
|
||||||
|
+ int fd;
|
||||||
|
+
|
||||||
|
+ fd = qemu_open_old(path, O_RDWR);
|
||||||
|
+ if (fd == -1) {
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ if (fstat(fd, &st) || !S_ISCHR(st.st_mode) ||
|
||||||
|
+ (cdev != 0 && st.st_rdev != cdev)) {
|
||||||
|
+ close(fd);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+ return fd;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int open_cdev_robust(dev_t cdev)
|
||||||
|
+{
|
||||||
|
+ g_autofree char *devpath = NULL;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * This assumes that udev is being used and is creating the /dev/char/
|
||||||
|
+ * symlinks.
|
||||||
|
+ */
|
||||||
|
+ devpath = g_strdup_printf("/dev/char/%u:%u", major(cdev), minor(cdev));
|
||||||
|
+ return open_cdev_internal(devpath, cdev);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int open_cdev(const char *devpath, dev_t cdev)
|
||||||
|
+{
|
||||||
|
+ int fd;
|
||||||
|
+
|
||||||
|
+ fd = open_cdev_internal(devpath, cdev);
|
||||||
|
+ if (fd == -1 && cdev != 0) {
|
||||||
|
+ return open_cdev_robust(cdev);
|
||||||
|
+ }
|
||||||
|
+ return fd;
|
||||||
|
+}
|
||||||
|
diff --git a/util/meson.build b/util/meson.build
|
||||||
|
index c2322ef6e7..174c133368 100644
|
||||||
|
--- a/util/meson.build
|
||||||
|
+++ b/util/meson.build
|
||||||
|
@@ -108,6 +108,7 @@ if have_block
|
||||||
|
util_ss.add(files('filemonitor-stub.c'))
|
||||||
|
endif
|
||||||
|
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vfio-helpers.c'))
|
||||||
|
+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('chardev_open.c'))
|
||||||
|
endif
|
||||||
|
|
||||||
|
if cpu == 'aarch64'
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
154
kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch
Normal file
154
kvm-vfio-Introduce-a-helper-function-to-initialize-VFIOD.patch
Normal file
@ -0,0 +1,154 @@
|
|||||||
|
From f554328f6f4702743af71befcb83c25c36e4fa4d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:25 +0800
|
||||||
|
Subject: [PATCH 046/101] vfio: Introduce a helper function to initialize
|
||||||
|
VFIODevice
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [45/67] 73225f394540bf5aeb70c0bdb89771f19a6d286d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Introduce a helper function to replace the common code to initialize
|
||||||
|
VFIODevice in pci, platform, ap and ccw VFIO device.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 6106a329141af7d47bdc3346ce9820d4714e0e5d)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ap.c | 8 ++------
|
||||||
|
hw/vfio/ccw.c | 8 ++------
|
||||||
|
hw/vfio/helpers.c | 11 +++++++++++
|
||||||
|
hw/vfio/pci.c | 6 ++----
|
||||||
|
hw/vfio/platform.c | 6 ++----
|
||||||
|
include/hw/vfio/vfio-common.h | 2 ++
|
||||||
|
6 files changed, 21 insertions(+), 20 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
|
||||||
|
index 95fe7cd98b..e157aa1ff7 100644
|
||||||
|
--- a/hw/vfio/ap.c
|
||||||
|
+++ b/hw/vfio/ap.c
|
||||||
|
@@ -226,18 +226,14 @@ static void vfio_ap_instance_init(Object *obj)
|
||||||
|
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj);
|
||||||
|
VFIODevice *vbasedev = &vapdev->vdev;
|
||||||
|
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_AP;
|
||||||
|
- vbasedev->ops = &vfio_ap_ops;
|
||||||
|
- vbasedev->dev = DEVICE(vapdev);
|
||||||
|
- vbasedev->fd = -1;
|
||||||
|
-
|
||||||
|
/*
|
||||||
|
* vfio-ap devices operate in a way compatible with discarding of
|
||||||
|
* memory in RAM blocks, as no pages are pinned in the host.
|
||||||
|
* This needs to be set before vfio_get_device() for vfio common to
|
||||||
|
* handle ram_block_discard_disable().
|
||||||
|
*/
|
||||||
|
- vbasedev->ram_block_discard_allowed = true;
|
||||||
|
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_AP, &vfio_ap_ops,
|
||||||
|
+ DEVICE(vapdev), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
||||||
|
index 6305a4c1b8..90e4a53437 100644
|
||||||
|
--- a/hw/vfio/ccw.c
|
||||||
|
+++ b/hw/vfio/ccw.c
|
||||||
|
@@ -683,11 +683,6 @@ static void vfio_ccw_instance_init(Object *obj)
|
||||||
|
VFIOCCWDevice *vcdev = VFIO_CCW(obj);
|
||||||
|
VFIODevice *vbasedev = &vcdev->vdev;
|
||||||
|
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_CCW;
|
||||||
|
- vbasedev->ops = &vfio_ccw_ops;
|
||||||
|
- vbasedev->dev = DEVICE(vcdev);
|
||||||
|
- vbasedev->fd = -1;
|
||||||
|
-
|
||||||
|
/*
|
||||||
|
* All vfio-ccw devices are believed to operate in a way compatible with
|
||||||
|
* discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||||
|
@@ -696,7 +691,8 @@ static void vfio_ccw_instance_init(Object *obj)
|
||||||
|
* needs to be set before vfio_get_device() for vfio common to handle
|
||||||
|
* ram_block_discard_disable().
|
||||||
|
*/
|
||||||
|
- vbasedev->ram_block_discard_allowed = true;
|
||||||
|
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_CCW, &vfio_ccw_ops,
|
||||||
|
+ DEVICE(vcdev), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
|
||||||
|
index 3592c3d54e..6789870802 100644
|
||||||
|
--- a/hw/vfio/helpers.c
|
||||||
|
+++ b/hw/vfio/helpers.c
|
||||||
|
@@ -652,3 +652,14 @@ void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
|
||||||
|
}
|
||||||
|
vbasedev->fd = fd;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
|
||||||
|
+ DeviceState *dev, bool ram_discard)
|
||||||
|
+{
|
||||||
|
+ vbasedev->type = type;
|
||||||
|
+ vbasedev->ops = ops;
|
||||||
|
+ vbasedev->dev = dev;
|
||||||
|
+ vbasedev->fd = -1;
|
||||||
|
+
|
||||||
|
+ vbasedev->ram_block_discard_allowed = ram_discard;
|
||||||
|
+}
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index 3f5900cc46..83c3238608 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -3353,10 +3353,8 @@ static void vfio_instance_init(Object *obj)
|
||||||
|
vdev->host.slot = ~0U;
|
||||||
|
vdev->host.function = ~0U;
|
||||||
|
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||||||
|
- vbasedev->ops = &vfio_pci_ops;
|
||||||
|
- vbasedev->dev = DEVICE(vdev);
|
||||||
|
- vbasedev->fd = -1;
|
||||||
|
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_pci_ops,
|
||||||
|
+ DEVICE(vdev), false);
|
||||||
|
|
||||||
|
vdev->nv_gpudirect_clique = 0xFF;
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
|
||||||
|
index 506eb8193f..a8d9b7da63 100644
|
||||||
|
--- a/hw/vfio/platform.c
|
||||||
|
+++ b/hw/vfio/platform.c
|
||||||
|
@@ -657,10 +657,8 @@ static void vfio_platform_instance_init(Object *obj)
|
||||||
|
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
|
||||||
|
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
|
||||||
|
- vbasedev->ops = &vfio_platform_ops;
|
||||||
|
- vbasedev->dev = DEVICE(vdev);
|
||||||
|
- vbasedev->fd = -1;
|
||||||
|
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
|
||||||
|
+ DEVICE(vdev), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index efcba19f66..b8aa8a5495 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -257,4 +257,6 @@ int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
/* Returns 0 on success, or a negative errno. */
|
||||||
|
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
|
||||||
|
void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
|
||||||
|
+void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
|
||||||
|
+ DeviceState *dev, bool ram_discard);
|
||||||
|
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
129
kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch
Normal file
129
kvm-vfio-Introduce-base-object-for-VFIOContainer-and-tar.patch
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
From 7f392385d1b865904eae4b6681e3e7a87eb3af3d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:27 +0800
|
||||||
|
Subject: [PATCH 002/101] vfio: Introduce base object for VFIOContainer and
|
||||||
|
targeted interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [1/67] e63af50c2cb94f286b2d91f58c2d19dd862e019d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Introduce a dumb VFIOContainerBase object and its targeted interface.
|
||||||
|
This is deliberately not a QOM object because we don't want it to be
|
||||||
|
visible from the user interface. The VFIOContainerBase will be
|
||||||
|
smoothly populated in subsequent patches as well as interfaces.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit f61dddd73232e3d82d560d1e1bca120446021f2f)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
include/hw/vfio/vfio-common.h | 8 ++---
|
||||||
|
include/hw/vfio/vfio-container-base.h | 50 +++++++++++++++++++++++++++
|
||||||
|
2 files changed, 52 insertions(+), 6 deletions(-)
|
||||||
|
create mode 100644 include/hw/vfio/vfio-container-base.h
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index a4a22accb9..586d153c12 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -30,6 +30,7 @@
|
||||||
|
#include <linux/vfio.h>
|
||||||
|
#endif
|
||||||
|
#include "sysemu/sysemu.h"
|
||||||
|
+#include "hw/vfio/vfio-container-base.h"
|
||||||
|
|
||||||
|
#define VFIO_MSG_PREFIX "vfio %s: "
|
||||||
|
|
||||||
|
@@ -81,6 +82,7 @@ typedef struct VFIOAddressSpace {
|
||||||
|
struct VFIOGroup;
|
||||||
|
|
||||||
|
typedef struct VFIOContainer {
|
||||||
|
+ VFIOContainerBase bcontainer;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
MemoryListener listener;
|
||||||
|
@@ -201,12 +203,6 @@ typedef struct VFIODisplay {
|
||||||
|
} dmabuf;
|
||||||
|
} VFIODisplay;
|
||||||
|
|
||||||
|
-typedef struct {
|
||||||
|
- unsigned long *bitmap;
|
||||||
|
- hwaddr size;
|
||||||
|
- hwaddr pages;
|
||||||
|
-} VFIOBitmap;
|
||||||
|
-
|
||||||
|
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
|
||||||
|
void vfio_put_address_space(VFIOAddressSpace *space);
|
||||||
|
bool vfio_devices_all_running_and_saving(VFIOContainer *container);
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..1d6daaea5d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -0,0 +1,50 @@
|
||||||
|
+/*
|
||||||
|
+ * VFIO BASE CONTAINER
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ * Copyright Red Hat, Inc. 2023
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ * Eric Auger <eric.auger@redhat.com>
|
||||||
|
+ *
|
||||||
|
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#ifndef HW_VFIO_VFIO_CONTAINER_BASE_H
|
||||||
|
+#define HW_VFIO_VFIO_CONTAINER_BASE_H
|
||||||
|
+
|
||||||
|
+#include "exec/memory.h"
|
||||||
|
+
|
||||||
|
+typedef struct VFIODevice VFIODevice;
|
||||||
|
+typedef struct VFIOIOMMUOps VFIOIOMMUOps;
|
||||||
|
+
|
||||||
|
+typedef struct {
|
||||||
|
+ unsigned long *bitmap;
|
||||||
|
+ hwaddr size;
|
||||||
|
+ hwaddr pages;
|
||||||
|
+} VFIOBitmap;
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * This is the base object for vfio container backends
|
||||||
|
+ */
|
||||||
|
+typedef struct VFIOContainerBase {
|
||||||
|
+ const VFIOIOMMUOps *ops;
|
||||||
|
+} VFIOContainerBase;
|
||||||
|
+
|
||||||
|
+struct VFIOIOMMUOps {
|
||||||
|
+ /* basic feature */
|
||||||
|
+ int (*dma_map)(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ void *vaddr, bool readonly);
|
||||||
|
+ int (*dma_unmap)(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ IOMMUTLBEntry *iotlb);
|
||||||
|
+ int (*attach_device)(const char *name, VFIODevice *vbasedev,
|
||||||
|
+ AddressSpace *as, Error **errp);
|
||||||
|
+ void (*detach_device)(VFIODevice *vbasedev);
|
||||||
|
+ /* migration feature */
|
||||||
|
+ int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start);
|
||||||
|
+ int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
|
||||||
|
+ hwaddr iova, hwaddr size);
|
||||||
|
+};
|
||||||
|
+#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
276
kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch
Normal file
276
kvm-vfio-Make-VFIOContainerBase-poiner-parameter-const-i.patch
Normal file
@ -0,0 +1,276 @@
|
|||||||
|
From 84b15fad1af781d06d0206d362de0801d7a18d0b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:17 +0800
|
||||||
|
Subject: [PATCH 038/101] vfio: Make VFIOContainerBase poiner parameter const
|
||||||
|
in VFIOIOMMUOps callbacks
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [37/67] 95eb9edc7fcfefbd4b075f6f04941ed4a19ff87d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some of the callbacks in VFIOIOMMUOps pass VFIOContainerBase pointer,
|
||||||
|
those callbacks only need read access to the sub object of VFIOContainerBase.
|
||||||
|
So make VFIOContainerBase, VFIOContainer and VFIOIOMMUFDContainer as const
|
||||||
|
in these callbacks.
|
||||||
|
|
||||||
|
Local functions called by those callbacks also need the same changes to avoid
|
||||||
|
build error.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 4517c33c31d392f08fa96a9db911da1e3507be94)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 9 +++----
|
||||||
|
hw/vfio/container-base.c | 2 +-
|
||||||
|
hw/vfio/container.c | 34 ++++++++++++++-------------
|
||||||
|
hw/vfio/iommufd.c | 8 +++----
|
||||||
|
include/hw/vfio/vfio-common.h | 12 ++++++----
|
||||||
|
include/hw/vfio/vfio-container-base.h | 12 ++++++----
|
||||||
|
6 files changed, 42 insertions(+), 35 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 6569732b7a..08a3e57672 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -204,7 +204,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||||
|
+bool vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
@@ -221,7 +221,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||||
|
* Check if all VFIO devices are running and migration is active, which is
|
||||||
|
* essentially equivalent to the migration being in pre-copy phase.
|
||||||
|
*/
|
||||||
|
-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer)
|
||||||
|
+bool
|
||||||
|
+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
@@ -1139,7 +1140,7 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap, hwaddr iova,
|
||||||
|
hwaddr size)
|
||||||
|
{
|
||||||
|
@@ -1162,7 +1163,7 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
uint64_t size, ram_addr_t ram_addr)
|
||||||
|
{
|
||||||
|
bool all_device_dirty_tracking =
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index eee2dcfe76..1ffd25bbfa 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -63,7 +63,7 @@ int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size)
|
||||||
|
{
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 1dbf9b9a17..b22feb8ded 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -61,11 +61,11 @@ static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||||
|
+static int vfio_dma_unmap_bitmap(const VFIOContainer *container,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ const VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
struct vfio_iommu_type1_dma_unmap *unmap;
|
||||||
|
struct vfio_bitmap *bitmap;
|
||||||
|
VFIOBitmap vbmap;
|
||||||
|
@@ -117,11 +117,12 @@ unmap_exit:
|
||||||
|
/*
|
||||||
|
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||||
|
*/
|
||||||
|
-static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
- ram_addr_t size, IOMMUTLBEntry *iotlb)
|
||||||
|
+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dma_unmap unmap = {
|
||||||
|
.argsz = sizeof(unmap),
|
||||||
|
.flags = 0,
|
||||||
|
@@ -174,11 +175,11 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
+static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dma_map map = {
|
||||||
|
.argsz = sizeof(map),
|
||||||
|
.flags = VFIO_DMA_MAP_FLAG_READ,
|
||||||
|
@@ -207,11 +208,12 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
- bool start)
|
||||||
|
+static int
|
||||||
|
+vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
|
||||||
|
+ bool start)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
int ret;
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap dirty = {
|
||||||
|
.argsz = sizeof(dirty),
|
||||||
|
@@ -233,12 +235,12 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap *dbitmap;
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap_get *range;
|
||||||
|
int ret;
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index 5accd26484..87a561c545 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -26,10 +26,10 @@
|
||||||
|
#include "qemu/chardev_open.h"
|
||||||
|
#include "pci.h"
|
||||||
|
|
||||||
|
-static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
+static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
{
|
||||||
|
- VFIOIOMMUFDContainer *container =
|
||||||
|
+ const VFIOIOMMUFDContainer *container =
|
||||||
|
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
|
||||||
|
return iommufd_backend_map_dma(container->be,
|
||||||
|
@@ -37,11 +37,11 @@ static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
iova, size, vaddr, readonly);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
+static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
- VFIOIOMMUFDContainer *container =
|
||||||
|
+ const VFIOIOMMUFDContainer *container =
|
||||||
|
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
|
||||||
|
/* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 697bf24a35..efcba19f66 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -244,13 +244,15 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
|
||||||
|
void vfio_migration_exit(VFIODevice *vbasedev);
|
||||||
|
|
||||||
|
int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size);
|
||||||
|
-bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer);
|
||||||
|
-bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer);
|
||||||
|
-int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+bool
|
||||||
|
+vfio_devices_all_running_and_mig_active(const VFIOContainerBase *bcontainer);
|
||||||
|
+bool
|
||||||
|
+vfio_devices_all_device_dirty_tracking(const VFIOContainerBase *bcontainer);
|
||||||
|
+int vfio_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap, hwaddr iova,
|
||||||
|
hwaddr size);
|
||||||
|
-int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
- uint64_t size, ram_addr_t ram_addr);
|
||||||
|
+int vfio_get_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
+ uint64_t size, ram_addr_t ram_addr);
|
||||||
|
|
||||||
|
/* Returns 0 on success, or a negative errno. */
|
||||||
|
int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 45bb19c767..2ae297ccda 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -82,7 +82,7 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section);
|
||||||
|
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
bool start);
|
||||||
|
-int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size);
|
||||||
|
|
||||||
|
@@ -93,18 +93,20 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
|
||||||
|
struct VFIOIOMMUOps {
|
||||||
|
/* basic feature */
|
||||||
|
- int (*dma_map)(VFIOContainerBase *bcontainer,
|
||||||
|
+ int (*dma_map)(const VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
void *vaddr, bool readonly);
|
||||||
|
- int (*dma_unmap)(VFIOContainerBase *bcontainer,
|
||||||
|
+ int (*dma_unmap)(const VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb);
|
||||||
|
int (*attach_device)(const char *name, VFIODevice *vbasedev,
|
||||||
|
AddressSpace *as, Error **errp);
|
||||||
|
void (*detach_device)(VFIODevice *vbasedev);
|
||||||
|
/* migration feature */
|
||||||
|
- int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start);
|
||||||
|
- int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
|
||||||
|
+ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
|
||||||
|
+ bool start);
|
||||||
|
+ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
|
||||||
|
+ VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size);
|
||||||
|
/* PCI specific */
|
||||||
|
int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,75 @@
|
|||||||
|
From 57bdfc821d6f4b4f9c6b1ff05bf0114e5cabc77e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:13 +0800
|
||||||
|
Subject: [PATCH 034/101] vfio/ap: Allow the selection of a given iommu backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [33/67] a12bb86e5b627ccf246fb9ce60820595589ff8e5 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Now we support two types of iommu backends, let's add the capability
|
||||||
|
to select one of them. This depends on whether an iommufd object has
|
||||||
|
been linked with the vfio-ap device:
|
||||||
|
|
||||||
|
If the user wants to use the legacy backend, it shall not
|
||||||
|
link the vfio-ap device with any iommufd object:
|
||||||
|
|
||||||
|
-device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX
|
||||||
|
|
||||||
|
This is called the legacy mode/backend.
|
||||||
|
|
||||||
|
If the user wants to use the iommufd backend (/dev/iommu) it
|
||||||
|
shall pass an iommufd object id in the vfio-ap device options:
|
||||||
|
|
||||||
|
-object iommufd,id=iommufd0
|
||||||
|
-device vfio-ap,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0
|
||||||
|
|
||||||
|
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 336f308958d598f3db351bb7d94cc57b4b2d448d)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ap.c | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
|
||||||
|
index bbf69ff55a..80629609ae 100644
|
||||||
|
--- a/hw/vfio/ap.c
|
||||||
|
+++ b/hw/vfio/ap.c
|
||||||
|
@@ -11,10 +11,12 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include <linux/vfio.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "hw/vfio/vfio-common.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
#include "hw/s390x/ap-device.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
#include "qemu/event_notifier.h"
|
||||||
|
@@ -204,6 +206,10 @@ static void vfio_ap_unrealize(DeviceState *dev)
|
||||||
|
|
||||||
|
static Property vfio_ap_properties[] = {
|
||||||
|
DEFINE_PROP_STRING("sysfsdev", VFIOAPDevice, vdev.sysfsdev),
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ DEFINE_PROP_LINK("iommufd", VFIOAPDevice, vdev.iommufd,
|
||||||
|
+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||||
|
+#endif
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,87 @@
|
|||||||
|
From db09b7c60c01ee75d602261ee959a96fa0d89d68 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:14 +0800
|
||||||
|
Subject: [PATCH 035/101] vfio/ap: Make vfio cdev pre-openable by passing a
|
||||||
|
file handle
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [34/67] aaafa6088a9b0302d53aa539f67792d02ea0f663 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This gives management tools like libvirt a chance to open the vfio
|
||||||
|
cdev with privilege and pass FD to qemu. This way qemu never needs
|
||||||
|
to have privilege to open a VFIO or iommu cdev node.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 5e7ba401b71d18544a3e44b2a58b9e63fd5148d5)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ap.c | 23 ++++++++++++++++++++++-
|
||||||
|
1 file changed, 22 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
|
||||||
|
index 80629609ae..f180e4a32a 100644
|
||||||
|
--- a/hw/vfio/ap.c
|
||||||
|
+++ b/hw/vfio/ap.c
|
||||||
|
@@ -160,7 +160,10 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
|
||||||
|
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
|
||||||
|
VFIODevice *vbasedev = &vapdev->vdev;
|
||||||
|
|
||||||
|
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||||
|
+ if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
vbasedev->ops = &vfio_ap_ops;
|
||||||
|
vbasedev->type = VFIO_DEVICE_TYPE_AP;
|
||||||
|
vbasedev->dev = dev;
|
||||||
|
@@ -230,11 +233,28 @@ static const VMStateDescription vfio_ap_vmstate = {
|
||||||
|
.unmigratable = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
+static void vfio_ap_instance_init(Object *obj)
|
||||||
|
+{
|
||||||
|
+ VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj);
|
||||||
|
+
|
||||||
|
+ vapdev->vdev.fd = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+static void vfio_ap_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ vfio_device_set_fd(&VFIO_AP_DEVICE(obj)->vdev, str, errp);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static void vfio_ap_class_init(ObjectClass *klass, void *data)
|
||||||
|
{
|
||||||
|
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||||
|
|
||||||
|
device_class_set_props(dc, vfio_ap_properties);
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ object_class_property_add_str(klass, "fd", NULL, vfio_ap_set_fd);
|
||||||
|
+#endif
|
||||||
|
dc->vmsd = &vfio_ap_vmstate;
|
||||||
|
dc->desc = "VFIO-based AP device assignment";
|
||||||
|
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||||
|
@@ -249,6 +269,7 @@ static const TypeInfo vfio_ap_info = {
|
||||||
|
.name = TYPE_VFIO_AP_DEVICE,
|
||||||
|
.parent = TYPE_AP_DEVICE,
|
||||||
|
.instance_size = sizeof(VFIOAPDevice),
|
||||||
|
+ .instance_init = vfio_ap_instance_init,
|
||||||
|
.class_init = vfio_ap_class_init,
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,81 @@
|
|||||||
|
From b8630ecb698e31311089ba4e224d5e2c08c8e665 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:23 +0800
|
||||||
|
Subject: [PATCH 044/101] vfio/ap: Move VFIODevice initializations in
|
||||||
|
vfio_ap_instance_init
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [43/67] 95a527f649b28c5c78903e99735107667e8468b1 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some of the VFIODevice initializations are in vfio_ap_realize,
|
||||||
|
move all of them into vfio_ap_instance_init.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit cbbcc2f1706aa1a08637142744d2f5f6515ac93f)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ap.c | 26 +++++++++++++-------------
|
||||||
|
1 file changed, 13 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
|
||||||
|
index f180e4a32a..95fe7cd98b 100644
|
||||||
|
--- a/hw/vfio/ap.c
|
||||||
|
+++ b/hw/vfio/ap.c
|
||||||
|
@@ -164,18 +164,6 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- vbasedev->ops = &vfio_ap_ops;
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_AP;
|
||||||
|
- vbasedev->dev = dev;
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- * vfio-ap devices operate in a way compatible with discarding of
|
||||||
|
- * memory in RAM blocks, as no pages are pinned in the host.
|
||||||
|
- * This needs to be set before vfio_get_device() for vfio common to
|
||||||
|
- * handle ram_block_discard_disable().
|
||||||
|
- */
|
||||||
|
- vapdev->vdev.ram_block_discard_allowed = true;
|
||||||
|
-
|
||||||
|
ret = vfio_attach_device(vbasedev->name, vbasedev,
|
||||||
|
&address_space_memory, errp);
|
||||||
|
if (ret) {
|
||||||
|
@@ -236,8 +224,20 @@ static const VMStateDescription vfio_ap_vmstate = {
|
||||||
|
static void vfio_ap_instance_init(Object *obj)
|
||||||
|
{
|
||||||
|
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(obj);
|
||||||
|
+ VFIODevice *vbasedev = &vapdev->vdev;
|
||||||
|
|
||||||
|
- vapdev->vdev.fd = -1;
|
||||||
|
+ vbasedev->type = VFIO_DEVICE_TYPE_AP;
|
||||||
|
+ vbasedev->ops = &vfio_ap_ops;
|
||||||
|
+ vbasedev->dev = DEVICE(vapdev);
|
||||||
|
+ vbasedev->fd = -1;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * vfio-ap devices operate in a way compatible with discarding of
|
||||||
|
+ * memory in RAM blocks, as no pages are pinned in the host.
|
||||||
|
+ * This needs to be set before vfio_get_device() for vfio common to
|
||||||
|
+ * handle ram_block_discard_disable().
|
||||||
|
+ */
|
||||||
|
+ vbasedev->ram_block_discard_allowed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,79 @@
|
|||||||
|
From 732115c80eb0dd672925a0737e09643d8a889abd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:15 +0800
|
||||||
|
Subject: [PATCH 036/101] vfio/ccw: Allow the selection of a given iommu
|
||||||
|
backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [35/67] 1701de023a9f3b3f0420689bf851e11aee88800d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Now we support two types of iommu backends, let's add the capability
|
||||||
|
to select one of them. This depends on whether an iommufd object has
|
||||||
|
been linked with the vfio-ccw device:
|
||||||
|
|
||||||
|
If the user wants to use the legacy backend, it shall not
|
||||||
|
link the vfio-ccw device with any iommufd object:
|
||||||
|
|
||||||
|
-device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX
|
||||||
|
|
||||||
|
This is called the legacy mode/backend.
|
||||||
|
|
||||||
|
If the user wants to use the iommufd backend (/dev/iommu) it
|
||||||
|
shall pass an iommufd object id in the vfio-ccw device options:
|
||||||
|
|
||||||
|
-object iommufd,id=iommufd0
|
||||||
|
-device vfio-ccw,sysfsdev=/sys/bus/mdev/devices/XXX,iommufd=iommufd0
|
||||||
|
|
||||||
|
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit e70f971a6c1230138843d7ab82267e4a5aaf6bda)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ccw.c | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
||||||
|
index d857bb8d0f..d2d58bb677 100644
|
||||||
|
--- a/hw/vfio/ccw.c
|
||||||
|
+++ b/hw/vfio/ccw.c
|
||||||
|
@@ -15,12 +15,14 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include <linux/vfio.h>
|
||||||
|
#include <linux/vfio_ccw.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "hw/vfio/vfio-common.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
#include "hw/s390x/s390-ccw.h"
|
||||||
|
#include "hw/s390x/vfio-ccw.h"
|
||||||
|
#include "hw/qdev-properties.h"
|
||||||
|
@@ -677,6 +679,10 @@ static void vfio_ccw_unrealize(DeviceState *dev)
|
||||||
|
static Property vfio_ccw_properties[] = {
|
||||||
|
DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
|
||||||
|
DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ DEFINE_PROP_LINK("iommufd", VFIOCCWDevice, vdev.iommufd,
|
||||||
|
+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||||
|
+#endif
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,93 @@
|
|||||||
|
From 0ff08afdec19f4decaf750fa7d158e0ea498ff28 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:16 +0800
|
||||||
|
Subject: [PATCH 037/101] vfio/ccw: Make vfio cdev pre-openable by passing a
|
||||||
|
file handle
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [36/67] cc0d8f51cffa5d5a7aebc2334b908b9877179ae7 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This gives management tools like libvirt a chance to open the vfio
|
||||||
|
cdev with privilege and pass FD to qemu. This way qemu never needs
|
||||||
|
to have privilege to open a VFIO or iommu cdev node.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 909a6254edaa8d0b0e3f1c0a623862e73d1842e9)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ccw.c | 25 ++++++++++++++++++++++---
|
||||||
|
1 file changed, 22 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
||||||
|
index d2d58bb677..2afdf17dbe 100644
|
||||||
|
--- a/hw/vfio/ccw.c
|
||||||
|
+++ b/hw/vfio/ccw.c
|
||||||
|
@@ -590,11 +590,12 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
vbasedev->ops = &vfio_ccw_ops;
|
||||||
|
vbasedev->type = VFIO_DEVICE_TYPE_CCW;
|
||||||
|
- vbasedev->name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
|
||||||
|
- vcdev->cdev.hostid.ssid,
|
||||||
|
- vcdev->cdev.hostid.devid);
|
||||||
|
vbasedev->dev = dev;
|
||||||
|
|
||||||
|
/*
|
||||||
|
@@ -691,12 +692,29 @@ static const VMStateDescription vfio_ccw_vmstate = {
|
||||||
|
.unmigratable = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
+static void vfio_ccw_instance_init(Object *obj)
|
||||||
|
+{
|
||||||
|
+ VFIOCCWDevice *vcdev = VFIO_CCW(obj);
|
||||||
|
+
|
||||||
|
+ vcdev->vdev.fd = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+static void vfio_ccw_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ vfio_device_set_fd(&VFIO_CCW(obj)->vdev, str, errp);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static void vfio_ccw_class_init(ObjectClass *klass, void *data)
|
||||||
|
{
|
||||||
|
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||||
|
S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
|
||||||
|
|
||||||
|
device_class_set_props(dc, vfio_ccw_properties);
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ object_class_property_add_str(klass, "fd", NULL, vfio_ccw_set_fd);
|
||||||
|
+#endif
|
||||||
|
dc->vmsd = &vfio_ccw_vmstate;
|
||||||
|
dc->desc = "VFIO-based subchannel assignment";
|
||||||
|
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||||
|
@@ -714,6 +732,7 @@ static const TypeInfo vfio_ccw_info = {
|
||||||
|
.name = TYPE_VFIO_CCW,
|
||||||
|
.parent = TYPE_S390_CCW,
|
||||||
|
.instance_size = sizeof(VFIOCCWDevice),
|
||||||
|
+ .instance_init = vfio_ccw_instance_init,
|
||||||
|
.class_init = vfio_ccw_class_init,
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,85 @@
|
|||||||
|
From 2ef1c050722115247962e3cd4d8fcf73727e597e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:24 +0800
|
||||||
|
Subject: [PATCH 045/101] vfio/ccw: Move VFIODevice initializations in
|
||||||
|
vfio_ccw_instance_init
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [44/67] 3345ed58f491aba8fd51bcc172af267ae53e6c8c (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some of the VFIODevice initializations are in vfio_ccw_realize,
|
||||||
|
move all of them into vfio_ccw_instance_init.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Reviewed-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit c12b55ad6f9d3b4792b590e9211bd7319e4a2d70)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/ccw.c | 30 +++++++++++++++---------------
|
||||||
|
1 file changed, 15 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
||||||
|
index 2afdf17dbe..6305a4c1b8 100644
|
||||||
|
--- a/hw/vfio/ccw.c
|
||||||
|
+++ b/hw/vfio/ccw.c
|
||||||
|
@@ -594,20 +594,6 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- vbasedev->ops = &vfio_ccw_ops;
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_CCW;
|
||||||
|
- vbasedev->dev = dev;
|
||||||
|
-
|
||||||
|
- /*
|
||||||
|
- * All vfio-ccw devices are believed to operate in a way compatible with
|
||||||
|
- * discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||||
|
- * in the current working set of the guest driver and therefore never
|
||||||
|
- * overlap e.g., with pages available to the guest balloon driver. This
|
||||||
|
- * needs to be set before vfio_get_device() for vfio common to handle
|
||||||
|
- * ram_block_discard_disable().
|
||||||
|
- */
|
||||||
|
- vbasedev->ram_block_discard_allowed = true;
|
||||||
|
-
|
||||||
|
ret = vfio_attach_device(cdev->mdevid, vbasedev,
|
||||||
|
&address_space_memory, errp);
|
||||||
|
if (ret) {
|
||||||
|
@@ -695,8 +681,22 @@ static const VMStateDescription vfio_ccw_vmstate = {
|
||||||
|
static void vfio_ccw_instance_init(Object *obj)
|
||||||
|
{
|
||||||
|
VFIOCCWDevice *vcdev = VFIO_CCW(obj);
|
||||||
|
+ VFIODevice *vbasedev = &vcdev->vdev;
|
||||||
|
+
|
||||||
|
+ vbasedev->type = VFIO_DEVICE_TYPE_CCW;
|
||||||
|
+ vbasedev->ops = &vfio_ccw_ops;
|
||||||
|
+ vbasedev->dev = DEVICE(vcdev);
|
||||||
|
+ vbasedev->fd = -1;
|
||||||
|
|
||||||
|
- vcdev->vdev.fd = -1;
|
||||||
|
+ /*
|
||||||
|
+ * All vfio-ccw devices are believed to operate in a way compatible with
|
||||||
|
+ * discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||||
|
+ * in the current working set of the guest driver and therefore never
|
||||||
|
+ * overlap e.g., with pages available to the guest balloon driver. This
|
||||||
|
+ * needs to be set before vfio_get_device() for vfio common to handle
|
||||||
|
+ * ram_block_discard_disable().
|
||||||
|
+ */
|
||||||
|
+ vbasedev->ram_block_discard_allowed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,98 @@
|
|||||||
|
From 7de36998dd6177380e46b8c5f3a91c3fad75483c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:30 +0800
|
||||||
|
Subject: [PATCH 005/101] vfio/common: Introduce vfio_container_init/destroy
|
||||||
|
helper
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [4/67] 8287f687ef19cd84afede1e8f3b16ac3caf29a1d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This adds two helper functions vfio_container_init/destroy which will be
|
||||||
|
used by both legacy and iommufd containers to do base container specific
|
||||||
|
initialization and release.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit ed2f7f80170251e7cdd2965a13ee97527d1fbec8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container-base.c | 9 +++++++++
|
||||||
|
hw/vfio/container.c | 4 +++-
|
||||||
|
include/hw/vfio/vfio-container-base.h | 4 ++++
|
||||||
|
3 files changed, 16 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 55d3a35fa4..e929435751 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -30,3 +30,12 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
g_assert(bcontainer->ops->dma_unmap);
|
||||||
|
return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops)
|
||||||
|
+{
|
||||||
|
+ bcontainer->ops = ops;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
+{
|
||||||
|
+}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index c04df26323..32a0251dd1 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -559,7 +559,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
QLIST_INIT(&container->giommu_list);
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
- bcontainer->ops = &vfio_legacy_ops;
|
||||||
|
+ vfio_container_init(bcontainer, &vfio_legacy_ops);
|
||||||
|
|
||||||
|
ret = vfio_init_container(container, group->fd, errp);
|
||||||
|
if (ret) {
|
||||||
|
@@ -661,6 +661,7 @@ put_space_exit:
|
||||||
|
static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
{
|
||||||
|
VFIOContainer *container = group->container;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
|
||||||
|
QLIST_REMOVE(group, container_next);
|
||||||
|
group->container = NULL;
|
||||||
|
@@ -695,6 +696,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
QLIST_REMOVE(giommu, giommu_next);
|
||||||
|
g_free(giommu);
|
||||||
|
}
|
||||||
|
+ vfio_container_destroy(bcontainer);
|
||||||
|
|
||||||
|
trace_vfio_disconnect_container(container->fd);
|
||||||
|
close(container->fd);
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 56b033f59f..577f52ccbc 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -38,6 +38,10 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb);
|
||||||
|
|
||||||
|
+void vfio_container_init(VFIOContainerBase *bcontainer,
|
||||||
|
+ const VFIOIOMMUOps *ops);
|
||||||
|
+void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
+
|
||||||
|
struct VFIOIOMMUOps {
|
||||||
|
/* basic feature */
|
||||||
|
int (*dma_map)(VFIOContainerBase *bcontainer,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
221
kvm-vfio-common-Move-giommu_list-in-base-container.patch
Normal file
221
kvm-vfio-common-Move-giommu_list-in-base-container.patch
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
From 36f4005c3dbb4c8b63a975494c75281de51c25f9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:31 +0800
|
||||||
|
Subject: [PATCH 006/101] vfio/common: Move giommu_list in base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [5/67] ba5898e96c16c7f6e8108ae461b454d3c8c35404 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Move the giommu_list field into the base container and store
|
||||||
|
the base container in the VFIOGuestIOMMU.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit dddf83ab99eb832c449249397a1c302c6ed746bf)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 17 +++++++++++------
|
||||||
|
hw/vfio/container-base.c | 9 +++++++++
|
||||||
|
hw/vfio/container.c | 8 --------
|
||||||
|
include/hw/vfio/vfio-common.h | 9 ---------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 9 +++++++++
|
||||||
|
5 files changed, 29 insertions(+), 23 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index e610771888..43580bcc43 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||||
|
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||||
|
- VFIOContainerBase *bcontainer = &giommu->container->bcontainer;
|
||||||
|
+ VFIOContainerBase *bcontainer = giommu->bcontainer;
|
||||||
|
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||||
|
void *vaddr;
|
||||||
|
int ret;
|
||||||
|
@@ -569,6 +569,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
void *vaddr;
|
||||||
|
@@ -612,7 +613,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
giommu->iommu_mr = iommu_mr;
|
||||||
|
giommu->iommu_offset = section->offset_within_address_space -
|
||||||
|
section->offset_within_region;
|
||||||
|
- giommu->container = container;
|
||||||
|
+ giommu->bcontainer = bcontainer;
|
||||||
|
llend = int128_add(int128_make64(section->offset_within_region),
|
||||||
|
section->size);
|
||||||
|
llend = int128_sub(llend, int128_one());
|
||||||
|
@@ -647,7 +648,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
g_free(giommu);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
- QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
|
||||||
|
+ QLIST_INSERT_HEAD(&bcontainer->giommu_list, giommu, giommu_next);
|
||||||
|
memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
|
||||||
|
|
||||||
|
return;
|
||||||
|
@@ -732,6 +733,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
int ret;
|
||||||
|
@@ -744,7 +746,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
if (memory_region_is_iommu(section->mr)) {
|
||||||
|
VFIOGuestIOMMU *giommu;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||||
|
+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
|
||||||
|
if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
|
||||||
|
giommu->n.start == section->offset_within_region) {
|
||||||
|
memory_region_unregister_iommu_notifier(section->mr,
|
||||||
|
@@ -1206,7 +1208,9 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
vfio_giommu_dirty_notifier *gdn = container_of(n,
|
||||||
|
vfio_giommu_dirty_notifier, n);
|
||||||
|
VFIOGuestIOMMU *giommu = gdn->giommu;
|
||||||
|
- VFIOContainer *container = giommu->container;
|
||||||
|
+ VFIOContainerBase *bcontainer = giommu->bcontainer;
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||||
|
ram_addr_t translated_addr;
|
||||||
|
int ret = -EINVAL;
|
||||||
|
@@ -1284,12 +1288,13 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||||
|
static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
ram_addr_t ram_addr;
|
||||||
|
|
||||||
|
if (memory_region_is_iommu(section->mr)) {
|
||||||
|
VFIOGuestIOMMU *giommu;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
|
||||||
|
+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
|
||||||
|
if (MEMORY_REGION(giommu->iommu_mr) == section->mr &&
|
||||||
|
giommu->n.start == section->offset_within_region) {
|
||||||
|
Int128 llend;
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index e929435751..20bcb9669a 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -34,8 +34,17 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops)
|
||||||
|
{
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
+ QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
+ VFIOGuestIOMMU *giommu, *tmp;
|
||||||
|
+
|
||||||
|
+ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
|
||||||
|
+ memory_region_unregister_iommu_notifier(
|
||||||
|
+ MEMORY_REGION(giommu->iommu_mr), &giommu->n);
|
||||||
|
+ QLIST_REMOVE(giommu, giommu_next);
|
||||||
|
+ g_free(giommu);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 32a0251dd1..133d3c8f5c 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -556,7 +556,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container->dirty_pages_supported = false;
|
||||||
|
container->dma_max_mappings = 0;
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
- QLIST_INIT(&container->giommu_list);
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
vfio_container_init(bcontainer, &vfio_legacy_ops);
|
||||||
|
@@ -686,16 +685,9 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
|
||||||
|
if (QLIST_EMPTY(&container->group_list)) {
|
||||||
|
VFIOAddressSpace *space = container->space;
|
||||||
|
- VFIOGuestIOMMU *giommu, *tmp;
|
||||||
|
|
||||||
|
QLIST_REMOVE(container, next);
|
||||||
|
|
||||||
|
- QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
|
||||||
|
- memory_region_unregister_iommu_notifier(
|
||||||
|
- MEMORY_REGION(giommu->iommu_mr), &giommu->n);
|
||||||
|
- QLIST_REMOVE(giommu, giommu_next);
|
||||||
|
- g_free(giommu);
|
||||||
|
- }
|
||||||
|
vfio_container_destroy(bcontainer);
|
||||||
|
|
||||||
|
trace_vfio_disconnect_container(container->fd);
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 24a26345e5..6be082b8f2 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -95,7 +95,6 @@ typedef struct VFIOContainer {
|
||||||
|
uint64_t max_dirty_bitmap_size;
|
||||||
|
unsigned long pgsizes;
|
||||||
|
unsigned int dma_max_mappings;
|
||||||
|
- QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
@@ -104,14 +103,6 @@ typedef struct VFIOContainer {
|
||||||
|
GList *iova_ranges;
|
||||||
|
} VFIOContainer;
|
||||||
|
|
||||||
|
-typedef struct VFIOGuestIOMMU {
|
||||||
|
- VFIOContainer *container;
|
||||||
|
- IOMMUMemoryRegion *iommu_mr;
|
||||||
|
- hwaddr iommu_offset;
|
||||||
|
- IOMMUNotifier n;
|
||||||
|
- QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||||
|
-} VFIOGuestIOMMU;
|
||||||
|
-
|
||||||
|
typedef struct VFIORamDiscardListener {
|
||||||
|
VFIOContainer *container;
|
||||||
|
MemoryRegion *mr;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 577f52ccbc..a11aec5755 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -29,8 +29,17 @@ typedef struct {
|
||||||
|
*/
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
+ QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
|
||||||
|
+typedef struct VFIOGuestIOMMU {
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
+ IOMMUMemoryRegion *iommu_mr;
|
||||||
|
+ hwaddr iommu_offset;
|
||||||
|
+ IOMMUNotifier n;
|
||||||
|
+ QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||||
|
+} VFIOGuestIOMMU;
|
||||||
|
+
|
||||||
|
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
void *vaddr, bool readonly);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
55
kvm-vfio-common-return-early-if-space-isn-t-empty.patch
Normal file
55
kvm-vfio-common-return-early-if-space-isn-t-empty.patch
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
From e9476ee64edd81fafd409fb3ceaad80668446bff Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:02 +0800
|
||||||
|
Subject: [PATCH 023/101] vfio/common: return early if space isn't empty
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [22/67] 239c21ae7cddc8efabc041b9c7774f15b4964631 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This is a trivial optimization. If there is an active container in space,
|
||||||
|
vfio_reset_handler will never be unregistered. So invert the check of
|
||||||
|
space->containers and return early.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 1eae5b7bd3ddd03b5591e9122b011c6520064a5a)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 9 ++++++---
|
||||||
|
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 572ae7c934..934f4f5446 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1462,10 +1462,13 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as)
|
||||||
|
|
||||||
|
void vfio_put_address_space(VFIOAddressSpace *space)
|
||||||
|
{
|
||||||
|
- if (QLIST_EMPTY(&space->containers)) {
|
||||||
|
- QLIST_REMOVE(space, list);
|
||||||
|
- g_free(space);
|
||||||
|
+ if (!QLIST_EMPTY(&space->containers)) {
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ QLIST_REMOVE(space, list);
|
||||||
|
+ g_free(space);
|
||||||
|
+
|
||||||
|
if (QLIST_EMPTY(&vfio_address_spaces)) {
|
||||||
|
qemu_unregister_reset(vfio_reset_handler, NULL);
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
257
kvm-vfio-container-Convert-functions-to-base-container.patch
Normal file
257
kvm-vfio-container-Convert-functions-to-base-container.patch
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
From facad966c42b1ec38b12e45f2b84bd059542b60c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:35 +0800
|
||||||
|
Subject: [PATCH 010/101] vfio/container: Convert functions to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [9/67] a0002d6e9cb0ca76e3e2f25208ecba22dd9f9a88 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
With the prospect of getting rid of VFIOContainer refs
|
||||||
|
in common.c, let's convert misc functions to use the base
|
||||||
|
container object instead:
|
||||||
|
|
||||||
|
vfio_devices_all_dirty_tracking
|
||||||
|
vfio_devices_all_device_dirty_tracking
|
||||||
|
vfio_devices_all_running_and_mig_active
|
||||||
|
vfio_devices_query_dirty_bitmap
|
||||||
|
vfio_get_dirty_bitmap
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit e1cac6b203f45b5322e831e8d50edfdf18609b09)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 42 +++++++++++++++--------------------
|
||||||
|
hw/vfio/container.c | 6 ++---
|
||||||
|
hw/vfio/trace-events | 2 +-
|
||||||
|
include/hw/vfio/vfio-common.h | 9 ++++----
|
||||||
|
4 files changed, 26 insertions(+), 33 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 9415395ed9..cf6618f6ed 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -177,9 +177,8 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
|
||||||
|
migration->device_state == VFIO_DEVICE_STATE_PRE_COPY_P2P;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||||
|
+static bool vfio_devices_all_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
MigrationState *ms = migrate_get_current();
|
||||||
|
|
||||||
|
@@ -204,9 +203,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||||
|
+bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
@@ -222,9 +220,8 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||||
|
* Check if all VFIO devices are running and migration is active, which is
|
||||||
|
* essentially equivalent to the migration being in pre-copy phase.
|
||||||
|
*/
|
||||||
|
-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
|
||||||
|
+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
if (!migration_is_active(migrate_get_current())) {
|
||||||
|
@@ -1082,7 +1079,7 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||||
|
+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) {
|
||||||
|
ret = vfio_devices_dma_logging_start(container);
|
||||||
|
} else {
|
||||||
|
ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
@@ -1101,7 +1098,7 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
- if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||||
|
+ if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) {
|
||||||
|
vfio_devices_dma_logging_stop(container);
|
||||||
|
} else {
|
||||||
|
ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
@@ -1141,11 +1138,10 @@ static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||||
|
+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap, hwaddr iova,
|
||||||
|
hwaddr size)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
@@ -1165,17 +1161,16 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
uint64_t size, ram_addr_t ram_addr)
|
||||||
|
{
|
||||||
|
bool all_device_dirty_tracking =
|
||||||
|
- vfio_devices_all_device_dirty_tracking(container);
|
||||||
|
+ vfio_devices_all_device_dirty_tracking(bcontainer);
|
||||||
|
uint64_t dirty_pages;
|
||||||
|
VFIOBitmap vbmap;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (!container->bcontainer.dirty_pages_supported &&
|
||||||
|
- !all_device_dirty_tracking) {
|
||||||
|
+ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
|
||||||
|
cpu_physical_memory_set_dirty_range(ram_addr, size,
|
||||||
|
tcg_enabled() ? DIRTY_CLIENTS_ALL :
|
||||||
|
DIRTY_CLIENTS_NOCODE);
|
||||||
|
@@ -1188,10 +1183,9 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (all_device_dirty_tracking) {
|
||||||
|
- ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
|
||||||
|
+ ret = vfio_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap,
|
||||||
|
- iova, size);
|
||||||
|
+ ret = vfio_container_query_dirty_bitmap(bcontainer, &vbmap, iova, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
@@ -1201,8 +1195,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
|
||||||
|
vbmap.pages);
|
||||||
|
|
||||||
|
- trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size,
|
||||||
|
- ram_addr, dirty_pages);
|
||||||
|
+ trace_vfio_get_dirty_bitmap(iova, size, vbmap.size, ram_addr, dirty_pages);
|
||||||
|
out:
|
||||||
|
g_free(vbmap.bitmap);
|
||||||
|
|
||||||
|
@@ -1236,8 +1229,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
|
||||||
|
- ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
|
||||||
|
- translated_addr);
|
||||||
|
+ ret = vfio_get_dirty_bitmap(&container->bcontainer, iova,
|
||||||
|
+ iotlb->addr_mask + 1, translated_addr);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
@@ -1266,7 +1259,8 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
|
||||||
|
* Sync the whole mapped region (spanning multiple individual mappings)
|
||||||
|
* in one go.
|
||||||
|
*/
|
||||||
|
- return vfio_get_dirty_bitmap(vrdl->container, iova, size, ram_addr);
|
||||||
|
+ return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size,
|
||||||
|
+ ram_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||||
|
@@ -1335,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||||
|
section->offset_within_region;
|
||||||
|
|
||||||
|
- return vfio_get_dirty_bitmap(container,
|
||||||
|
+ return vfio_get_dirty_bitmap(&container->bcontainer,
|
||||||
|
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
|
||||||
|
int128_get64(section->size), ram_addr);
|
||||||
|
}
|
||||||
|
@@ -1350,7 +1344,7 @@ static void vfio_listener_log_sync(MemoryListener *listener,
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (vfio_devices_all_dirty_tracking(container)) {
|
||||||
|
+ if (vfio_devices_all_dirty_tracking(&container->bcontainer)) {
|
||||||
|
ret = vfio_sync_dirty_bitmap(container, section);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 63a906de93..7bd81eab09 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -129,8 +129,8 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
bool need_dirty_sync = false;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
|
||||||
|
- if (!vfio_devices_all_device_dirty_tracking(container) &&
|
||||||
|
+ if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) {
|
||||||
|
+ if (!vfio_devices_all_device_dirty_tracking(bcontainer) &&
|
||||||
|
container->bcontainer.dirty_pages_supported) {
|
||||||
|
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
@@ -162,7 +162,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_dirty_sync) {
|
||||||
|
- ret = vfio_get_dirty_bitmap(container, iova, size,
|
||||||
|
+ ret = vfio_get_dirty_bitmap(bcontainer, iova, size,
|
||||||
|
iotlb->translated_addr);
|
||||||
|
if (ret) {
|
||||||
|
return ret;
|
||||||
|
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||||
|
index 9f7fedee98..08a1f9dfa4 100644
|
||||||
|
--- a/hw/vfio/trace-events
|
||||||
|
+++ b/hw/vfio/trace-events
|
||||||
|
@@ -117,7 +117,7 @@ vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Devic
|
||||||
|
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
||||||
|
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
|
||||||
|
vfio_legacy_dma_unmap_overflow_workaround(void) ""
|
||||||
|
-vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
|
||||||
|
+vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
|
||||||
|
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
|
||||||
|
|
||||||
|
# platform.c
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 9740cf9fbc..bc67e1316c 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -186,7 +186,6 @@ typedef struct VFIODisplay {
|
||||||
|
|
||||||
|
VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
|
||||||
|
void vfio_put_address_space(VFIOAddressSpace *space);
|
||||||
|
-bool vfio_devices_all_running_and_saving(VFIOContainer *container);
|
||||||
|
|
||||||
|
/* SPAPR specific */
|
||||||
|
int vfio_container_add_section_window(VFIOContainer *container,
|
||||||
|
@@ -260,11 +259,11 @@ bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp);
|
||||||
|
void vfio_migration_exit(VFIODevice *vbasedev);
|
||||||
|
|
||||||
|
int vfio_bitmap_alloc(VFIOBitmap *vbmap, hwaddr size);
|
||||||
|
-bool vfio_devices_all_running_and_mig_active(VFIOContainer *container);
|
||||||
|
-bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container);
|
||||||
|
-int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||||
|
+bool vfio_devices_all_running_and_mig_active(VFIOContainerBase *bcontainer);
|
||||||
|
+bool vfio_devices_all_device_dirty_tracking(VFIOContainerBase *bcontainer);
|
||||||
|
+int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
VFIOBitmap *vbmap, hwaddr iova,
|
||||||
|
hwaddr size);
|
||||||
|
-int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
+int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
uint64_t size, ram_addr_t ram_addr);
|
||||||
|
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
97
kvm-vfio-container-Implement-attach-detach_device.patch
Normal file
97
kvm-vfio-container-Implement-attach-detach_device.patch
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
From a5d19bfbfddb36fa6d68ca6282a5acd9b245d48a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:41 +0800
|
||||||
|
Subject: [PATCH 016/101] vfio/container: Implement attach/detach_device
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [15/67] e233c90e4af2061dc0612bc1b1d17be1a47daeae (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 1eb31f13b24c49884d8256f96a6664df2dd0824d)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 16 ++++++++++++++++
|
||||||
|
hw/vfio/container.c | 12 +++++-------
|
||||||
|
2 files changed, 21 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 8ef2e7967d..483ba82089 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1498,3 +1498,19 @@ retry:
|
||||||
|
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
+ AddressSpace *as, Error **errp)
|
||||||
|
+{
|
||||||
|
+ const VFIOIOMMUOps *ops = &vfio_legacy_ops;
|
||||||
|
+
|
||||||
|
+ return ops->attach_device(name, vbasedev, as, errp);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
+{
|
||||||
|
+ if (!vbasedev->bcontainer) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ vbasedev->bcontainer->ops->detach_device(vbasedev);
|
||||||
|
+}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 721c0d7375..6bacf38222 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -873,8 +873,8 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
|
||||||
|
* @name and @vbasedev->name are likely to be different depending
|
||||||
|
* on the type of the device, hence the need for passing @name
|
||||||
|
*/
|
||||||
|
-int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
- AddressSpace *as, Error **errp)
|
||||||
|
+static int vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
|
||||||
|
+ AddressSpace *as, Error **errp)
|
||||||
|
{
|
||||||
|
int groupid = vfio_device_groupid(vbasedev, errp);
|
||||||
|
VFIODevice *vbasedev_iter;
|
||||||
|
@@ -914,14 +914,10 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
+static void vfio_legacy_detach_device(VFIODevice *vbasedev)
|
||||||
|
{
|
||||||
|
VFIOGroup *group = vbasedev->group;
|
||||||
|
|
||||||
|
- if (!vbasedev->bcontainer) {
|
||||||
|
- return;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
QLIST_REMOVE(vbasedev, global_next);
|
||||||
|
QLIST_REMOVE(vbasedev, container_next);
|
||||||
|
vbasedev->bcontainer = NULL;
|
||||||
|
@@ -933,6 +929,8 @@ void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
.dma_map = vfio_legacy_dma_map,
|
||||||
|
.dma_unmap = vfio_legacy_dma_unmap,
|
||||||
|
+ .attach_device = vfio_legacy_attach_device,
|
||||||
|
+ .detach_device = vfio_legacy_detach_device,
|
||||||
|
.set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
|
||||||
|
.query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
|
||||||
|
};
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,65 @@
|
|||||||
|
From c3c9f366c356032fa57ff7cc664732ba87ceb3fb Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:18 +0100
|
||||||
|
Subject: [PATCH 051/101] vfio/container: Initialize VFIOIOMMUOps under
|
||||||
|
vfio_init_container()
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [50/67] f325136391b22babadb1be3394c527deecdcd3ca (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
vfio_init_container() already defines the IOMMU type of the container.
|
||||||
|
Do the same for the VFIOIOMMUOps struct. This prepares ground for the
|
||||||
|
following patches that will deduce the associated VFIOIOMMUOps struct
|
||||||
|
from the IOMMU type.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit bffe92af0e7571868d47a1d1cd2205e13054d492)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index afcfe80488..f4a0434a52 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -370,7 +370,7 @@ static int vfio_get_iommu_type(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||||
|
- Error **errp)
|
||||||
|
+ VFIOAddressSpace *space, Error **errp)
|
||||||
|
{
|
||||||
|
int iommu_type, ret;
|
||||||
|
|
||||||
|
@@ -401,6 +401,7 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||||
|
}
|
||||||
|
|
||||||
|
container->iommu_type = iommu_type;
|
||||||
|
+ vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -583,9 +584,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
container->fd = fd;
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
- vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||||
|
|
||||||
|
- ret = vfio_init_container(container, group->fd, errp);
|
||||||
|
+ ret = vfio_init_container(container, group->fd, space, errp);
|
||||||
|
if (ret) {
|
||||||
|
goto free_container_exit;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,55 @@
|
|||||||
|
From 29f13011e62f5370ef7fb3248dc85c90ae5bb042 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:21 +0100
|
||||||
|
Subject: [PATCH 054/101] vfio/container: Intoduce a new VFIOIOMMUClass::setup
|
||||||
|
handler
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [53/67] 8641161afc33d68795bcf51a47e89061b34d50a8 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This will help in converting the sPAPR IOMMU backend to a QOM interface.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 61d893f2cdb34a2b0255f9b5fbba6b49b94ff730)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 1 +
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
2 files changed, 2 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 220e838a91..c22bdd3216 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -1129,6 +1129,7 @@ static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)
|
||||||
|
{
|
||||||
|
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
|
||||||
|
|
||||||
|
+ vioc->setup = vfio_legacy_setup;
|
||||||
|
vioc->dma_map = vfio_legacy_dma_map;
|
||||||
|
vioc->dma_unmap = vfio_legacy_dma_unmap;
|
||||||
|
vioc->attach_device = vfio_legacy_attach_device;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index c60370fc5e..ce8b1fba88 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -109,6 +109,7 @@ struct VFIOIOMMUClass {
|
||||||
|
InterfaceClass parent_class;
|
||||||
|
|
||||||
|
/* basic feature */
|
||||||
|
+ int (*setup)(VFIOContainerBase *bcontainer, Error **errp);
|
||||||
|
int (*dma_map)(const VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
void *vaddr, bool readonly);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
143
kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch
Normal file
143
kvm-vfio-container-Introduce-a-VFIOIOMMU-QOM-interface.patch
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
From 5b63e4595e106196ef922b7f762c8f4150d73979 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:19 +0100
|
||||||
|
Subject: [PATCH 052/101] vfio/container: Introduce a VFIOIOMMU QOM interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [51/67] 7c06e2165efe94dcd203d44e422a7aa9fac9816c (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
VFIOContainerBase was not introduced as an abstract QOM object because
|
||||||
|
it felt unnecessary to expose all the IOMMU backends to the QEMU
|
||||||
|
machine and human interface. However, we can still abstract the IOMMU
|
||||||
|
backend handlers using a QOM interface class. This provides more
|
||||||
|
flexibility when referencing the various implementations.
|
||||||
|
|
||||||
|
Simply transform the VFIOIOMMUOps struct into an InterfaceClass and do
|
||||||
|
some initial name replacements. Next changes will start converting
|
||||||
|
VFIOIOMMUOps.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit fdaa774e67435a328c0e28006c4d749f2198294a)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 2 +-
|
||||||
|
hw/vfio/container-base.c | 12 +++++++++++-
|
||||||
|
hw/vfio/pci.c | 2 +-
|
||||||
|
include/hw/vfio/vfio-container-base.h | 23 +++++++++++++++++++----
|
||||||
|
4 files changed, 32 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 08a3e57672..49dab41566 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1503,7 +1503,7 @@ retry:
|
||||||
|
int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
AddressSpace *as, Error **errp)
|
||||||
|
{
|
||||||
|
- const VFIOIOMMUOps *ops = &vfio_legacy_ops;
|
||||||
|
+ const VFIOIOMMUClass *ops = &vfio_legacy_ops;
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
if (vbasedev->iommufd) {
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 1ffd25bbfa..913ae49077 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -72,7 +72,7 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
}
|
||||||
|
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
- const VFIOIOMMUOps *ops)
|
||||||
|
+ const VFIOIOMMUClass *ops)
|
||||||
|
{
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
bcontainer->space = space;
|
||||||
|
@@ -99,3 +99,13 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
|
||||||
|
g_list_free_full(bcontainer->iova_ranges, g_free);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+static const TypeInfo types[] = {
|
||||||
|
+ {
|
||||||
|
+ .name = TYPE_VFIO_IOMMU,
|
||||||
|
+ .parent = TYPE_INTERFACE,
|
||||||
|
+ .class_size = sizeof(VFIOIOMMUClass),
|
||||||
|
+ },
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+DEFINE_TYPES(types)
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index 83c3238608..adb7c09367 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -2491,7 +2491,7 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||||
|
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||||
|
{
|
||||||
|
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
- const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops;
|
||||||
|
+ const VFIOIOMMUClass *ops = vbasedev->bcontainer->ops;
|
||||||
|
|
||||||
|
return ops->pci_hot_reset(vbasedev, single);
|
||||||
|
}
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 5c9594b6c7..d6147b4aee 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -16,7 +16,8 @@
|
||||||
|
#include "exec/memory.h"
|
||||||
|
|
||||||
|
typedef struct VFIODevice VFIODevice;
|
||||||
|
-typedef struct VFIOIOMMUOps VFIOIOMMUOps;
|
||||||
|
+typedef struct VFIOIOMMUClass VFIOIOMMUClass;
|
||||||
|
+#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned long *bitmap;
|
||||||
|
@@ -34,7 +35,7 @@ typedef struct VFIOAddressSpace {
|
||||||
|
* This is the base object for vfio container backends
|
||||||
|
*/
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
- const VFIOIOMMUOps *ops;
|
||||||
|
+ const VFIOIOMMUClass *ops;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
MemoryListener listener;
|
||||||
|
Error *error;
|
||||||
|
@@ -88,10 +89,24 @@ int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
|
||||||
|
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer,
|
||||||
|
VFIOAddressSpace *space,
|
||||||
|
- const VFIOIOMMUOps *ops);
|
||||||
|
+ const VFIOIOMMUClass *ops);
|
||||||
|
void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
|
||||||
|
-struct VFIOIOMMUOps {
|
||||||
|
+
|
||||||
|
+#define TYPE_VFIO_IOMMU "vfio-iommu"
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * VFIOContainerBase is not an abstract QOM object because it felt
|
||||||
|
+ * unnecessary to expose all the IOMMU backends to the QEMU machine
|
||||||
|
+ * and human interface. However, we can still abstract the IOMMU
|
||||||
|
+ * backend handlers using a QOM interface class. This provides more
|
||||||
|
+ * flexibility when referencing the various implementations.
|
||||||
|
+ */
|
||||||
|
+DECLARE_CLASS_CHECKERS(VFIOIOMMUClass, VFIO_IOMMU, TYPE_VFIO_IOMMU)
|
||||||
|
+
|
||||||
|
+struct VFIOIOMMUClass {
|
||||||
|
+ InterfaceClass parent_class;
|
||||||
|
+
|
||||||
|
/* basic feature */
|
||||||
|
int (*dma_map)(const VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
168
kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch
Normal file
168
kvm-vfio-container-Introduce-a-VFIOIOMMU-legacy-QOM-inte.patch
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
From 58927bf236541b9423f855eca1970f7a3cf864a9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:20 +0100
|
||||||
|
Subject: [PATCH 053/101] vfio/container: Introduce a VFIOIOMMU legacy QOM
|
||||||
|
interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [52/67] a81f39d13305e84699313e17ae64d10ff4b09067 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Convert the legacy VFIOIOMMUOps struct to the new VFIOIOMMU QOM
|
||||||
|
interface. The set of operations for this backend can be referenced
|
||||||
|
with a literal typename instead of a C struct. This will simplify
|
||||||
|
support of multiple backends.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 9812feefab3a4ff95a6cfd73aecb120b406bc98c)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 6 ++-
|
||||||
|
hw/vfio/container.c | 58 ++++++++++++++++++++++-----
|
||||||
|
include/hw/vfio/vfio-common.h | 1 -
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
4 files changed, 55 insertions(+), 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 49dab41566..2329d0efc8 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1503,13 +1503,17 @@ retry:
|
||||||
|
int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
AddressSpace *as, Error **errp)
|
||||||
|
{
|
||||||
|
- const VFIOIOMMUClass *ops = &vfio_legacy_ops;
|
||||||
|
+ const VFIOIOMMUClass *ops =
|
||||||
|
+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
if (vbasedev->iommufd) {
|
||||||
|
ops = &vfio_iommufd_ops;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
+
|
||||||
|
+ assert(ops);
|
||||||
|
+
|
||||||
|
return ops->attach_device(name, vbasedev, as, errp);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index f4a0434a52..220e838a91 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -369,10 +369,30 @@ static int vfio_get_iommu_type(VFIOContainer *container,
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
|
||||||
|
+ */
|
||||||
|
+static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp)
|
||||||
|
+{
|
||||||
|
+ ObjectClass *klass = NULL;
|
||||||
|
+
|
||||||
|
+ switch (iommu_type) {
|
||||||
|
+ case VFIO_TYPE1v2_IOMMU:
|
||||||
|
+ case VFIO_TYPE1_IOMMU:
|
||||||
|
+ klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ g_assert_not_reached();
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ return VFIO_IOMMU_CLASS(klass);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||||
|
VFIOAddressSpace *space, Error **errp)
|
||||||
|
{
|
||||||
|
int iommu_type, ret;
|
||||||
|
+ const VFIOIOMMUClass *vioc;
|
||||||
|
|
||||||
|
iommu_type = vfio_get_iommu_type(container, errp);
|
||||||
|
if (iommu_type < 0) {
|
||||||
|
@@ -401,7 +421,14 @@ static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||||
|
}
|
||||||
|
|
||||||
|
container->iommu_type = iommu_type;
|
||||||
|
- vfio_container_init(&container->bcontainer, space, &vfio_legacy_ops);
|
||||||
|
+
|
||||||
|
+ vioc = vfio_get_iommu_class(iommu_type, errp);
|
||||||
|
+ if (!vioc) {
|
||||||
|
+ error_setg(errp, "No available IOMMU models");
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vfio_container_init(&container->bcontainer, space, vioc);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1098,12 +1125,25 @@ out_single:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
- .dma_map = vfio_legacy_dma_map,
|
||||||
|
- .dma_unmap = vfio_legacy_dma_unmap,
|
||||||
|
- .attach_device = vfio_legacy_attach_device,
|
||||||
|
- .detach_device = vfio_legacy_detach_device,
|
||||||
|
- .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
|
||||||
|
- .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
|
||||||
|
- .pci_hot_reset = vfio_legacy_pci_hot_reset,
|
||||||
|
+static void vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)
|
||||||
|
+{
|
||||||
|
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
|
||||||
|
+
|
||||||
|
+ vioc->dma_map = vfio_legacy_dma_map;
|
||||||
|
+ vioc->dma_unmap = vfio_legacy_dma_unmap;
|
||||||
|
+ vioc->attach_device = vfio_legacy_attach_device;
|
||||||
|
+ vioc->detach_device = vfio_legacy_detach_device;
|
||||||
|
+ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking;
|
||||||
|
+ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap;
|
||||||
|
+ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
|
||||||
|
};
|
||||||
|
+
|
||||||
|
+static const TypeInfo types[] = {
|
||||||
|
+ {
|
||||||
|
+ .name = TYPE_VFIO_IOMMU_LEGACY,
|
||||||
|
+ .parent = TYPE_VFIO_IOMMU,
|
||||||
|
+ .class_init = vfio_iommu_legacy_class_init,
|
||||||
|
+ },
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+DEFINE_TYPES(types)
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index b8aa8a5495..14c497b6b0 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||||
|
typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
|
||||||
|
extern VFIOGroupList vfio_group_list;
|
||||||
|
extern VFIODeviceList vfio_device_list;
|
||||||
|
-extern const VFIOIOMMUOps vfio_legacy_ops;
|
||||||
|
extern const VFIOIOMMUOps vfio_iommufd_ops;
|
||||||
|
extern const MemoryListener vfio_memory_listener;
|
||||||
|
extern int vfio_kvm_device_fd;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index d6147b4aee..c60370fc5e 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -94,6 +94,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
|
||||||
|
|
||||||
|
#define TYPE_VFIO_IOMMU "vfio-iommu"
|
||||||
|
+#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VFIOContainerBase is not an abstract QOM object because it felt
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
71
kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch
Normal file
71
kvm-vfio-container-Introduce-a-empty-VFIOIOMMUOps.patch
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
From e56f961fbe95a53a52c5eca00b4fca17d825e860 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:28 +0800
|
||||||
|
Subject: [PATCH 003/101] vfio/container: Introduce a empty VFIOIOMMUOps
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [2/67] 0de0afffce42fa4a17f6d33a10b6162cdfbe8150 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This empty VFIOIOMMUOps named vfio_legacy_ops will hold all general
|
||||||
|
IOMMU ops of legacy container.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit d24668579184f4098779983724ec74cd3db62e10)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 5 +++++
|
||||||
|
include/hw/vfio/vfio-common.h | 2 +-
|
||||||
|
2 files changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 242010036a..4bc43ddfa4 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -472,6 +472,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
Error **errp)
|
||||||
|
{
|
||||||
|
VFIOContainer *container;
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
int ret, fd;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
|
||||||
|
@@ -552,6 +553,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
QLIST_INIT(&container->giommu_list);
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
+ bcontainer = &container->bcontainer;
|
||||||
|
+ bcontainer->ops = &vfio_legacy_ops;
|
||||||
|
|
||||||
|
ret = vfio_init_container(container, group->fd, errp);
|
||||||
|
if (ret) {
|
||||||
|
@@ -933,3 +936,5 @@ void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
vfio_put_base_device(vbasedev);
|
||||||
|
vfio_put_group(group);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+const VFIOIOMMUOps vfio_legacy_ops;
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 586d153c12..678161f207 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -255,7 +255,7 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||||
|
typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
|
||||||
|
extern VFIOGroupList vfio_group_list;
|
||||||
|
extern VFIODeviceList vfio_device_list;
|
||||||
|
-
|
||||||
|
+extern const VFIOIOMMUOps vfio_legacy_ops;
|
||||||
|
extern const MemoryListener vfio_memory_listener;
|
||||||
|
extern int vfio_kvm_device_fd;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
118
kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch
Normal file
118
kvm-vfio-container-Introduce-vfio_legacy_setup-for-furth.patch
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
From 6c7546756e979e4f5ba29ae51a21c63fa90492cf Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:17 +0100
|
||||||
|
Subject: [PATCH 050/101] vfio/container: Introduce vfio_legacy_setup() for
|
||||||
|
further cleanups
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [49/67] 3a621ba2605c98b7fbf7fd9f93a207f728f1202e (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This will help subsequent patches to unify the initialization of type1
|
||||||
|
and sPAPR IOMMU backends.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit d3764db87531cd53849ccee9b2f72aede90ccf5b)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 63 +++++++++++++++++++++++++--------------------
|
||||||
|
1 file changed, 35 insertions(+), 28 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 1e77a2929e..afcfe80488 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -474,6 +474,35 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp)
|
||||||
|
+{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
+ g_autofree struct vfio_iommu_type1_info *info = NULL;
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ ret = vfio_get_iommu_info(container, &info);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
|
||||||
|
+ bcontainer->pgsizes = info->iova_pgsizes;
|
||||||
|
+ } else {
|
||||||
|
+ bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
|
||||||
|
+ bcontainer->dma_max_mappings = 65535;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vfio_get_info_iova_range(info, bcontainer);
|
||||||
|
+
|
||||||
|
+ vfio_get_iommu_info_migration(container, info);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
Error **errp)
|
||||||
|
{
|
||||||
|
@@ -570,40 +599,18 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
switch (container->iommu_type) {
|
||||||
|
case VFIO_TYPE1v2_IOMMU:
|
||||||
|
case VFIO_TYPE1_IOMMU:
|
||||||
|
- {
|
||||||
|
- struct vfio_iommu_type1_info *info;
|
||||||
|
-
|
||||||
|
- ret = vfio_get_iommu_info(container, &info);
|
||||||
|
- if (ret) {
|
||||||
|
- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
|
||||||
|
- goto enable_discards_exit;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
|
||||||
|
- bcontainer->pgsizes = info->iova_pgsizes;
|
||||||
|
- } else {
|
||||||
|
- bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
|
||||||
|
- bcontainer->dma_max_mappings = 65535;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- vfio_get_info_iova_range(info, bcontainer);
|
||||||
|
-
|
||||||
|
- vfio_get_iommu_info_migration(container, info);
|
||||||
|
- g_free(info);
|
||||||
|
+ ret = vfio_legacy_setup(bcontainer, errp);
|
||||||
|
break;
|
||||||
|
- }
|
||||||
|
case VFIO_SPAPR_TCE_v2_IOMMU:
|
||||||
|
case VFIO_SPAPR_TCE_IOMMU:
|
||||||
|
- {
|
||||||
|
ret = vfio_spapr_container_init(container, errp);
|
||||||
|
- if (ret) {
|
||||||
|
- goto enable_discards_exit;
|
||||||
|
- }
|
||||||
|
break;
|
||||||
|
+ default:
|
||||||
|
+ g_assert_not_reached();
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto enable_discards_exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
vfio_kvm_device_add_group(group);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
102
kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch
Normal file
102
kvm-vfio-container-Move-dirty_pgsizes-and-max_dirty_bitm.patch
Normal file
@ -0,0 +1,102 @@
|
|||||||
|
From 6a597d7c82a4538fa1f928db7e600ec2e5a44361 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:39 +0800
|
||||||
|
Subject: [PATCH 014/101] vfio/container: Move dirty_pgsizes and
|
||||||
|
max_dirty_bitmap_size to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [13/67] b9fe57174368e36788b017cc2ad13b748592cfc2 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 4d6b95010c59127ac4f7230d6ee88b5d0e99738c)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 9 +++++----
|
||||||
|
include/hw/vfio/vfio-common.h | 2 --
|
||||||
|
include/hw/vfio/vfio-container-base.h | 2 ++
|
||||||
|
3 files changed, 7 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 5c1dee8c9f..c8088a8174 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -64,6 +64,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
struct vfio_iommu_type1_dma_unmap *unmap;
|
||||||
|
struct vfio_bitmap *bitmap;
|
||||||
|
VFIOBitmap vbmap;
|
||||||
|
@@ -91,7 +92,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
|
||||||
|
bitmap->size = vbmap.size;
|
||||||
|
bitmap->data = (__u64 *)vbmap.bitmap;
|
||||||
|
|
||||||
|
- if (vbmap.size > container->max_dirty_bitmap_size) {
|
||||||
|
+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
|
||||||
|
error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
|
||||||
|
ret = -E2BIG;
|
||||||
|
goto unmap_exit;
|
||||||
|
@@ -131,7 +132,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
|
||||||
|
if (iotlb && vfio_devices_all_running_and_mig_active(bcontainer)) {
|
||||||
|
if (!vfio_devices_all_device_dirty_tracking(bcontainer) &&
|
||||||
|
- container->bcontainer.dirty_pages_supported) {
|
||||||
|
+ bcontainer->dirty_pages_supported) {
|
||||||
|
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -469,8 +470,8 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
*/
|
||||||
|
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
|
||||||
|
bcontainer->dirty_pages_supported = true;
|
||||||
|
- container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||||
|
- container->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||||
|
+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||||
|
+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 922022cbc6..b1c9fe711b 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -80,8 +80,6 @@ typedef struct VFIOContainer {
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
MemoryListener prereg_listener;
|
||||||
|
unsigned iommu_type;
|
||||||
|
- uint64_t dirty_pgsizes;
|
||||||
|
- uint64_t max_dirty_bitmap_size;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
GList *iova_ranges;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 95f8d319e0..80e4a993c5 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -39,6 +39,8 @@ typedef struct VFIOContainerBase {
|
||||||
|
MemoryListener listener;
|
||||||
|
Error *error;
|
||||||
|
bool initialized;
|
||||||
|
+ uint64_t dirty_pgsizes;
|
||||||
|
+ uint64_t max_dirty_bitmap_size;
|
||||||
|
unsigned long pgsizes;
|
||||||
|
unsigned int dma_max_mappings;
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
168
kvm-vfio-container-Move-iova_ranges-to-base-container.patch
Normal file
168
kvm-vfio-container-Move-iova_ranges-to-base-container.patch
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
From 882143ef30da4182f049eb8192e0fac317c372b3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:40 +0800
|
||||||
|
Subject: [PATCH 015/101] vfio/container: Move iova_ranges to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [14/67] 49f2e3c484b4c0c63be9aa4eb1bf08804dcb1ec3 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Meanwhile remove the helper function vfio_free_container as it
|
||||||
|
only calls g_free now.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit f79baf8c9575ac3193ca86ec508791c86d96b13e)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 5 +++--
|
||||||
|
hw/vfio/container-base.c | 3 +++
|
||||||
|
hw/vfio/container.c | 19 ++++++-------------
|
||||||
|
include/hw/vfio/vfio-common.h | 1 -
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
5 files changed, 13 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index be623e544b..8ef2e7967d 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -637,9 +637,10 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (container->iova_ranges) {
|
||||||
|
+ if (bcontainer->iova_ranges) {
|
||||||
|
ret = memory_region_iommu_set_iova_ranges(giommu->iommu_mr,
|
||||||
|
- container->iova_ranges, &err);
|
||||||
|
+ bcontainer->iova_ranges,
|
||||||
|
+ &err);
|
||||||
|
if (ret) {
|
||||||
|
g_free(giommu);
|
||||||
|
goto fail;
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 7f508669f5..0177f43741 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
bcontainer->error = NULL;
|
||||||
|
bcontainer->dirty_pages_supported = false;
|
||||||
|
bcontainer->dma_max_mappings = 0;
|
||||||
|
+ bcontainer->iova_ranges = NULL;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
QLIST_INIT(&bcontainer->vrdl_list);
|
||||||
|
}
|
||||||
|
@@ -70,4 +71,6 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
QLIST_REMOVE(giommu, giommu_next);
|
||||||
|
g_free(giommu);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ g_list_free_full(bcontainer->iova_ranges, g_free);
|
||||||
|
}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index c8088a8174..721c0d7375 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -308,7 +308,7 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
|
||||||
|
- VFIOContainer *container)
|
||||||
|
+ VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
struct vfio_info_cap_header *hdr;
|
||||||
|
struct vfio_iommu_type1_info_cap_iova_range *cap;
|
||||||
|
@@ -326,8 +326,8 @@ static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
|
||||||
|
|
||||||
|
range_set_bounds(range, cap->iova_ranges[i].start,
|
||||||
|
cap->iova_ranges[i].end);
|
||||||
|
- container->iova_ranges =
|
||||||
|
- range_list_insert(container->iova_ranges, range);
|
||||||
|
+ bcontainer->iova_ranges =
|
||||||
|
+ range_list_insert(bcontainer->iova_ranges, range);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
@@ -475,12 +475,6 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_free_container(VFIOContainer *container)
|
||||||
|
-{
|
||||||
|
- g_list_free_full(container->iova_ranges, g_free);
|
||||||
|
- g_free(container);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
Error **errp)
|
||||||
|
{
|
||||||
|
@@ -560,7 +554,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
container->fd = fd;
|
||||||
|
- container->iova_ranges = NULL;
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||||
|
|
||||||
|
@@ -597,7 +590,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
bcontainer->dma_max_mappings = 65535;
|
||||||
|
}
|
||||||
|
|
||||||
|
- vfio_get_info_iova_range(info, container);
|
||||||
|
+ vfio_get_info_iova_range(info, bcontainer);
|
||||||
|
|
||||||
|
vfio_get_iommu_info_migration(container, info);
|
||||||
|
g_free(info);
|
||||||
|
@@ -649,7 +642,7 @@ enable_discards_exit:
|
||||||
|
vfio_ram_block_discard_disable(container, false);
|
||||||
|
|
||||||
|
free_container_exit:
|
||||||
|
- vfio_free_container(container);
|
||||||
|
+ g_free(container);
|
||||||
|
|
||||||
|
close_fd_exit:
|
||||||
|
close(fd);
|
||||||
|
@@ -693,7 +686,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
|
||||||
|
trace_vfio_disconnect_container(container->fd);
|
||||||
|
close(container->fd);
|
||||||
|
- vfio_free_container(container);
|
||||||
|
+ g_free(container);
|
||||||
|
|
||||||
|
vfio_put_address_space(space);
|
||||||
|
}
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index b1c9fe711b..b9e5a0e64b 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -82,7 +82,6 @@ typedef struct VFIOContainer {
|
||||||
|
unsigned iommu_type;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
- GList *iova_ranges;
|
||||||
|
} VFIOContainer;
|
||||||
|
|
||||||
|
typedef struct VFIOHostDMAWindow {
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 80e4a993c5..9658ffb526 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -48,6 +48,7 @@ typedef struct VFIOContainerBase {
|
||||||
|
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
QLIST_HEAD(, VFIODevice) device_list;
|
||||||
|
+ GList *iova_ranges;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
|
||||||
|
typedef struct VFIOGuestIOMMU {
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
522
kvm-vfio-container-Move-listener-to-base-container.patch
Normal file
522
kvm-vfio-container-Move-listener-to-base-container.patch
Normal file
@ -0,0 +1,522 @@
|
|||||||
|
From 36bc7782bb02f81368e3e43a3947d16ad362e137 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:38 +0800
|
||||||
|
Subject: [PATCH 013/101] vfio/container: Move listener to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [12/67] f469ab126c6366170aa2520f9b4d9969d3ae0a04 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Move listener to base container. Also error and initialized fields
|
||||||
|
are moved at the same time.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit c7b313d300f161c650d011a5c9da469bcd5d34e4)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 110 +++++++++++++-------------
|
||||||
|
hw/vfio/container-base.c | 1 +
|
||||||
|
hw/vfio/container.c | 19 +++--
|
||||||
|
hw/vfio/spapr.c | 11 +--
|
||||||
|
include/hw/vfio/vfio-common.h | 3 -
|
||||||
|
include/hw/vfio/vfio-container-base.h | 3 +
|
||||||
|
6 files changed, 74 insertions(+), 73 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index f15665789f..be623e544b 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -541,7 +541,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section,
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool vfio_get_section_iova_range(VFIOContainer *container,
|
||||||
|
+static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section,
|
||||||
|
hwaddr *out_iova, hwaddr *out_end,
|
||||||
|
Int128 *out_llend)
|
||||||
|
@@ -569,8 +569,10 @@ static bool vfio_get_section_iova_range(VFIOContainer *container,
|
||||||
|
static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
+ listener);
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
void *vaddr;
|
||||||
|
@@ -581,7 +583,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
|
||||||
|
+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end,
|
||||||
|
+ &llend)) {
|
||||||
|
if (memory_region_is_ram_device(section->mr)) {
|
||||||
|
trace_vfio_listener_region_add_no_dma_map(
|
||||||
|
memory_region_name(section->mr),
|
||||||
|
@@ -688,13 +691,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = vfio_container_dma_map(&container->bcontainer,
|
||||||
|
- iova, int128_get64(llsize), vaddr,
|
||||||
|
- section->readonly);
|
||||||
|
+ ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
|
||||||
|
+ vaddr, section->readonly);
|
||||||
|
if (ret) {
|
||||||
|
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx", %p) = %d (%s)",
|
||||||
|
- container, iova, int128_get64(llsize), vaddr, ret,
|
||||||
|
+ bcontainer, iova, int128_get64(llsize), vaddr, ret,
|
||||||
|
strerror(-ret));
|
||||||
|
if (memory_region_is_ram_device(section->mr)) {
|
||||||
|
/* Allow unexpected mappings not to be fatal for RAM devices */
|
||||||
|
@@ -716,9 +718,9 @@ fail:
|
||||||
|
* can gracefully fail. Runtime, there's not much we can do other
|
||||||
|
* than throw a hardware error.
|
||||||
|
*/
|
||||||
|
- if (!container->initialized) {
|
||||||
|
- if (!container->error) {
|
||||||
|
- error_propagate_prepend(&container->error, err,
|
||||||
|
+ if (!bcontainer->initialized) {
|
||||||
|
+ if (!bcontainer->error) {
|
||||||
|
+ error_propagate_prepend(&bcontainer->error, err,
|
||||||
|
"Region %s: ",
|
||||||
|
memory_region_name(section->mr));
|
||||||
|
} else {
|
||||||
|
@@ -733,8 +735,10 @@ fail:
|
||||||
|
static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
+ listener);
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
int ret;
|
||||||
|
@@ -767,7 +771,8 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!vfio_get_section_iova_range(container, section, &iova, &end, &llend)) {
|
||||||
|
+ if (!vfio_get_section_iova_range(bcontainer, section, &iova, &end,
|
||||||
|
+ &llend)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -790,22 +795,22 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
if (int128_eq(llsize, int128_2_64())) {
|
||||||
|
/* The unmap ioctl doesn't accept a full 64-bit span. */
|
||||||
|
llsize = int128_rshift(llsize, 1);
|
||||||
|
- ret = vfio_container_dma_unmap(&container->bcontainer, iova,
|
||||||
|
+ ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||||
|
int128_get64(llsize), NULL);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
- container, iova, int128_get64(llsize), ret,
|
||||||
|
+ bcontainer, iova, int128_get64(llsize), ret,
|
||||||
|
strerror(-ret));
|
||||||
|
}
|
||||||
|
iova += int128_get64(llsize);
|
||||||
|
}
|
||||||
|
- ret = vfio_container_dma_unmap(&container->bcontainer, iova,
|
||||||
|
+ ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||||
|
int128_get64(llsize), NULL);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
- container, iova, int128_get64(llsize), ret,
|
||||||
|
+ bcontainer, iova, int128_get64(llsize), ret,
|
||||||
|
strerror(-ret));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -825,16 +830,15 @@ typedef struct VFIODirtyRanges {
|
||||||
|
} VFIODirtyRanges;
|
||||||
|
|
||||||
|
typedef struct VFIODirtyRangesListener {
|
||||||
|
- VFIOContainer *container;
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
VFIODirtyRanges ranges;
|
||||||
|
MemoryListener listener;
|
||||||
|
} VFIODirtyRangesListener;
|
||||||
|
|
||||||
|
static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
|
||||||
|
- VFIOContainer *container)
|
||||||
|
+ VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
VFIOPCIDevice *pcidev;
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
Object *owner;
|
||||||
|
|
||||||
|
@@ -863,7 +867,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
|
||||||
|
hwaddr iova, end, *min, *max;
|
||||||
|
|
||||||
|
if (!vfio_listener_valid_section(section, "tracking_update") ||
|
||||||
|
- !vfio_get_section_iova_range(dirty->container, section,
|
||||||
|
+ !vfio_get_section_iova_range(dirty->bcontainer, section,
|
||||||
|
&iova, &end, NULL)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -887,7 +891,7 @@ static void vfio_dirty_tracking_update(MemoryListener *listener,
|
||||||
|
* The alternative would be an IOVATree but that has a much bigger runtime
|
||||||
|
* overhead and unnecessary complexity.
|
||||||
|
*/
|
||||||
|
- if (vfio_section_is_vfio_pci(section, dirty->container) &&
|
||||||
|
+ if (vfio_section_is_vfio_pci(section, dirty->bcontainer) &&
|
||||||
|
iova >= UINT32_MAX) {
|
||||||
|
min = &range->minpci64;
|
||||||
|
max = &range->maxpci64;
|
||||||
|
@@ -911,7 +915,7 @@ static const MemoryListener vfio_dirty_tracking_listener = {
|
||||||
|
.region_add = vfio_dirty_tracking_update,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||||
|
+static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
|
||||||
|
VFIODirtyRanges *ranges)
|
||||||
|
{
|
||||||
|
VFIODirtyRangesListener dirty;
|
||||||
|
@@ -921,10 +925,10 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||||
|
dirty.ranges.min64 = UINT64_MAX;
|
||||||
|
dirty.ranges.minpci64 = UINT64_MAX;
|
||||||
|
dirty.listener = vfio_dirty_tracking_listener;
|
||||||
|
- dirty.container = container;
|
||||||
|
+ dirty.bcontainer = bcontainer;
|
||||||
|
|
||||||
|
memory_listener_register(&dirty.listener,
|
||||||
|
- container->bcontainer.space->as);
|
||||||
|
+ bcontainer->space->as);
|
||||||
|
|
||||||
|
*ranges = dirty.ranges;
|
||||||
|
|
||||||
|
@@ -936,12 +940,11 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||||
|
memory_listener_unregister(&dirty.listener);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||||
|
+static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
|
||||||
|
sizeof(uint64_t))] = {};
|
||||||
|
struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
feature->argsz = sizeof(buf);
|
||||||
|
@@ -962,7 +965,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct vfio_device_feature *
|
||||||
|
-vfio_device_feature_dma_logging_start_create(VFIOContainer *container,
|
||||||
|
+vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
|
||||||
|
VFIODirtyRanges *tracking)
|
||||||
|
{
|
||||||
|
struct vfio_device_feature *feature;
|
||||||
|
@@ -1035,16 +1038,15 @@ static void vfio_device_feature_dma_logging_start_destroy(
|
||||||
|
g_free(feature);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||||
|
+static int vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
struct vfio_device_feature *feature;
|
||||||
|
VFIODirtyRanges ranges;
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
- vfio_dirty_tracking_init(container, &ranges);
|
||||||
|
- feature = vfio_device_feature_dma_logging_start_create(container,
|
||||||
|
+ vfio_dirty_tracking_init(bcontainer, &ranges);
|
||||||
|
+ feature = vfio_device_feature_dma_logging_start_create(bcontainer,
|
||||||
|
&ranges);
|
||||||
|
if (!feature) {
|
||||||
|
return -errno;
|
||||||
|
@@ -1067,7 +1069,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||||
|
|
||||||
|
out:
|
||||||
|
if (ret) {
|
||||||
|
- vfio_devices_dma_logging_stop(container);
|
||||||
|
+ vfio_devices_dma_logging_stop(bcontainer);
|
||||||
|
}
|
||||||
|
|
||||||
|
vfio_device_feature_dma_logging_start_destroy(feature);
|
||||||
|
@@ -1077,14 +1079,14 @@ out:
|
||||||
|
|
||||||
|
static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
+ listener);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) {
|
||||||
|
- ret = vfio_devices_dma_logging_start(container);
|
||||||
|
+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
|
||||||
|
+ ret = vfio_devices_dma_logging_start(bcontainer);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
- true);
|
||||||
|
+ ret = vfio_container_set_dirty_page_tracking(bcontainer, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
@@ -1096,14 +1098,14 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||||
|
|
||||||
|
static void vfio_listener_log_global_stop(MemoryListener *listener)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
+ listener);
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
- if (vfio_devices_all_device_dirty_tracking(&container->bcontainer)) {
|
||||||
|
- vfio_devices_dma_logging_stop(container);
|
||||||
|
+ if (vfio_devices_all_device_dirty_tracking(bcontainer)) {
|
||||||
|
+ vfio_devices_dma_logging_stop(bcontainer);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
- false);
|
||||||
|
+ ret = vfio_container_set_dirty_page_tracking(bcontainer, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
@@ -1214,8 +1216,6 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
vfio_giommu_dirty_notifier, n);
|
||||||
|
VFIOGuestIOMMU *giommu = gdn->giommu;
|
||||||
|
VFIOContainerBase *bcontainer = giommu->bcontainer;
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||||
|
ram_addr_t translated_addr;
|
||||||
|
int ret = -EINVAL;
|
||||||
|
@@ -1230,12 +1230,12 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
|
||||||
|
- ret = vfio_get_dirty_bitmap(&container->bcontainer, iova,
|
||||||
|
- iotlb->addr_mask + 1, translated_addr);
|
||||||
|
+ ret = vfio_get_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
|
||||||
|
+ translated_addr);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
- container, iova, iotlb->addr_mask + 1, ret,
|
||||||
|
+ bcontainer, iova, iotlb->addr_mask + 1, ret,
|
||||||
|
strerror(-ret));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -1291,10 +1291,9 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
&vrdl);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
+static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
ram_addr_t ram_addr;
|
||||||
|
|
||||||
|
if (memory_region_is_iommu(section->mr)) {
|
||||||
|
@@ -1330,7 +1329,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||||
|
section->offset_within_region;
|
||||||
|
|
||||||
|
- return vfio_get_dirty_bitmap(&container->bcontainer,
|
||||||
|
+ return vfio_get_dirty_bitmap(bcontainer,
|
||||||
|
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
|
||||||
|
int128_get64(section->size), ram_addr);
|
||||||
|
}
|
||||||
|
@@ -1338,15 +1337,16 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
static void vfio_listener_log_sync(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer, listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
+ listener);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (vfio_listener_skipped_section(section)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (vfio_devices_all_dirty_tracking(&container->bcontainer)) {
|
||||||
|
- ret = vfio_sync_dirty_bitmap(container, section);
|
||||||
|
+ if (vfio_devices_all_dirty_tracking(bcontainer)) {
|
||||||
|
+ ret = vfio_sync_dirty_bitmap(bcontainer, section);
|
||||||
|
if (ret) {
|
||||||
|
error_report("vfio: Failed to sync dirty bitmap, err: %d (%s)", ret,
|
||||||
|
strerror(-ret));
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 584eee4ba1..7f508669f5 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -51,6 +51,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
{
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
bcontainer->space = space;
|
||||||
|
+ bcontainer->error = NULL;
|
||||||
|
bcontainer->dirty_pages_supported = false;
|
||||||
|
bcontainer->dma_max_mappings = 0;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 6ba2e2f8c4..5c1dee8c9f 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -453,6 +453,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
{
|
||||||
|
struct vfio_info_cap_header *hdr;
|
||||||
|
struct vfio_iommu_type1_info_cap_migration *cap_mig;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
|
||||||
|
hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
|
||||||
|
if (!hdr) {
|
||||||
|
@@ -467,7 +468,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
* qemu_real_host_page_size to mark those dirty.
|
||||||
|
*/
|
||||||
|
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
|
||||||
|
- container->bcontainer.dirty_pages_supported = true;
|
||||||
|
+ bcontainer->dirty_pages_supported = true;
|
||||||
|
container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||||
|
container->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||||
|
}
|
||||||
|
@@ -558,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
container->fd = fd;
|
||||||
|
- container->error = NULL;
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||||
|
@@ -621,25 +621,24 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
group->container = container;
|
||||||
|
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
|
||||||
|
|
||||||
|
- container->listener = vfio_memory_listener;
|
||||||
|
-
|
||||||
|
- memory_listener_register(&container->listener, bcontainer->space->as);
|
||||||
|
+ bcontainer->listener = vfio_memory_listener;
|
||||||
|
+ memory_listener_register(&bcontainer->listener, bcontainer->space->as);
|
||||||
|
|
||||||
|
- if (container->error) {
|
||||||
|
+ if (bcontainer->error) {
|
||||||
|
ret = -1;
|
||||||
|
- error_propagate_prepend(errp, container->error,
|
||||||
|
+ error_propagate_prepend(errp, bcontainer->error,
|
||||||
|
"memory listener initialization failed: ");
|
||||||
|
goto listener_release_exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
- container->initialized = true;
|
||||||
|
+ bcontainer->initialized = true;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
listener_release_exit:
|
||||||
|
QLIST_REMOVE(group, container_next);
|
||||||
|
QLIST_REMOVE(bcontainer, next);
|
||||||
|
vfio_kvm_device_del_group(group);
|
||||||
|
- memory_listener_unregister(&container->listener);
|
||||||
|
+ memory_listener_unregister(&bcontainer->listener);
|
||||||
|
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||||
|
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||||
|
vfio_spapr_container_deinit(container);
|
||||||
|
@@ -674,7 +673,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
* group.
|
||||||
|
*/
|
||||||
|
if (QLIST_EMPTY(&container->group_list)) {
|
||||||
|
- memory_listener_unregister(&container->listener);
|
||||||
|
+ memory_listener_unregister(&bcontainer->listener);
|
||||||
|
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||||
|
container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||||
|
vfio_spapr_container_deinit(container);
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 4f76bdd3ca..7a50975f25 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -46,6 +46,7 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||||
|
prereg_listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
const hwaddr gpa = section->offset_within_address_space;
|
||||||
|
hwaddr end;
|
||||||
|
int ret;
|
||||||
|
@@ -88,9 +89,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||||
|
* can gracefully fail. Runtime, there's not much we can do other
|
||||||
|
* than throw a hardware error.
|
||||||
|
*/
|
||||||
|
- if (!container->initialized) {
|
||||||
|
- if (!container->error) {
|
||||||
|
- error_setg_errno(&container->error, -ret,
|
||||||
|
+ if (!bcontainer->initialized) {
|
||||||
|
+ if (!bcontainer->error) {
|
||||||
|
+ error_setg_errno(&bcontainer->error, -ret,
|
||||||
|
"Memory registering failed");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
@@ -445,9 +446,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
|
||||||
|
memory_listener_register(&container->prereg_listener,
|
||||||
|
&address_space_memory);
|
||||||
|
- if (container->error) {
|
||||||
|
+ if (bcontainer->error) {
|
||||||
|
ret = -1;
|
||||||
|
- error_propagate_prepend(errp, container->error,
|
||||||
|
+ error_propagate_prepend(errp, bcontainer->error,
|
||||||
|
"RAM memory listener initialization failed: ");
|
||||||
|
goto listener_unregister_exit;
|
||||||
|
}
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 8a607a4c17..922022cbc6 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -78,11 +78,8 @@ struct VFIOGroup;
|
||||||
|
typedef struct VFIOContainer {
|
||||||
|
VFIOContainerBase bcontainer;
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
- MemoryListener listener;
|
||||||
|
MemoryListener prereg_listener;
|
||||||
|
unsigned iommu_type;
|
||||||
|
- Error *error;
|
||||||
|
- bool initialized;
|
||||||
|
uint64_t dirty_pgsizes;
|
||||||
|
uint64_t max_dirty_bitmap_size;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 8e05b5ac5a..95f8d319e0 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -36,6 +36,9 @@ typedef struct VFIOAddressSpace {
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
+ MemoryListener listener;
|
||||||
|
+ Error *error;
|
||||||
|
+ bool initialized;
|
||||||
|
unsigned long pgsizes;
|
||||||
|
unsigned int dma_max_mappings;
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
230
kvm-vfio-container-Move-per-container-device-list-in-bas.patch
Normal file
230
kvm-vfio-container-Move-per-container-device-list-in-bas.patch
Normal file
@ -0,0 +1,230 @@
|
|||||||
|
From 0b3fbb6bf5c5bccec184829ff9454fd637c512b9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:34 +0800
|
||||||
|
Subject: [PATCH 009/101] vfio/container: Move per container device list in
|
||||||
|
base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [8/67] d546cc25f4424b2d42356765c860fdaf4a3ba652 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
VFIO Device is also changed to point to base container instead of
|
||||||
|
legacy container.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 3e6015d1117579324b456aa169dfca06da9922cf)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 23 +++++++++++++++--------
|
||||||
|
hw/vfio/container.c | 12 ++++++------
|
||||||
|
include/hw/vfio/vfio-common.h | 3 +--
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
4 files changed, 23 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index b1a875ca93..9415395ed9 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void)
|
||||||
|
|
||||||
|
bool vfio_viommu_preset(VFIODevice *vbasedev)
|
||||||
|
{
|
||||||
|
- return vbasedev->container->bcontainer.space->as != &address_space_memory;
|
||||||
|
+ return vbasedev->bcontainer->space->as != &address_space_memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vfio_set_migration_error(int err)
|
||||||
|
@@ -179,6 +179,7 @@ bool vfio_device_state_is_precopy(VFIODevice *vbasedev)
|
||||||
|
|
||||||
|
static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
MigrationState *ms = migrate_get_current();
|
||||||
|
|
||||||
|
@@ -187,7 +188,7 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
VFIOMigration *migration = vbasedev->migration;
|
||||||
|
|
||||||
|
if (!migration) {
|
||||||
|
@@ -205,9 +206,10 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container)
|
||||||
|
|
||||||
|
bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
if (!vbasedev->dirty_pages_supported) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -222,13 +224,14 @@ bool vfio_devices_all_device_dirty_tracking(VFIOContainer *container)
|
||||||
|
*/
|
||||||
|
bool vfio_devices_all_running_and_mig_active(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
if (!migration_is_active(migrate_get_current())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
VFIOMigration *migration = vbasedev->migration;
|
||||||
|
|
||||||
|
if (!migration) {
|
||||||
|
@@ -833,12 +836,13 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
|
||||||
|
VFIOContainer *container)
|
||||||
|
{
|
||||||
|
VFIOPCIDevice *pcidev;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
Object *owner;
|
||||||
|
|
||||||
|
owner = memory_region_owner(section->mr);
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -939,13 +943,14 @@ static void vfio_devices_dma_logging_stop(VFIOContainer *container)
|
||||||
|
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
|
||||||
|
sizeof(uint64_t))] = {};
|
||||||
|
struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
|
||||||
|
feature->argsz = sizeof(buf);
|
||||||
|
feature->flags = VFIO_DEVICE_FEATURE_SET |
|
||||||
|
VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
if (!vbasedev->dirty_tracking) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -1036,6 +1041,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
struct vfio_device_feature *feature;
|
||||||
|
VFIODirtyRanges ranges;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
@@ -1046,7 +1052,7 @@ static int vfio_devices_dma_logging_start(VFIOContainer *container)
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
if (vbasedev->dirty_tracking) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -1139,10 +1145,11 @@ int vfio_devices_query_dirty_bitmap(VFIOContainer *container,
|
||||||
|
VFIOBitmap *vbmap, hwaddr iova,
|
||||||
|
hwaddr size)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
VFIODevice *vbasedev;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vbasedev, &container->device_list, container_next) {
|
||||||
|
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
|
||||||
|
ret = vfio_device_dma_logging_report(vbasedev, iova, size,
|
||||||
|
vbmap->bitmap);
|
||||||
|
if (ret) {
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 3ab74e2615..63a906de93 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -888,7 +888,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
int groupid = vfio_device_groupid(vbasedev, errp);
|
||||||
|
VFIODevice *vbasedev_iter;
|
||||||
|
VFIOGroup *group;
|
||||||
|
- VFIOContainer *container;
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (groupid < 0) {
|
||||||
|
@@ -915,9 +915,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
- container = group->container;
|
||||||
|
- vbasedev->container = container;
|
||||||
|
- QLIST_INSERT_HEAD(&container->device_list, vbasedev, container_next);
|
||||||
|
+ bcontainer = &group->container->bcontainer;
|
||||||
|
+ vbasedev->bcontainer = bcontainer;
|
||||||
|
+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
|
||||||
|
QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
@@ -927,13 +927,13 @@ void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
{
|
||||||
|
VFIOGroup *group = vbasedev->group;
|
||||||
|
|
||||||
|
- if (!vbasedev->container) {
|
||||||
|
+ if (!vbasedev->bcontainer) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
QLIST_REMOVE(vbasedev, global_next);
|
||||||
|
QLIST_REMOVE(vbasedev, container_next);
|
||||||
|
- vbasedev->container = NULL;
|
||||||
|
+ vbasedev->bcontainer = NULL;
|
||||||
|
trace_vfio_detach_device(vbasedev->name, group->groupid);
|
||||||
|
vfio_put_base_device(vbasedev);
|
||||||
|
vfio_put_group(group);
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 60f2785fe0..9740cf9fbc 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -90,7 +90,6 @@ typedef struct VFIOContainer {
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
- QLIST_HEAD(, VFIODevice) device_list;
|
||||||
|
GList *iova_ranges;
|
||||||
|
} VFIOContainer;
|
||||||
|
|
||||||
|
@@ -118,7 +117,7 @@ typedef struct VFIODevice {
|
||||||
|
QLIST_ENTRY(VFIODevice) container_next;
|
||||||
|
QLIST_ENTRY(VFIODevice) global_next;
|
||||||
|
struct VFIOGroup *group;
|
||||||
|
- VFIOContainer *container;
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
char *sysfsdev;
|
||||||
|
char *name;
|
||||||
|
DeviceState *dev;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index f244f003d0..7090962496 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -39,6 +39,7 @@ typedef struct VFIOContainerBase {
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
+ QLIST_HEAD(, VFIODevice) device_list;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
|
||||||
|
typedef struct VFIOGuestIOMMU {
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
242
kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch
Normal file
242
kvm-vfio-container-Move-pgsizes-and-dma_max_mappings-to-.patch
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
From d798939fbbe6c27200c165edd6f3771413821b34 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:36 +0800
|
||||||
|
Subject: [PATCH 011/101] vfio/container: Move pgsizes and dma_max_mappings to
|
||||||
|
base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [10/67] e80696175aba159a17ce9a869535db66682deb08 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 7ab1cb74ffdbf92ef237243b41bde5c7067d5298)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 17 +++++++++--------
|
||||||
|
hw/vfio/container-base.c | 1 +
|
||||||
|
hw/vfio/container.c | 11 +++++------
|
||||||
|
hw/vfio/spapr.c | 10 ++++++----
|
||||||
|
include/hw/vfio/vfio-common.h | 2 --
|
||||||
|
include/hw/vfio/vfio-container-base.h | 2 ++
|
||||||
|
6 files changed, 23 insertions(+), 20 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index cf6618f6ed..1cb53d369e 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -401,6 +401,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||||
|
static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||||
|
VFIORamDiscardListener *vrdl;
|
||||||
|
|
||||||
|
@@ -419,8 +420,8 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
section->mr);
|
||||||
|
|
||||||
|
g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
|
||||||
|
- g_assert(container->pgsizes &&
|
||||||
|
- vrdl->granularity >= 1ULL << ctz64(container->pgsizes));
|
||||||
|
+ g_assert(bcontainer->pgsizes &&
|
||||||
|
+ vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes));
|
||||||
|
|
||||||
|
ram_discard_listener_init(&vrdl->listener,
|
||||||
|
vfio_ram_discard_notify_populate,
|
||||||
|
@@ -441,7 +442,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
* number of sections in the address space we could have over time,
|
||||||
|
* also consuming DMA mappings.
|
||||||
|
*/
|
||||||
|
- if (container->dma_max_mappings) {
|
||||||
|
+ if (bcontainer->dma_max_mappings) {
|
||||||
|
unsigned int vrdl_count = 0, vrdl_mappings = 0, max_memslots = 512;
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM
|
||||||
|
@@ -462,11 +463,11 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vrdl_mappings + max_memslots - vrdl_count >
|
||||||
|
- container->dma_max_mappings) {
|
||||||
|
+ bcontainer->dma_max_mappings) {
|
||||||
|
warn_report("%s: possibly running out of DMA mappings. E.g., try"
|
||||||
|
" increasing the 'block-size' of virtio-mem devies."
|
||||||
|
" Maximum possible DMA mappings: %d, Maximum possible"
|
||||||
|
- " memslots: %d", __func__, container->dma_max_mappings,
|
||||||
|
+ " memslots: %d", __func__, bcontainer->dma_max_mappings,
|
||||||
|
max_memslots);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -626,7 +627,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
iommu_idx);
|
||||||
|
|
||||||
|
ret = memory_region_iommu_set_page_size_mask(giommu->iommu_mr,
|
||||||
|
- container->pgsizes,
|
||||||
|
+ bcontainer->pgsizes,
|
||||||
|
&err);
|
||||||
|
if (ret) {
|
||||||
|
g_free(giommu);
|
||||||
|
@@ -675,7 +676,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
llsize = int128_sub(llend, int128_make64(iova));
|
||||||
|
|
||||||
|
if (memory_region_is_ram_device(section->mr)) {
|
||||||
|
- hwaddr pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
|
||||||
|
+ hwaddr pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1;
|
||||||
|
|
||||||
|
if ((iova & pgmask) || (int128_get64(llsize) & pgmask)) {
|
||||||
|
trace_vfio_listener_region_add_no_dma_map(
|
||||||
|
@@ -777,7 +778,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
if (memory_region_is_ram_device(section->mr)) {
|
||||||
|
hwaddr pgmask;
|
||||||
|
|
||||||
|
- pgmask = (1ULL << ctz64(container->pgsizes)) - 1;
|
||||||
|
+ pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1;
|
||||||
|
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
|
||||||
|
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||||
|
vfio_unregister_ram_discard_listener(container, section);
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 5d654ae172..dcce111349 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -52,6 +52,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
bcontainer->space = space;
|
||||||
|
bcontainer->dirty_pages_supported = false;
|
||||||
|
+ bcontainer->dma_max_mappings = 0;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 7bd81eab09..c5a6262882 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -154,7 +154,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
|
||||||
|
container->iommu_type == VFIO_TYPE1v2_IOMMU) {
|
||||||
|
trace_vfio_legacy_dma_unmap_overflow_workaround();
|
||||||
|
- unmap.size -= 1ULL << ctz64(container->pgsizes);
|
||||||
|
+ unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
|
||||||
|
@@ -559,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
container->fd = fd;
|
||||||
|
container->error = NULL;
|
||||||
|
- container->dma_max_mappings = 0;
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
@@ -589,13 +588,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
|
||||||
|
- container->pgsizes = info->iova_pgsizes;
|
||||||
|
+ bcontainer->pgsizes = info->iova_pgsizes;
|
||||||
|
} else {
|
||||||
|
- container->pgsizes = qemu_real_host_page_size();
|
||||||
|
+ bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!vfio_get_info_dma_avail(info, &container->dma_max_mappings)) {
|
||||||
|
- container->dma_max_mappings = 65535;
|
||||||
|
+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
|
||||||
|
+ bcontainer->dma_max_mappings = 65535;
|
||||||
|
}
|
||||||
|
|
||||||
|
vfio_get_info_iova_range(info, container);
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 83da2f7ec2..4f76bdd3ca 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -226,6 +226,7 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||||
|
hwaddr *pgsize)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
|
||||||
|
uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
|
||||||
|
unsigned entries, bits_total, bits_per_level, max_levels;
|
||||||
|
@@ -239,13 +240,13 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||||
|
if (pagesize > rampagesize) {
|
||||||
|
pagesize = rampagesize;
|
||||||
|
}
|
||||||
|
- pgmask = container->pgsizes & (pagesize | (pagesize - 1));
|
||||||
|
+ pgmask = bcontainer->pgsizes & (pagesize | (pagesize - 1));
|
||||||
|
pagesize = pgmask ? (1ULL << (63 - clz64(pgmask))) : 0;
|
||||||
|
if (!pagesize) {
|
||||||
|
error_report("Host doesn't support page size 0x%"PRIx64
|
||||||
|
", the supported mask is 0x%lx",
|
||||||
|
memory_region_iommu_get_min_page_size(iommu_mr),
|
||||||
|
- container->pgsizes);
|
||||||
|
+ bcontainer->pgsizes);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -421,6 +422,7 @@ void vfio_container_del_section_window(VFIOContainer *container,
|
||||||
|
|
||||||
|
int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
struct vfio_iommu_spapr_tce_info info;
|
||||||
|
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
|
||||||
|
int ret, fd = container->fd;
|
||||||
|
@@ -461,7 +463,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v2) {
|
||||||
|
- container->pgsizes = info.ddw.pgsizes;
|
||||||
|
+ bcontainer->pgsizes = info.ddw.pgsizes;
|
||||||
|
/*
|
||||||
|
* There is a default window in just created container.
|
||||||
|
* To make region_add/del simpler, we better remove this
|
||||||
|
@@ -476,7 +478,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* The default table uses 4K pages */
|
||||||
|
- container->pgsizes = 0x1000;
|
||||||
|
+ bcontainer->pgsizes = 0x1000;
|
||||||
|
vfio_host_win_add(container, info.dma32_window_start,
|
||||||
|
info.dma32_window_start +
|
||||||
|
info.dma32_window_size - 1,
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index bc67e1316c..d3dc2f9dcb 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -85,8 +85,6 @@ typedef struct VFIOContainer {
|
||||||
|
bool initialized;
|
||||||
|
uint64_t dirty_pgsizes;
|
||||||
|
uint64_t max_dirty_bitmap_size;
|
||||||
|
- unsigned long pgsizes;
|
||||||
|
- unsigned int dma_max_mappings;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 7090962496..85ec7e1a56 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -36,6 +36,8 @@ typedef struct VFIOAddressSpace {
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
+ unsigned long pgsizes;
|
||||||
|
+ unsigned int dma_max_mappings;
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
265
kvm-vfio-container-Move-space-field-to-base-container.patch
Normal file
265
kvm-vfio-container-Move-space-field-to-base-container.patch
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
From 3ba43cbc5b096feed6272e070cf152d5fc74df01 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:32 +0800
|
||||||
|
Subject: [PATCH 007/101] vfio/container: Move space field to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [6/67] b0aa17d9ec4588bd64373452a30306e826234d0b (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Move the space field to the base object. Also the VFIOAddressSpace
|
||||||
|
now contains a list of base containers.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit e5597063386a0c76308ad16da31726d23f489945)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/ppc/spapr_pci_vfio.c | 10 +++++-----
|
||||||
|
hw/vfio/common.c | 4 ++--
|
||||||
|
hw/vfio/container-base.c | 6 +++++-
|
||||||
|
hw/vfio/container.c | 18 ++++++++----------
|
||||||
|
include/hw/vfio/vfio-common.h | 8 --------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 9 +++++++++
|
||||||
|
6 files changed, 29 insertions(+), 26 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
|
||||||
|
index f283f7e38d..d1d07bec46 100644
|
||||||
|
--- a/hw/ppc/spapr_pci_vfio.c
|
||||||
|
+++ b/hw/ppc/spapr_pci_vfio.c
|
||||||
|
@@ -84,27 +84,27 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
|
||||||
|
static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
|
||||||
|
{
|
||||||
|
VFIOAddressSpace *space = vfio_get_address_space(as);
|
||||||
|
- VFIOContainer *container = NULL;
|
||||||
|
+ VFIOContainerBase *bcontainer = NULL;
|
||||||
|
|
||||||
|
if (QLIST_EMPTY(&space->containers)) {
|
||||||
|
/* No containers to act on */
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
- container = QLIST_FIRST(&space->containers);
|
||||||
|
+ bcontainer = QLIST_FIRST(&space->containers);
|
||||||
|
|
||||||
|
- if (QLIST_NEXT(container, next)) {
|
||||||
|
+ if (QLIST_NEXT(bcontainer, next)) {
|
||||||
|
/*
|
||||||
|
* We don't yet have logic to synchronize EEH state across
|
||||||
|
* multiple containers
|
||||||
|
*/
|
||||||
|
- container = NULL;
|
||||||
|
+ bcontainer = NULL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
vfio_put_address_space(space);
|
||||||
|
- return container;
|
||||||
|
+ return container_of(bcontainer, VFIOContainer, bcontainer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool vfio_eeh_as_ok(AddressSpace *as)
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 43580bcc43..1d8202537e 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -145,7 +145,7 @@ void vfio_unblock_multiple_devices_migration(void)
|
||||||
|
|
||||||
|
bool vfio_viommu_preset(VFIODevice *vbasedev)
|
||||||
|
{
|
||||||
|
- return vbasedev->container->space->as != &address_space_memory;
|
||||||
|
+ return vbasedev->container->bcontainer.space->as != &address_space_memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vfio_set_migration_error(int err)
|
||||||
|
@@ -922,7 +922,7 @@ static void vfio_dirty_tracking_init(VFIOContainer *container,
|
||||||
|
dirty.container = container;
|
||||||
|
|
||||||
|
memory_listener_register(&dirty.listener,
|
||||||
|
- container->space->as);
|
||||||
|
+ container->bcontainer.space->as);
|
||||||
|
|
||||||
|
*ranges = dirty.ranges;
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 20bcb9669a..3933391e0d 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -31,9 +31,11 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
|
||||||
|
-void vfio_container_init(VFIOContainerBase *bcontainer, const VFIOIOMMUOps *ops)
|
||||||
|
+void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
+ const VFIOIOMMUOps *ops)
|
||||||
|
{
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
+ bcontainer->space = space;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -41,6 +43,8 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
VFIOGuestIOMMU *giommu, *tmp;
|
||||||
|
|
||||||
|
+ QLIST_REMOVE(bcontainer, next);
|
||||||
|
+
|
||||||
|
QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
|
||||||
|
memory_region_unregister_iommu_notifier(
|
||||||
|
MEMORY_REGION(giommu->iommu_mr), &giommu->n);
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 133d3c8f5c..f12fcb6fe1 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -514,7 +514,8 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
* details once we know which type of IOMMU we are using.
|
||||||
|
*/
|
||||||
|
|
||||||
|
- QLIST_FOREACH(container, &space->containers, next) {
|
||||||
|
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
|
||||||
|
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
|
||||||
|
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
||||||
|
ret = vfio_ram_block_discard_disable(container, true);
|
||||||
|
if (ret) {
|
||||||
|
@@ -550,7 +551,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
}
|
||||||
|
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
- container->space = space;
|
||||||
|
container->fd = fd;
|
||||||
|
container->error = NULL;
|
||||||
|
container->dirty_pages_supported = false;
|
||||||
|
@@ -558,7 +558,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
- vfio_container_init(bcontainer, &vfio_legacy_ops);
|
||||||
|
+ vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||||
|
|
||||||
|
ret = vfio_init_container(container, group->fd, errp);
|
||||||
|
if (ret) {
|
||||||
|
@@ -613,14 +613,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
vfio_kvm_device_add_group(group);
|
||||||
|
|
||||||
|
QLIST_INIT(&container->group_list);
|
||||||
|
- QLIST_INSERT_HEAD(&space->containers, container, next);
|
||||||
|
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
|
||||||
|
|
||||||
|
group->container = container;
|
||||||
|
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
|
||||||
|
|
||||||
|
container->listener = vfio_memory_listener;
|
||||||
|
|
||||||
|
- memory_listener_register(&container->listener, container->space->as);
|
||||||
|
+ memory_listener_register(&container->listener, bcontainer->space->as);
|
||||||
|
|
||||||
|
if (container->error) {
|
||||||
|
ret = -1;
|
||||||
|
@@ -634,7 +634,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
return 0;
|
||||||
|
listener_release_exit:
|
||||||
|
QLIST_REMOVE(group, container_next);
|
||||||
|
- QLIST_REMOVE(container, next);
|
||||||
|
+ QLIST_REMOVE(bcontainer, next);
|
||||||
|
vfio_kvm_device_del_group(group);
|
||||||
|
memory_listener_unregister(&container->listener);
|
||||||
|
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||||
|
@@ -684,9 +684,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (QLIST_EMPTY(&container->group_list)) {
|
||||||
|
- VFIOAddressSpace *space = container->space;
|
||||||
|
-
|
||||||
|
- QLIST_REMOVE(container, next);
|
||||||
|
+ VFIOAddressSpace *space = bcontainer->space;
|
||||||
|
|
||||||
|
vfio_container_destroy(bcontainer);
|
||||||
|
|
||||||
|
@@ -707,7 +705,7 @@ static VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
|
||||||
|
QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
if (group->groupid == groupid) {
|
||||||
|
/* Found it. Now is it already in the right context? */
|
||||||
|
- if (group->container->space->as == as) {
|
||||||
|
+ if (group->container->bcontainer.space->as == as) {
|
||||||
|
return group;
|
||||||
|
} else {
|
||||||
|
error_setg(errp, "group %d used in multiple address spaces",
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 6be082b8f2..bd4de6cb3a 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -73,17 +73,10 @@ typedef struct VFIOMigration {
|
||||||
|
bool initial_data_sent;
|
||||||
|
} VFIOMigration;
|
||||||
|
|
||||||
|
-typedef struct VFIOAddressSpace {
|
||||||
|
- AddressSpace *as;
|
||||||
|
- QLIST_HEAD(, VFIOContainer) containers;
|
||||||
|
- QLIST_ENTRY(VFIOAddressSpace) list;
|
||||||
|
-} VFIOAddressSpace;
|
||||||
|
-
|
||||||
|
struct VFIOGroup;
|
||||||
|
|
||||||
|
typedef struct VFIOContainer {
|
||||||
|
VFIOContainerBase bcontainer;
|
||||||
|
- VFIOAddressSpace *space;
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
MemoryListener listener;
|
||||||
|
MemoryListener prereg_listener;
|
||||||
|
@@ -98,7 +91,6 @@ typedef struct VFIOContainer {
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
- QLIST_ENTRY(VFIOContainer) next;
|
||||||
|
QLIST_HEAD(, VFIODevice) device_list;
|
||||||
|
GList *iova_ranges;
|
||||||
|
} VFIOContainer;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index a11aec5755..c7cc6ec9c5 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -24,12 +24,20 @@ typedef struct {
|
||||||
|
hwaddr pages;
|
||||||
|
} VFIOBitmap;
|
||||||
|
|
||||||
|
+typedef struct VFIOAddressSpace {
|
||||||
|
+ AddressSpace *as;
|
||||||
|
+ QLIST_HEAD(, VFIOContainerBase) containers;
|
||||||
|
+ QLIST_ENTRY(VFIOAddressSpace) list;
|
||||||
|
+} VFIOAddressSpace;
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* This is the base object for vfio container backends
|
||||||
|
*/
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
+ VFIOAddressSpace *space;
|
||||||
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
+ QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
|
||||||
|
typedef struct VFIOGuestIOMMU {
|
||||||
|
@@ -48,6 +56,7 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
IOMMUTLBEntry *iotlb);
|
||||||
|
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer,
|
||||||
|
+ VFIOAddressSpace *space,
|
||||||
|
const VFIOIOMMUOps *ops);
|
||||||
|
void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
255
kvm-vfio-container-Move-vrdl_list-to-base-container.patch
Normal file
255
kvm-vfio-container-Move-vrdl_list-to-base-container.patch
Normal file
@ -0,0 +1,255 @@
|
|||||||
|
From aadd055dcc06cb964ebfd2868b7e9b207d62ae0e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:37 +0800
|
||||||
|
Subject: [PATCH 012/101] vfio/container: Move vrdl_list to base container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [11/67] 42da5389e39291839259f0e4c020c7461b7225cc (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit dc74a4b0056c0c803d46612a2319294921097974)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 38 +++++++++++++--------------
|
||||||
|
hw/vfio/container-base.c | 1 +
|
||||||
|
hw/vfio/container.c | 1 -
|
||||||
|
include/hw/vfio/vfio-common.h | 11 --------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 11 ++++++++
|
||||||
|
5 files changed, 31 insertions(+), 31 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 1cb53d369e..f15665789f 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -351,13 +351,13 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||||
|
{
|
||||||
|
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||||
|
listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||||
|
const hwaddr size = int128_get64(section->size);
|
||||||
|
const hwaddr iova = section->offset_within_address_space;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Unmap with a single call. */
|
||||||
|
- ret = vfio_container_dma_unmap(&vrdl->container->bcontainer,
|
||||||
|
- iova, size , NULL);
|
||||||
|
+ ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL);
|
||||||
|
if (ret) {
|
||||||
|
error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
|
||||||
|
strerror(-ret));
|
||||||
|
@@ -369,6 +369,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||||
|
{
|
||||||
|
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
|
||||||
|
listener);
|
||||||
|
+ VFIOContainerBase *bcontainer = vrdl->bcontainer;
|
||||||
|
const hwaddr end = section->offset_within_region +
|
||||||
|
int128_get64(section->size);
|
||||||
|
hwaddr start, next, iova;
|
||||||
|
@@ -387,8 +388,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||||
|
section->offset_within_address_space;
|
||||||
|
vaddr = memory_region_get_ram_ptr(section->mr) + start;
|
||||||
|
|
||||||
|
- ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova,
|
||||||
|
- next - start, vaddr, section->readonly);
|
||||||
|
+ ret = vfio_container_dma_map(bcontainer, iova, next - start,
|
||||||
|
+ vaddr, section->readonly);
|
||||||
|
if (ret) {
|
||||||
|
/* Rollback */
|
||||||
|
vfio_ram_discard_notify_discard(rdl, section);
|
||||||
|
@@ -398,10 +399,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
+static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||||
|
VFIORamDiscardListener *vrdl;
|
||||||
|
|
||||||
|
@@ -412,7 +412,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
g_assert(QEMU_IS_ALIGNED(int128_get64(section->size), TARGET_PAGE_SIZE));
|
||||||
|
|
||||||
|
vrdl = g_new0(VFIORamDiscardListener, 1);
|
||||||
|
- vrdl->container = container;
|
||||||
|
+ vrdl->bcontainer = bcontainer;
|
||||||
|
vrdl->mr = section->mr;
|
||||||
|
vrdl->offset_within_address_space = section->offset_within_address_space;
|
||||||
|
vrdl->size = int128_get64(section->size);
|
||||||
|
@@ -427,7 +427,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
vfio_ram_discard_notify_populate,
|
||||||
|
vfio_ram_discard_notify_discard, true);
|
||||||
|
ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
|
||||||
|
- QLIST_INSERT_HEAD(&container->vrdl_list, vrdl, next);
|
||||||
|
+ QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity-check if we have a theoretically problematic setup where we could
|
||||||
|
@@ -451,7 +451,7 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||||
|
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||||
|
hwaddr start, end;
|
||||||
|
|
||||||
|
start = QEMU_ALIGN_DOWN(vrdl->offset_within_address_space,
|
||||||
|
@@ -473,13 +473,13 @@ static void vfio_register_ram_discard_listener(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_unregister_ram_discard_listener(VFIOContainer *container,
|
||||||
|
+static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||||
|
VFIORamDiscardListener *vrdl = NULL;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||||
|
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||||
|
if (vrdl->mr == section->mr &&
|
||||||
|
vrdl->offset_within_address_space ==
|
||||||
|
section->offset_within_address_space) {
|
||||||
|
@@ -663,7 +663,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
* about changes.
|
||||||
|
*/
|
||||||
|
if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||||
|
- vfio_register_ram_discard_listener(container, section);
|
||||||
|
+ vfio_register_ram_discard_listener(bcontainer, section);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -781,7 +781,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
pgmask = (1ULL << ctz64(bcontainer->pgsizes)) - 1;
|
||||||
|
try_unmap = !((iova & pgmask) || (int128_get64(llsize) & pgmask));
|
||||||
|
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||||
|
- vfio_unregister_ram_discard_listener(container, section);
|
||||||
|
+ vfio_unregister_ram_discard_listener(bcontainer, section);
|
||||||
|
/* Unregistering will trigger an unmap. */
|
||||||
|
try_unmap = false;
|
||||||
|
}
|
||||||
|
@@ -1260,17 +1260,17 @@ static int vfio_ram_discard_get_dirty_bitmap(MemoryRegionSection *section,
|
||||||
|
* Sync the whole mapped region (spanning multiple individual mappings)
|
||||||
|
* in one go.
|
||||||
|
*/
|
||||||
|
- return vfio_get_dirty_bitmap(&vrdl->container->bcontainer, iova, size,
|
||||||
|
- ram_addr);
|
||||||
|
+ return vfio_get_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
|
||||||
|
- MemoryRegionSection *section)
|
||||||
|
+static int
|
||||||
|
+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
|
||||||
|
VFIORamDiscardListener *vrdl = NULL;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
|
||||||
|
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
|
||||||
|
if (vrdl->mr == section->mr &&
|
||||||
|
vrdl->offset_within_address_space ==
|
||||||
|
section->offset_within_address_space) {
|
||||||
|
@@ -1324,7 +1324,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
} else if (memory_region_has_ram_discard_manager(section->mr)) {
|
||||||
|
- return vfio_sync_ram_discard_listener_dirty_bitmap(container, section);
|
||||||
|
+ return vfio_sync_ram_discard_listener_dirty_bitmap(bcontainer, section);
|
||||||
|
}
|
||||||
|
|
||||||
|
ram_addr = memory_region_get_ram_addr(section->mr) +
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index dcce111349..584eee4ba1 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -54,6 +54,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
bcontainer->dirty_pages_supported = false;
|
||||||
|
bcontainer->dma_max_mappings = 0;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
+ QLIST_INIT(&bcontainer->vrdl_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
void vfio_container_destroy(VFIOContainerBase *bcontainer)
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index c5a6262882..6ba2e2f8c4 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -560,7 +560,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container->fd = fd;
|
||||||
|
container->error = NULL;
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
- QLIST_INIT(&container->vrdl_list);
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
vfio_container_init(bcontainer, space, &vfio_legacy_ops);
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index d3dc2f9dcb..8a607a4c17 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -87,20 +87,9 @@ typedef struct VFIOContainer {
|
||||||
|
uint64_t max_dirty_bitmap_size;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
- QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
GList *iova_ranges;
|
||||||
|
} VFIOContainer;
|
||||||
|
|
||||||
|
-typedef struct VFIORamDiscardListener {
|
||||||
|
- VFIOContainer *container;
|
||||||
|
- MemoryRegion *mr;
|
||||||
|
- hwaddr offset_within_address_space;
|
||||||
|
- hwaddr size;
|
||||||
|
- uint64_t granularity;
|
||||||
|
- RamDiscardListener listener;
|
||||||
|
- QLIST_ENTRY(VFIORamDiscardListener) next;
|
||||||
|
-} VFIORamDiscardListener;
|
||||||
|
-
|
||||||
|
typedef struct VFIOHostDMAWindow {
|
||||||
|
hwaddr min_iova;
|
||||||
|
hwaddr max_iova;
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 85ec7e1a56..8e05b5ac5a 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -40,6 +40,7 @@ typedef struct VFIOContainerBase {
|
||||||
|
unsigned int dma_max_mappings;
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
+ QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
|
||||||
|
QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
QLIST_HEAD(, VFIODevice) device_list;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
@@ -52,6 +53,16 @@ typedef struct VFIOGuestIOMMU {
|
||||||
|
QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
|
||||||
|
} VFIOGuestIOMMU;
|
||||||
|
|
||||||
|
+typedef struct VFIORamDiscardListener {
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
+ MemoryRegion *mr;
|
||||||
|
+ hwaddr offset_within_address_space;
|
||||||
|
+ hwaddr size;
|
||||||
|
+ uint64_t granularity;
|
||||||
|
+ RamDiscardListener listener;
|
||||||
|
+ QLIST_ENTRY(VFIORamDiscardListener) next;
|
||||||
|
+} VFIORamDiscardListener;
|
||||||
|
+
|
||||||
|
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
void *vaddr, bool readonly);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,66 @@
|
|||||||
|
From edfc1ee2a1854d180ffad92e70212535a2ca668c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 21 Dec 2023 10:45:17 +0800
|
||||||
|
Subject: [PATCH 062/101] vfio/container: Rename vfio_init_container to
|
||||||
|
vfio_set_iommu
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [61/67] 5e7f956379b54fe6fa7e078ec17e71325aa109af (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
vfio_container_init() and vfio_init_container() names are confusing
|
||||||
|
especially when we see vfio_init_container() calls vfio_container_init().
|
||||||
|
|
||||||
|
vfio_container_init() operates on base container which is consistent
|
||||||
|
with all routines handling 'VFIOContainerBase *' ops.
|
||||||
|
|
||||||
|
vfio_init_container() operates on the legacy container and sets up the IOMMU
|
||||||
|
context with ioctl(VFIO_SET_IOMMU).
|
||||||
|
|
||||||
|
So choose to rename vfio_init_container to vfio_set_iommu to avoid
|
||||||
|
the confusion.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 9f734a117cbf63b03577b46c8cad8ad88ec6dced)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 8d334f52f2..bd25b9fbad 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -392,8 +392,8 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp)
|
||||||
|
return VFIO_IOMMU_CLASS(klass);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_init_container(VFIOContainer *container, int group_fd,
|
||||||
|
- VFIOAddressSpace *space, Error **errp)
|
||||||
|
+static int vfio_set_iommu(VFIOContainer *container, int group_fd,
|
||||||
|
+ VFIOAddressSpace *space, Error **errp)
|
||||||
|
{
|
||||||
|
int iommu_type, ret;
|
||||||
|
const VFIOIOMMUClass *vioc;
|
||||||
|
@@ -616,7 +616,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container->fd = fd;
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
|
||||||
|
- ret = vfio_init_container(container, group->fd, space, errp);
|
||||||
|
+ ret = vfio_set_iommu(container, group->fd, space, errp);
|
||||||
|
if (ret) {
|
||||||
|
goto free_container_exit;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,59 @@
|
|||||||
|
From 8d3857c7877da58ed0c6b62cf2714c4127350522 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Wed, 20 Dec 2023 14:53:02 +0100
|
||||||
|
Subject: [PATCH 059/101] vfio/container: Replace basename with
|
||||||
|
g_path_get_basename
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [58/67] 56a90f23dadc89271b1fff014fc64ade87c1a4cb (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
g_path_get_basename() is a portable utility function that has the
|
||||||
|
advantage of not modifying the string argument. It also fixes a compile
|
||||||
|
breakage with the Musl C library reported in [1].
|
||||||
|
|
||||||
|
[1] https://lore.kernel.org/all/20231212010228.2701544-1-raj.khem@gmail.com/
|
||||||
|
|
||||||
|
Reported-by: Khem Raj <raj.khem@gmail.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 213ae3ffda463c0503e39e0cf827511b5298c314)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 5 +++--
|
||||||
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 688cf23bab..8d334f52f2 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -869,7 +869,8 @@ static void vfio_put_base_device(VFIODevice *vbasedev)
|
||||||
|
|
||||||
|
static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
|
||||||
|
{
|
||||||
|
- char *tmp, group_path[PATH_MAX], *group_name;
|
||||||
|
+ char *tmp, group_path[PATH_MAX];
|
||||||
|
+ g_autofree char *group_name = NULL;
|
||||||
|
int ret, groupid;
|
||||||
|
ssize_t len;
|
||||||
|
|
||||||
|
@@ -885,7 +886,7 @@ static int vfio_device_groupid(VFIODevice *vbasedev, Error **errp)
|
||||||
|
|
||||||
|
group_path[len] = 0;
|
||||||
|
|
||||||
|
- group_name = basename(group_path);
|
||||||
|
+ group_name = g_path_get_basename(group_path);
|
||||||
|
if (sscanf(group_name, "%d", &groupid) != 1) {
|
||||||
|
error_setg_errno(errp, errno, "failed to read %s", group_path);
|
||||||
|
return -errno;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
235
kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch
Normal file
235
kvm-vfio-container-Switch-to-IOMMU-BE-set_dirty_page_tra.patch
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
From a2c8aa64b1b21a3e1d4cf2a4fe7d84dc32f69284 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:33 +0800
|
||||||
|
Subject: [PATCH 008/101] vfio/container: Switch to IOMMU BE
|
||||||
|
set_dirty_page_tracking/query_dirty_bitmap API
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [7/67] 88368809c7990e1d9b01406e48694fe3e3fb1397 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
dirty_pages_supported field is also moved to the base container
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit bb424490edcef73d07f200d53f69415b203d81df)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 12 ++++++++----
|
||||||
|
hw/vfio/container-base.c | 16 ++++++++++++++++
|
||||||
|
hw/vfio/container.c | 21 ++++++++++++++-------
|
||||||
|
include/hw/vfio/vfio-common.h | 6 ------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 6 ++++++
|
||||||
|
5 files changed, 44 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 1d8202537e..b1a875ca93 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1079,7 +1079,8 @@ static void vfio_listener_log_global_start(MemoryListener *listener)
|
||||||
|
if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||||
|
ret = vfio_devices_dma_logging_start(container);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_set_dirty_page_tracking(container, true);
|
||||||
|
+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
+ true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
@@ -1097,7 +1098,8 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
|
||||||
|
if (vfio_devices_all_device_dirty_tracking(container)) {
|
||||||
|
vfio_devices_dma_logging_stop(container);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_set_dirty_page_tracking(container, false);
|
||||||
|
+ ret = vfio_container_set_dirty_page_tracking(&container->bcontainer,
|
||||||
|
+ false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
@@ -1165,7 +1167,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
VFIOBitmap vbmap;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- if (!container->dirty_pages_supported && !all_device_dirty_tracking) {
|
||||||
|
+ if (!container->bcontainer.dirty_pages_supported &&
|
||||||
|
+ !all_device_dirty_tracking) {
|
||||||
|
cpu_physical_memory_set_dirty_range(ram_addr, size,
|
||||||
|
tcg_enabled() ? DIRTY_CLIENTS_ALL :
|
||||||
|
DIRTY_CLIENTS_NOCODE);
|
||||||
|
@@ -1180,7 +1183,8 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
|
||||||
|
if (all_device_dirty_tracking) {
|
||||||
|
ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size);
|
||||||
|
} else {
|
||||||
|
- ret = vfio_query_dirty_bitmap(container, &vbmap, iova, size);
|
||||||
|
+ ret = vfio_container_query_dirty_bitmap(&container->bcontainer, &vbmap,
|
||||||
|
+ iova, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 3933391e0d..5d654ae172 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -31,11 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
|
||||||
|
+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
+ bool start)
|
||||||
|
+{
|
||||||
|
+ g_assert(bcontainer->ops->set_dirty_page_tracking);
|
||||||
|
+ return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ VFIOBitmap *vbmap,
|
||||||
|
+ hwaddr iova, hwaddr size)
|
||||||
|
+{
|
||||||
|
+ g_assert(bcontainer->ops->query_dirty_bitmap);
|
||||||
|
+ return bcontainer->ops->query_dirty_bitmap(bcontainer, vbmap, iova, size);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
|
||||||
|
const VFIOIOMMUOps *ops)
|
||||||
|
{
|
||||||
|
bcontainer->ops = ops;
|
||||||
|
bcontainer->space = space;
|
||||||
|
+ bcontainer->dirty_pages_supported = false;
|
||||||
|
QLIST_INIT(&bcontainer->giommu_list);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index f12fcb6fe1..3ab74e2615 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -131,7 +131,7 @@ static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
|
||||||
|
if (iotlb && vfio_devices_all_running_and_mig_active(container)) {
|
||||||
|
if (!vfio_devices_all_device_dirty_tracking(container) &&
|
||||||
|
- container->dirty_pages_supported) {
|
||||||
|
+ container->bcontainer.dirty_pages_supported) {
|
||||||
|
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -205,14 +205,17 @@ static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
|
||||||
|
+static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
+ bool start)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
int ret;
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap dirty = {
|
||||||
|
.argsz = sizeof(dirty),
|
||||||
|
};
|
||||||
|
|
||||||
|
- if (!container->dirty_pages_supported) {
|
||||||
|
+ if (!bcontainer->dirty_pages_supported) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -232,9 +235,12 @@ int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
|
||||||
|
- hwaddr iova, hwaddr size)
|
||||||
|
+static int vfio_legacy_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ VFIOBitmap *vbmap,
|
||||||
|
+ hwaddr iova, hwaddr size)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap *dbitmap;
|
||||||
|
struct vfio_iommu_type1_dirty_bitmap_get *range;
|
||||||
|
int ret;
|
||||||
|
@@ -461,7 +467,7 @@ static void vfio_get_iommu_info_migration(VFIOContainer *container,
|
||||||
|
* qemu_real_host_page_size to mark those dirty.
|
||||||
|
*/
|
||||||
|
if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
|
||||||
|
- container->dirty_pages_supported = true;
|
||||||
|
+ container->bcontainer.dirty_pages_supported = true;
|
||||||
|
container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
|
||||||
|
container->dirty_pgsizes = cap_mig->pgsize_bitmap;
|
||||||
|
}
|
||||||
|
@@ -553,7 +559,6 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
container = g_malloc0(sizeof(*container));
|
||||||
|
container->fd = fd;
|
||||||
|
container->error = NULL;
|
||||||
|
- container->dirty_pages_supported = false;
|
||||||
|
container->dma_max_mappings = 0;
|
||||||
|
container->iova_ranges = NULL;
|
||||||
|
QLIST_INIT(&container->vrdl_list);
|
||||||
|
@@ -937,4 +942,6 @@ void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
.dma_map = vfio_legacy_dma_map,
|
||||||
|
.dma_unmap = vfio_legacy_dma_unmap,
|
||||||
|
+ .set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
|
||||||
|
+ .query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
|
||||||
|
};
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index bd4de6cb3a..60f2785fe0 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -83,7 +83,6 @@ typedef struct VFIOContainer {
|
||||||
|
unsigned iommu_type;
|
||||||
|
Error *error;
|
||||||
|
bool initialized;
|
||||||
|
- bool dirty_pages_supported;
|
||||||
|
uint64_t dirty_pgsizes;
|
||||||
|
uint64_t max_dirty_bitmap_size;
|
||||||
|
unsigned long pgsizes;
|
||||||
|
@@ -190,11 +189,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
|
||||||
|
void vfio_put_address_space(VFIOAddressSpace *space);
|
||||||
|
bool vfio_devices_all_running_and_saving(VFIOContainer *container);
|
||||||
|
|
||||||
|
-/* container->fd */
|
||||||
|
-int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start);
|
||||||
|
-int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
|
||||||
|
- hwaddr iova, hwaddr size);
|
||||||
|
-
|
||||||
|
/* SPAPR specific */
|
||||||
|
int vfio_container_add_section_window(VFIOContainer *container,
|
||||||
|
MemoryRegionSection *section,
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index c7cc6ec9c5..f244f003d0 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -36,6 +36,7 @@ typedef struct VFIOAddressSpace {
|
||||||
|
typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
VFIOAddressSpace *space;
|
||||||
|
+ bool dirty_pages_supported;
|
||||||
|
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
|
||||||
|
QLIST_ENTRY(VFIOContainerBase) next;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
@@ -54,6 +55,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb);
|
||||||
|
+int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
+ bool start);
|
||||||
|
+int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ VFIOBitmap *vbmap,
|
||||||
|
+ hwaddr iova, hwaddr size);
|
||||||
|
|
||||||
|
void vfio_container_init(VFIOContainerBase *bcontainer,
|
||||||
|
VFIOAddressSpace *space,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
303
kvm-vfio-container-Switch-to-dma_map-unmap-API.patch
Normal file
303
kvm-vfio-container-Switch-to-dma_map-unmap-API.patch
Normal file
@ -0,0 +1,303 @@
|
|||||||
|
From 00daef8e3f4f64b1401b2e8945c256d27fbfa960 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:29 +0800
|
||||||
|
Subject: [PATCH 004/101] vfio/container: Switch to dma_map|unmap API
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [3/67] 9a20e2f2b277be65463f145df3309271493be6ac (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit b08501a999e2448f500a46d68da503be55186b04)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 45 +++++++++++++++------------
|
||||||
|
hw/vfio/container-base.c | 32 +++++++++++++++++++
|
||||||
|
hw/vfio/container.c | 22 ++++++++-----
|
||||||
|
hw/vfio/meson.build | 1 +
|
||||||
|
hw/vfio/trace-events | 2 +-
|
||||||
|
include/hw/vfio/vfio-common.h | 4 ---
|
||||||
|
include/hw/vfio/vfio-container-base.h | 7 +++++
|
||||||
|
7 files changed, 81 insertions(+), 32 deletions(-)
|
||||||
|
create mode 100644 hw/vfio/container-base.c
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index e70fdf5e0c..e610771888 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -292,7 +292,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
|
||||||
|
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
|
||||||
|
- VFIOContainer *container = giommu->container;
|
||||||
|
+ VFIOContainerBase *bcontainer = &giommu->container->bcontainer;
|
||||||
|
hwaddr iova = iotlb->iova + giommu->iommu_offset;
|
||||||
|
void *vaddr;
|
||||||
|
int ret;
|
||||||
|
@@ -322,21 +322,22 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
|
||||||
|
* of vaddr will always be there, even if the memory object is
|
||||||
|
* destroyed and its backing memory munmap-ed.
|
||||||
|
*/
|
||||||
|
- ret = vfio_dma_map(container, iova,
|
||||||
|
- iotlb->addr_mask + 1, vaddr,
|
||||||
|
- read_only);
|
||||||
|
+ ret = vfio_container_dma_map(bcontainer, iova,
|
||||||
|
+ iotlb->addr_mask + 1, vaddr,
|
||||||
|
+ read_only);
|
||||||
|
if (ret) {
|
||||||
|
- error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
+ error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx", %p) = %d (%s)",
|
||||||
|
- container, iova,
|
||||||
|
+ bcontainer, iova,
|
||||||
|
iotlb->addr_mask + 1, vaddr, ret, strerror(-ret));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb);
|
||||||
|
+ ret = vfio_container_dma_unmap(bcontainer, iova,
|
||||||
|
+ iotlb->addr_mask + 1, iotlb);
|
||||||
|
if (ret) {
|
||||||
|
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
- container, iova,
|
||||||
|
+ bcontainer, iova,
|
||||||
|
iotlb->addr_mask + 1, ret, strerror(-ret));
|
||||||
|
vfio_set_migration_error(ret);
|
||||||
|
}
|
||||||
|
@@ -355,9 +356,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Unmap with a single call. */
|
||||||
|
- ret = vfio_dma_unmap(vrdl->container, iova, size , NULL);
|
||||||
|
+ ret = vfio_container_dma_unmap(&vrdl->container->bcontainer,
|
||||||
|
+ iova, size , NULL);
|
||||||
|
if (ret) {
|
||||||
|
- error_report("%s: vfio_dma_unmap() failed: %s", __func__,
|
||||||
|
+ error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
|
||||||
|
strerror(-ret));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -385,8 +387,8 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
|
||||||
|
section->offset_within_address_space;
|
||||||
|
vaddr = memory_region_get_ram_ptr(section->mr) + start;
|
||||||
|
|
||||||
|
- ret = vfio_dma_map(vrdl->container, iova, next - start,
|
||||||
|
- vaddr, section->readonly);
|
||||||
|
+ ret = vfio_container_dma_map(&vrdl->container->bcontainer, iova,
|
||||||
|
+ next - start, vaddr, section->readonly);
|
||||||
|
if (ret) {
|
||||||
|
/* Rollback */
|
||||||
|
vfio_ram_discard_notify_discard(rdl, section);
|
||||||
|
@@ -684,10 +686,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- ret = vfio_dma_map(container, iova, int128_get64(llsize),
|
||||||
|
- vaddr, section->readonly);
|
||||||
|
+ ret = vfio_container_dma_map(&container->bcontainer,
|
||||||
|
+ iova, int128_get64(llsize), vaddr,
|
||||||
|
+ section->readonly);
|
||||||
|
if (ret) {
|
||||||
|
- error_setg(&err, "vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
+ error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx", %p) = %d (%s)",
|
||||||
|
container, iova, int128_get64(llsize), vaddr, ret,
|
||||||
|
strerror(-ret));
|
||||||
|
@@ -784,18 +787,20 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
if (int128_eq(llsize, int128_2_64())) {
|
||||||
|
/* The unmap ioctl doesn't accept a full 64-bit span. */
|
||||||
|
llsize = int128_rshift(llsize, 1);
|
||||||
|
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||||
|
+ ret = vfio_container_dma_unmap(&container->bcontainer, iova,
|
||||||
|
+ int128_get64(llsize), NULL);
|
||||||
|
if (ret) {
|
||||||
|
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
container, iova, int128_get64(llsize), ret,
|
||||||
|
strerror(-ret));
|
||||||
|
}
|
||||||
|
iova += int128_get64(llsize);
|
||||||
|
}
|
||||||
|
- ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL);
|
||||||
|
+ ret = vfio_container_dma_unmap(&container->bcontainer, iova,
|
||||||
|
+ int128_get64(llsize), NULL);
|
||||||
|
if (ret) {
|
||||||
|
- error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
+ error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
|
||||||
|
"0x%"HWADDR_PRIx") = %d (%s)",
|
||||||
|
container, iova, int128_get64(llsize), ret,
|
||||||
|
strerror(-ret));
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..55d3a35fa4
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -0,0 +1,32 @@
|
||||||
|
+/*
|
||||||
|
+ * VFIO BASE CONTAINER
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ * Copyright Red Hat, Inc. 2023
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ * Eric Auger <eric.auger@redhat.com>
|
||||||
|
+ *
|
||||||
|
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "qemu/osdep.h"
|
||||||
|
+#include "qapi/error.h"
|
||||||
|
+#include "qemu/error-report.h"
|
||||||
|
+#include "hw/vfio/vfio-container-base.h"
|
||||||
|
+
|
||||||
|
+int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ void *vaddr, bool readonly)
|
||||||
|
+{
|
||||||
|
+ g_assert(bcontainer->ops->dma_map);
|
||||||
|
+ return bcontainer->ops->dma_map(bcontainer, iova, size, vaddr, readonly);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ IOMMUTLBEntry *iotlb)
|
||||||
|
+{
|
||||||
|
+ g_assert(bcontainer->ops->dma_unmap);
|
||||||
|
+ return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||||
|
+}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 4bc43ddfa4..c04df26323 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -115,9 +115,11 @@ unmap_exit:
|
||||||
|
/*
|
||||||
|
* DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
|
||||||
|
*/
|
||||||
|
-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||||
|
- ram_addr_t size, IOMMUTLBEntry *iotlb)
|
||||||
|
+static int vfio_legacy_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
+ ram_addr_t size, IOMMUTLBEntry *iotlb)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dma_unmap unmap = {
|
||||||
|
.argsz = sizeof(unmap),
|
||||||
|
.flags = 0,
|
||||||
|
@@ -151,7 +153,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||||
|
*/
|
||||||
|
if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
|
||||||
|
container->iommu_type == VFIO_TYPE1v2_IOMMU) {
|
||||||
|
- trace_vfio_dma_unmap_overflow_workaround();
|
||||||
|
+ trace_vfio_legacy_dma_unmap_overflow_workaround();
|
||||||
|
unmap.size -= 1ULL << ctz64(container->pgsizes);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -170,9 +172,11 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||||
|
- ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
+static int vfio_legacy_dma_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
struct vfio_iommu_type1_dma_map map = {
|
||||||
|
.argsz = sizeof(map),
|
||||||
|
.flags = VFIO_DMA_MAP_FLAG_READ,
|
||||||
|
@@ -191,7 +195,8 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||||
|
* the VGA ROM space.
|
||||||
|
*/
|
||||||
|
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
|
||||||
|
- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
|
||||||
|
+ (errno == EBUSY &&
|
||||||
|
+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 &&
|
||||||
|
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -937,4 +942,7 @@ void vfio_detach_device(VFIODevice *vbasedev)
|
||||||
|
vfio_put_group(group);
|
||||||
|
}
|
||||||
|
|
||||||
|
-const VFIOIOMMUOps vfio_legacy_ops;
|
||||||
|
+const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
+ .dma_map = vfio_legacy_dma_map,
|
||||||
|
+ .dma_unmap = vfio_legacy_dma_unmap,
|
||||||
|
+};
|
||||||
|
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
|
||||||
|
index 2a6912c940..eb6ce6229d 100644
|
||||||
|
--- a/hw/vfio/meson.build
|
||||||
|
+++ b/hw/vfio/meson.build
|
||||||
|
@@ -2,6 +2,7 @@ vfio_ss = ss.source_set()
|
||||||
|
vfio_ss.add(files(
|
||||||
|
'helpers.c',
|
||||||
|
'common.c',
|
||||||
|
+ 'container-base.c',
|
||||||
|
'container.c',
|
||||||
|
'spapr.c',
|
||||||
|
'migration.c',
|
||||||
|
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||||
|
index 0eb2387cf2..9f7fedee98 100644
|
||||||
|
--- a/hw/vfio/trace-events
|
||||||
|
+++ b/hw/vfio/trace-events
|
||||||
|
@@ -116,7 +116,7 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re
|
||||||
|
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
|
||||||
|
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
|
||||||
|
vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
|
||||||
|
-vfio_dma_unmap_overflow_workaround(void) ""
|
||||||
|
+vfio_legacy_dma_unmap_overflow_workaround(void) ""
|
||||||
|
vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
|
||||||
|
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 678161f207..24a26345e5 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -208,10 +208,6 @@ void vfio_put_address_space(VFIOAddressSpace *space);
|
||||||
|
bool vfio_devices_all_running_and_saving(VFIOContainer *container);
|
||||||
|
|
||||||
|
/* container->fd */
|
||||||
|
-int vfio_dma_unmap(VFIOContainer *container, hwaddr iova,
|
||||||
|
- ram_addr_t size, IOMMUTLBEntry *iotlb);
|
||||||
|
-int vfio_dma_map(VFIOContainer *container, hwaddr iova,
|
||||||
|
- ram_addr_t size, void *vaddr, bool readonly);
|
||||||
|
int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start);
|
||||||
|
int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size);
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 1d6daaea5d..56b033f59f 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -31,6 +31,13 @@ typedef struct VFIOContainerBase {
|
||||||
|
const VFIOIOMMUOps *ops;
|
||||||
|
} VFIOContainerBase;
|
||||||
|
|
||||||
|
+int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ void *vaddr, bool readonly);
|
||||||
|
+int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ IOMMUTLBEntry *iotlb);
|
||||||
|
+
|
||||||
|
struct VFIOIOMMUOps {
|
||||||
|
/* basic feature */
|
||||||
|
int (*dma_map)(VFIOContainerBase *bcontainer,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
115
kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch
Normal file
115
kvm-vfio-iommufd-Add-support-for-iova_ranges-and-pgsizes.patch
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
From 49435d4d592bc890f56b69c2290f890c87b5a103 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:05 +0800
|
||||||
|
Subject: [PATCH 026/101] vfio/iommufd: Add support for iova_ranges and pgsizes
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [25/67] 578af0547d97276ccd4936b574c12118fc70d468 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some vIOMMU such as virtio-iommu use IOVA ranges from host side to
|
||||||
|
setup reserved ranges for passthrough device, so that guest will not
|
||||||
|
use an IOVA range beyond host support.
|
||||||
|
|
||||||
|
Use an uAPI of IOMMUFD to get IOVA ranges of host side and pass to
|
||||||
|
vIOMMU just like the legacy backend, if this fails, fallback to
|
||||||
|
64bit IOVA range.
|
||||||
|
|
||||||
|
Also use out_iova_alignment returned from uAPI as pgsizes instead of
|
||||||
|
qemu_real_host_page_size() as a fallback.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 714e9affa8ae1d84007c8afde7bb10fef9cb883d)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/iommufd.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
1 file changed, 55 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index 6d31aeac7b..01b448e840 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -261,6 +261,53 @@ static int iommufd_cdev_ram_block_discard_disable(bool state)
|
||||||
|
return ram_block_uncoordinated_discard_disable(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
|
||||||
|
+ uint32_t ioas_id, Error **errp)
|
||||||
|
+{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ struct iommu_ioas_iova_ranges *info;
|
||||||
|
+ struct iommu_iova_range *iova_ranges;
|
||||||
|
+ int ret, sz, fd = container->be->fd;
|
||||||
|
+
|
||||||
|
+ info = g_malloc0(sizeof(*info));
|
||||||
|
+ info->size = sizeof(*info);
|
||||||
|
+ info->ioas_id = ioas_id;
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
|
||||||
|
+ if (ret && errno != EMSGSIZE) {
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ sz = info->num_iovas * sizeof(struct iommu_iova_range);
|
||||||
|
+ info = g_realloc(info, sizeof(*info) + sz);
|
||||||
|
+ info->allowed_iovas = (uintptr_t)(info + 1);
|
||||||
|
+
|
||||||
|
+ ret = ioctl(fd, IOMMU_IOAS_IOVA_RANGES, info);
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ iova_ranges = (struct iommu_iova_range *)(uintptr_t)info->allowed_iovas;
|
||||||
|
+
|
||||||
|
+ for (int i = 0; i < info->num_iovas; i++) {
|
||||||
|
+ Range *range = g_new(Range, 1);
|
||||||
|
+
|
||||||
|
+ range_set_bounds(range, iova_ranges[i].start, iova_ranges[i].last);
|
||||||
|
+ bcontainer->iova_ranges =
|
||||||
|
+ range_list_insert(bcontainer->iova_ranges, range);
|
||||||
|
+ }
|
||||||
|
+ bcontainer->pgsizes = info->out_iova_alignment;
|
||||||
|
+
|
||||||
|
+ g_free(info);
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+error:
|
||||||
|
+ ret = -errno;
|
||||||
|
+ g_free(info);
|
||||||
|
+ error_setg_errno(errp, errno, "Cannot get IOVA ranges");
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
AddressSpace *as, Error **errp)
|
||||||
|
{
|
||||||
|
@@ -335,7 +382,14 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
goto err_discard_disable;
|
||||||
|
}
|
||||||
|
|
||||||
|
- bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
+ ret = iommufd_cdev_get_info_iova_range(container, ioas_id, &err);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_append_hint(&err,
|
||||||
|
+ "Fallback to default 64bit IOVA range and 4K page size\n");
|
||||||
|
+ warn_report_err(err);
|
||||||
|
+ err = NULL;
|
||||||
|
+ bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
+ }
|
||||||
|
|
||||||
|
bcontainer->listener = vfio_memory_listener;
|
||||||
|
memory_listener_register(&bcontainer->listener, bcontainer->space->as);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
215
kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch
Normal file
215
kvm-vfio-iommufd-Enable-pci-hot-reset-through-iommufd-cd.patch
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
From e94700896dd8fcea149d9719eccde6f485440be2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:08 +0800
|
||||||
|
Subject: [PATCH 029/101] vfio/iommufd: Enable pci hot reset through iommufd
|
||||||
|
cdev interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [28/67] ca1ae970138ee4a6f4b3b49817e775f3159f4c97 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Implement the newly introduced pci_hot_reset callback named
|
||||||
|
iommufd_cdev_pci_hot_reset to do the iommufd-specific checks and
|
||||||
|
reset operation.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 96d6f85ff012abd7aaa35b1a2bc48b8640c898d9)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/iommufd.c | 150 +++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
hw/vfio/trace-events | 1 +
|
||||||
|
2 files changed, 151 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index 01b448e840..6e53e013ef 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -24,6 +24,7 @@
|
||||||
|
#include "sysemu/reset.h"
|
||||||
|
#include "qemu/cutils.h"
|
||||||
|
#include "qemu/chardev_open.h"
|
||||||
|
+#include "pci.h"
|
||||||
|
|
||||||
|
static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
@@ -468,9 +469,158 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
|
||||||
|
close(vbasedev->fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
|
||||||
|
+{
|
||||||
|
+ VFIODevice *vbasedev_iter;
|
||||||
|
+
|
||||||
|
+ QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
|
||||||
|
+ if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (devid == vbasedev_iter->devid) {
|
||||||
|
+ return vbasedev_iter;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static VFIOPCIDevice *
|
||||||
|
+iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
|
||||||
|
+ VFIODevice *reset_dev)
|
||||||
|
+{
|
||||||
|
+ VFIODevice *vbasedev_tmp;
|
||||||
|
+
|
||||||
|
+ if (dep_dev->devid == reset_dev->devid ||
|
||||||
|
+ dep_dev->devid == VFIO_PCI_DEVID_OWNED) {
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
|
||||||
|
+ if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
|
||||||
|
+ vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return container_of(vbasedev_tmp, VFIOPCIDevice, vbasedev);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_pci_hot_reset(VFIODevice *vbasedev, bool single)
|
||||||
|
+{
|
||||||
|
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||||
|
+ struct vfio_pci_hot_reset_info *info = NULL;
|
||||||
|
+ struct vfio_pci_dependent_device *devices;
|
||||||
|
+ struct vfio_pci_hot_reset *reset;
|
||||||
|
+ int ret, i;
|
||||||
|
+ bool multi = false;
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||||
|
+
|
||||||
|
+ if (!single) {
|
||||||
|
+ vfio_pci_pre_reset(vdev);
|
||||||
|
+ }
|
||||||
|
+ vdev->vbasedev.needs_reset = false;
|
||||||
|
+
|
||||||
|
+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
|
||||||
|
+
|
||||||
|
+ devices = &info->devices[0];
|
||||||
|
+
|
||||||
|
+ if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
|
||||||
|
+ if (!vdev->has_pm_reset) {
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
|
||||||
|
+ error_report("vfio: Cannot reset device %s, "
|
||||||
|
+ "depends on device %04x:%02x:%02x.%x "
|
||||||
|
+ "which is not owned.",
|
||||||
|
+ vdev->vbasedev.name, devices[i].segment,
|
||||||
|
+ devices[i].bus, PCI_SLOT(devices[i].devfn),
|
||||||
|
+ PCI_FUNC(devices[i].devfn));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ ret = -EPERM;
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ VFIOPCIDevice *tmp;
|
||||||
|
+
|
||||||
|
+ trace_iommufd_cdev_pci_hot_reset_dep_devices(devices[i].segment,
|
||||||
|
+ devices[i].bus,
|
||||||
|
+ PCI_SLOT(devices[i].devfn),
|
||||||
|
+ PCI_FUNC(devices[i].devfn),
|
||||||
|
+ devices[i].devid);
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * If a VFIO cdev device is resettable, all the dependent devices
|
||||||
|
+ * are either bound to same iommufd or within same iommu_groups as
|
||||||
|
+ * one of the iommufd bound devices.
|
||||||
|
+ */
|
||||||
|
+ assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
|
||||||
|
+
|
||||||
|
+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
|
||||||
|
+ if (!tmp) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (single) {
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+ vfio_pci_pre_reset(tmp);
|
||||||
|
+ tmp->vbasedev.needs_reset = false;
|
||||||
|
+ multi = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!single && !multi) {
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Use zero length array for hot reset with iommufd backend */
|
||||||
|
+ reset = g_malloc0(sizeof(*reset));
|
||||||
|
+ reset->argsz = sizeof(*reset);
|
||||||
|
+
|
||||||
|
+ /* Bus reset! */
|
||||||
|
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
|
||||||
|
+ g_free(reset);
|
||||||
|
+ if (ret) {
|
||||||
|
+ ret = -errno;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
|
||||||
|
+ ret ? strerror(errno) : "Success");
|
||||||
|
+
|
||||||
|
+ /* Re-enable INTx on affected devices */
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ VFIOPCIDevice *tmp;
|
||||||
|
+
|
||||||
|
+ tmp = iommufd_cdev_dep_get_realized_vpdev(&devices[i], &vdev->vbasedev);
|
||||||
|
+ if (!tmp) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ vfio_pci_post_reset(tmp);
|
||||||
|
+ }
|
||||||
|
+out_single:
|
||||||
|
+ if (!single) {
|
||||||
|
+ vfio_pci_post_reset(vdev);
|
||||||
|
+ }
|
||||||
|
+ g_free(info);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
const VFIOIOMMUOps vfio_iommufd_ops = {
|
||||||
|
.dma_map = iommufd_cdev_map,
|
||||||
|
.dma_unmap = iommufd_cdev_unmap,
|
||||||
|
.attach_device = iommufd_cdev_attach,
|
||||||
|
.detach_device = iommufd_cdev_detach,
|
||||||
|
+ .pci_hot_reset = iommufd_cdev_pci_hot_reset,
|
||||||
|
};
|
||||||
|
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||||
|
index 3340c93af0..8fdde54456 100644
|
||||||
|
--- a/hw/vfio/trace-events
|
||||||
|
+++ b/hw/vfio/trace-events
|
||||||
|
@@ -174,3 +174,4 @@ iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Succ
|
||||||
|
iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
|
||||||
|
iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
|
||||||
|
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
|
||||||
|
+iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
561
kvm-vfio-iommufd-Implement-the-iommufd-backend.patch
Normal file
561
kvm-vfio-iommufd-Implement-the-iommufd-backend.patch
Normal file
@ -0,0 +1,561 @@
|
|||||||
|
From f018d0b686406256c2b5e823e4227316ee1394e9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:03 +0800
|
||||||
|
Subject: [PATCH 024/101] vfio/iommufd: Implement the iommufd backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [23/67] d11046654117a690542a1e2b48b9d1994f778b2d (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
The iommufd backend is implemented based on the new /dev/iommu user API.
|
||||||
|
This backend obviously depends on CONFIG_IOMMUFD.
|
||||||
|
|
||||||
|
So far, the iommufd backend doesn't support dirty page sync yet.
|
||||||
|
|
||||||
|
Co-authored-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 5ee3dc7af7859e7b8aa34c10c21778101c15e812)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 6 +
|
||||||
|
hw/vfio/iommufd.c | 422 ++++++++++++++++++++++++++++++++++
|
||||||
|
hw/vfio/meson.build | 3 +
|
||||||
|
hw/vfio/trace-events | 10 +
|
||||||
|
include/hw/vfio/vfio-common.h | 11 +
|
||||||
|
5 files changed, 452 insertions(+)
|
||||||
|
create mode 100644 hw/vfio/iommufd.c
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 934f4f5446..6569732b7a 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -19,6 +19,7 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#ifdef CONFIG_KVM
|
||||||
|
#include <linux/kvm.h>
|
||||||
|
@@ -1503,6 +1504,11 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
{
|
||||||
|
const VFIOIOMMUOps *ops = &vfio_legacy_ops;
|
||||||
|
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ if (vbasedev->iommufd) {
|
||||||
|
+ ops = &vfio_iommufd_ops;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
return ops->attach_device(name, vbasedev, as, errp);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..6d31aeac7b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -0,0 +1,422 @@
|
||||||
|
+/*
|
||||||
|
+ * iommufd container backend
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) 2023 Intel Corporation.
|
||||||
|
+ * Copyright Red Hat, Inc. 2023
|
||||||
|
+ *
|
||||||
|
+ * Authors: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
+ * Eric Auger <eric.auger@redhat.com>
|
||||||
|
+ *
|
||||||
|
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "qemu/osdep.h"
|
||||||
|
+#include <sys/ioctl.h>
|
||||||
|
+#include <linux/vfio.h>
|
||||||
|
+#include <linux/iommufd.h>
|
||||||
|
+
|
||||||
|
+#include "hw/vfio/vfio-common.h"
|
||||||
|
+#include "qemu/error-report.h"
|
||||||
|
+#include "trace.h"
|
||||||
|
+#include "qapi/error.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
+#include "hw/qdev-core.h"
|
||||||
|
+#include "sysemu/reset.h"
|
||||||
|
+#include "qemu/cutils.h"
|
||||||
|
+#include "qemu/chardev_open.h"
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_map(VFIOContainerBase *bcontainer, hwaddr iova,
|
||||||
|
+ ram_addr_t size, void *vaddr, bool readonly)
|
||||||
|
+{
|
||||||
|
+ VFIOIOMMUFDContainer *container =
|
||||||
|
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
+
|
||||||
|
+ return iommufd_backend_map_dma(container->be,
|
||||||
|
+ container->ioas_id,
|
||||||
|
+ iova, size, vaddr, readonly);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
+ hwaddr iova, ram_addr_t size,
|
||||||
|
+ IOMMUTLBEntry *iotlb)
|
||||||
|
+{
|
||||||
|
+ VFIOIOMMUFDContainer *container =
|
||||||
|
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
+
|
||||||
|
+ /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
|
||||||
|
+ return iommufd_backend_unmap_dma(container->be,
|
||||||
|
+ container->ioas_id, iova, size);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_kvm_device_add(VFIODevice *vbasedev, Error **errp)
|
||||||
|
+{
|
||||||
|
+ return vfio_kvm_device_add_fd(vbasedev->fd, errp);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
|
||||||
|
+{
|
||||||
|
+ Error *err = NULL;
|
||||||
|
+
|
||||||
|
+ if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
|
||||||
|
+ error_report_err(err);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
|
||||||
|
+{
|
||||||
|
+ IOMMUFDBackend *iommufd = vbasedev->iommufd;
|
||||||
|
+ struct vfio_device_bind_iommufd bind = {
|
||||||
|
+ .argsz = sizeof(bind),
|
||||||
|
+ .flags = 0,
|
||||||
|
+ };
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ ret = iommufd_backend_connect(iommufd, errp);
|
||||||
|
+ if (ret) {
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Add device to kvm-vfio to be prepared for the tracking
|
||||||
|
+ * in KVM. Especially for some emulated devices, it requires
|
||||||
|
+ * to have kvm information in the device open.
|
||||||
|
+ */
|
||||||
|
+ ret = iommufd_cdev_kvm_device_add(vbasedev, errp);
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto err_kvm_device_add;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Bind device to iommufd */
|
||||||
|
+ bind.iommufd = iommufd->fd;
|
||||||
|
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
|
||||||
|
+ vbasedev->fd, bind.iommufd);
|
||||||
|
+ goto err_bind;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vbasedev->devid = bind.out_devid;
|
||||||
|
+ trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
|
||||||
|
+ vbasedev->fd, vbasedev->devid);
|
||||||
|
+ return ret;
|
||||||
|
+err_bind:
|
||||||
|
+ iommufd_cdev_kvm_device_del(vbasedev);
|
||||||
|
+err_kvm_device_add:
|
||||||
|
+ iommufd_backend_disconnect(iommufd);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_cdev_unbind_and_disconnect(VFIODevice *vbasedev)
|
||||||
|
+{
|
||||||
|
+ /* Unbind is automatically conducted when device fd is closed */
|
||||||
|
+ iommufd_cdev_kvm_device_del(vbasedev);
|
||||||
|
+ iommufd_backend_disconnect(vbasedev->iommufd);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
|
||||||
|
+{
|
||||||
|
+ long int ret = -ENOTTY;
|
||||||
|
+ char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
|
||||||
|
+ DIR *dir = NULL;
|
||||||
|
+ struct dirent *dent;
|
||||||
|
+ gchar *contents;
|
||||||
|
+ struct stat st;
|
||||||
|
+ gsize length;
|
||||||
|
+ int major, minor;
|
||||||
|
+ dev_t vfio_devt;
|
||||||
|
+
|
||||||
|
+ path = g_strdup_printf("%s/vfio-dev", sysfs_path);
|
||||||
|
+ if (stat(path, &st) < 0) {
|
||||||
|
+ error_setg_errno(errp, errno, "no such host device");
|
||||||
|
+ goto out_free_path;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ dir = opendir(path);
|
||||||
|
+ if (!dir) {
|
||||||
|
+ error_setg_errno(errp, errno, "couldn't open directory %s", path);
|
||||||
|
+ goto out_free_path;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ while ((dent = readdir(dir))) {
|
||||||
|
+ if (!strncmp(dent->d_name, "vfio", 4)) {
|
||||||
|
+ vfio_dev_path = g_strdup_printf("%s/%s/dev", path, dent->d_name);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!vfio_dev_path) {
|
||||||
|
+ error_setg(errp, "failed to find vfio-dev/vfioX/dev");
|
||||||
|
+ goto out_close_dir;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!g_file_get_contents(vfio_dev_path, &contents, &length, NULL)) {
|
||||||
|
+ error_setg(errp, "failed to load \"%s\"", vfio_dev_path);
|
||||||
|
+ goto out_free_dev_path;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (sscanf(contents, "%d:%d", &major, &minor) != 2) {
|
||||||
|
+ error_setg(errp, "failed to get major:minor for \"%s\"", vfio_dev_path);
|
||||||
|
+ goto out_free_dev_path;
|
||||||
|
+ }
|
||||||
|
+ g_free(contents);
|
||||||
|
+ vfio_devt = makedev(major, minor);
|
||||||
|
+
|
||||||
|
+ vfio_path = g_strdup_printf("/dev/vfio/devices/%s", dent->d_name);
|
||||||
|
+ ret = open_cdev(vfio_path, vfio_devt);
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ error_setg(errp, "Failed to open %s", vfio_path);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_iommufd_cdev_getfd(vfio_path, ret);
|
||||||
|
+ g_free(vfio_path);
|
||||||
|
+
|
||||||
|
+out_free_dev_path:
|
||||||
|
+ g_free(vfio_dev_path);
|
||||||
|
+out_close_dir:
|
||||||
|
+ closedir(dir);
|
||||||
|
+out_free_path:
|
||||||
|
+ if (*errp) {
|
||||||
|
+ error_prepend(errp, VFIO_MSG_PREFIX, path);
|
||||||
|
+ }
|
||||||
|
+ g_free(path);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_attach_ioas_hwpt(VFIODevice *vbasedev, uint32_t id,
|
||||||
|
+ Error **errp)
|
||||||
|
+{
|
||||||
|
+ int ret, iommufd = vbasedev->iommufd->fd;
|
||||||
|
+ struct vfio_device_attach_iommufd_pt attach_data = {
|
||||||
|
+ .argsz = sizeof(attach_data),
|
||||||
|
+ .flags = 0,
|
||||||
|
+ .pt_id = id,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ /* Attach device to an IOAS or hwpt within iommufd */
|
||||||
|
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_ATTACH_IOMMUFD_PT, &attach_data);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, errno,
|
||||||
|
+ "[iommufd=%d] error attach %s (%d) to id=%d",
|
||||||
|
+ iommufd, vbasedev->name, vbasedev->fd, id);
|
||||||
|
+ } else {
|
||||||
|
+ trace_iommufd_cdev_attach_ioas_hwpt(iommufd, vbasedev->name,
|
||||||
|
+ vbasedev->fd, id);
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
|
||||||
|
+{
|
||||||
|
+ int ret, iommufd = vbasedev->iommufd->fd;
|
||||||
|
+ struct vfio_device_detach_iommufd_pt detach_data = {
|
||||||
|
+ .argsz = sizeof(detach_data),
|
||||||
|
+ .flags = 0,
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ ret = ioctl(vbasedev->fd, VFIO_DEVICE_DETACH_IOMMUFD_PT, &detach_data);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, errno, "detach %s failed", vbasedev->name);
|
||||||
|
+ } else {
|
||||||
|
+ trace_iommufd_cdev_detach_ioas_hwpt(iommufd, vbasedev->name);
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_attach_container(VFIODevice *vbasedev,
|
||||||
|
+ VFIOIOMMUFDContainer *container,
|
||||||
|
+ Error **errp)
|
||||||
|
+{
|
||||||
|
+ return iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
|
||||||
|
+ VFIOIOMMUFDContainer *container)
|
||||||
|
+{
|
||||||
|
+ Error *err = NULL;
|
||||||
|
+
|
||||||
|
+ if (iommufd_cdev_detach_ioas_hwpt(vbasedev, &err)) {
|
||||||
|
+ error_report_err(err);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
|
||||||
|
+{
|
||||||
|
+ VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+
|
||||||
|
+ if (!QLIST_EMPTY(&bcontainer->device_list)) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ memory_listener_unregister(&bcontainer->listener);
|
||||||
|
+ vfio_container_destroy(bcontainer);
|
||||||
|
+ iommufd_backend_free_id(container->be, container->ioas_id);
|
||||||
|
+ g_free(container);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_ram_block_discard_disable(bool state)
|
||||||
|
+{
|
||||||
|
+ /*
|
||||||
|
+ * We support coordinated discarding of RAM via the RamDiscardManager.
|
||||||
|
+ */
|
||||||
|
+ return ram_block_uncoordinated_discard_disable(state);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
+ AddressSpace *as, Error **errp)
|
||||||
|
+{
|
||||||
|
+ VFIOContainerBase *bcontainer;
|
||||||
|
+ VFIOIOMMUFDContainer *container;
|
||||||
|
+ VFIOAddressSpace *space;
|
||||||
|
+ struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
|
||||||
|
+ int ret, devfd;
|
||||||
|
+ uint32_t ioas_id;
|
||||||
|
+ Error *err = NULL;
|
||||||
|
+
|
||||||
|
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||||||
|
+ if (devfd < 0) {
|
||||||
|
+ return devfd;
|
||||||
|
+ }
|
||||||
|
+ vbasedev->fd = devfd;
|
||||||
|
+
|
||||||
|
+ ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto err_connect_bind;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ space = vfio_get_address_space(as);
|
||||||
|
+
|
||||||
|
+ /* try to attach to an existing container in this space */
|
||||||
|
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
|
||||||
|
+ container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
+ if (bcontainer->ops != &vfio_iommufd_ops ||
|
||||||
|
+ vbasedev->iommufd != container->be) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (iommufd_cdev_attach_container(vbasedev, container, &err)) {
|
||||||
|
+ const char *msg = error_get_pretty(err);
|
||||||
|
+
|
||||||
|
+ trace_iommufd_cdev_fail_attach_existing_container(msg);
|
||||||
|
+ error_free(err);
|
||||||
|
+ err = NULL;
|
||||||
|
+ } else {
|
||||||
|
+ ret = iommufd_cdev_ram_block_discard_disable(true);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg(errp,
|
||||||
|
+ "Cannot set discarding of RAM broken (%d)", ret);
|
||||||
|
+ goto err_discard_disable;
|
||||||
|
+ }
|
||||||
|
+ goto found_container;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Need to allocate a new dedicated container */
|
||||||
|
+ ret = iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp);
|
||||||
|
+ if (ret < 0) {
|
||||||
|
+ goto err_alloc_ioas;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
|
||||||
|
+
|
||||||
|
+ container = g_malloc0(sizeof(*container));
|
||||||
|
+ container->be = vbasedev->iommufd;
|
||||||
|
+ container->ioas_id = ioas_id;
|
||||||
|
+
|
||||||
|
+ bcontainer = &container->bcontainer;
|
||||||
|
+ vfio_container_init(bcontainer, space, &vfio_iommufd_ops);
|
||||||
|
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
|
||||||
|
+
|
||||||
|
+ ret = iommufd_cdev_attach_container(vbasedev, container, errp);
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto err_attach_container;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = iommufd_cdev_ram_block_discard_disable(true);
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto err_discard_disable;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ bcontainer->pgsizes = qemu_real_host_page_size();
|
||||||
|
+
|
||||||
|
+ bcontainer->listener = vfio_memory_listener;
|
||||||
|
+ memory_listener_register(&bcontainer->listener, bcontainer->space->as);
|
||||||
|
+
|
||||||
|
+ if (bcontainer->error) {
|
||||||
|
+ ret = -1;
|
||||||
|
+ error_propagate_prepend(errp, bcontainer->error,
|
||||||
|
+ "memory listener initialization failed: ");
|
||||||
|
+ goto err_listener_register;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ bcontainer->initialized = true;
|
||||||
|
+
|
||||||
|
+found_container:
|
||||||
|
+ ret = ioctl(devfd, VFIO_DEVICE_GET_INFO, &dev_info);
|
||||||
|
+ if (ret) {
|
||||||
|
+ error_setg_errno(errp, errno, "error getting device info");
|
||||||
|
+ goto err_listener_register;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * TODO: examine RAM_BLOCK_DISCARD stuff, should we do group level
|
||||||
|
+ * for discarding incompatibility check as well?
|
||||||
|
+ */
|
||||||
|
+ if (vbasedev->ram_block_discard_allowed) {
|
||||||
|
+ iommufd_cdev_ram_block_discard_disable(false);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vbasedev->group = 0;
|
||||||
|
+ vbasedev->num_irqs = dev_info.num_irqs;
|
||||||
|
+ vbasedev->num_regions = dev_info.num_regions;
|
||||||
|
+ vbasedev->flags = dev_info.flags;
|
||||||
|
+ vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
|
||||||
|
+ vbasedev->bcontainer = bcontainer;
|
||||||
|
+ QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
|
||||||
|
+ QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
|
||||||
|
+
|
||||||
|
+ trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
|
||||||
|
+ vbasedev->num_regions, vbasedev->flags);
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+err_listener_register:
|
||||||
|
+ iommufd_cdev_ram_block_discard_disable(false);
|
||||||
|
+err_discard_disable:
|
||||||
|
+ iommufd_cdev_detach_container(vbasedev, container);
|
||||||
|
+err_attach_container:
|
||||||
|
+ iommufd_cdev_container_destroy(container);
|
||||||
|
+err_alloc_ioas:
|
||||||
|
+ vfio_put_address_space(space);
|
||||||
|
+ iommufd_cdev_unbind_and_disconnect(vbasedev);
|
||||||
|
+err_connect_bind:
|
||||||
|
+ close(vbasedev->fd);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void iommufd_cdev_detach(VFIODevice *vbasedev)
|
||||||
|
+{
|
||||||
|
+ VFIOContainerBase *bcontainer = vbasedev->bcontainer;
|
||||||
|
+ VFIOAddressSpace *space = bcontainer->space;
|
||||||
|
+ VFIOIOMMUFDContainer *container = container_of(bcontainer,
|
||||||
|
+ VFIOIOMMUFDContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
+ QLIST_REMOVE(vbasedev, global_next);
|
||||||
|
+ QLIST_REMOVE(vbasedev, container_next);
|
||||||
|
+ vbasedev->bcontainer = NULL;
|
||||||
|
+
|
||||||
|
+ if (!vbasedev->ram_block_discard_allowed) {
|
||||||
|
+ iommufd_cdev_ram_block_discard_disable(false);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ iommufd_cdev_detach_container(vbasedev, container);
|
||||||
|
+ iommufd_cdev_container_destroy(container);
|
||||||
|
+ vfio_put_address_space(space);
|
||||||
|
+
|
||||||
|
+ iommufd_cdev_unbind_and_disconnect(vbasedev);
|
||||||
|
+ close(vbasedev->fd);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+const VFIOIOMMUOps vfio_iommufd_ops = {
|
||||||
|
+ .dma_map = iommufd_cdev_map,
|
||||||
|
+ .dma_unmap = iommufd_cdev_unmap,
|
||||||
|
+ .attach_device = iommufd_cdev_attach,
|
||||||
|
+ .detach_device = iommufd_cdev_detach,
|
||||||
|
+};
|
||||||
|
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
|
||||||
|
index eb6ce6229d..e5d98b6adc 100644
|
||||||
|
--- a/hw/vfio/meson.build
|
||||||
|
+++ b/hw/vfio/meson.build
|
||||||
|
@@ -7,6 +7,9 @@ vfio_ss.add(files(
|
||||||
|
'spapr.c',
|
||||||
|
'migration.c',
|
||||||
|
))
|
||||||
|
+vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
|
||||||
|
+ 'iommufd.c',
|
||||||
|
+))
|
||||||
|
vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
|
||||||
|
'display.c',
|
||||||
|
'pci-quirks.c',
|
||||||
|
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
||||||
|
index 08a1f9dfa4..3340c93af0 100644
|
||||||
|
--- a/hw/vfio/trace-events
|
||||||
|
+++ b/hw/vfio/trace-events
|
||||||
|
@@ -164,3 +164,13 @@ vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcop
|
||||||
|
vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64
|
||||||
|
vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
|
||||||
|
vfio_vmstate_change_prepare(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s"
|
||||||
|
+
|
||||||
|
+#iommufd.c
|
||||||
|
+
|
||||||
|
+iommufd_cdev_connect_and_bind(int iommufd, const char *name, int devfd, int devid) " [iommufd=%d] Successfully bound device %s (fd=%d): output devid=%d"
|
||||||
|
+iommufd_cdev_getfd(const char *dev, int devfd) " %s (fd=%d)"
|
||||||
|
+iommufd_cdev_attach_ioas_hwpt(int iommufd, const char *name, int devfd, int id) " [iommufd=%d] Successfully attached device %s (%d) to id=%d"
|
||||||
|
+iommufd_cdev_detach_ioas_hwpt(int iommufd, const char *name) " [iommufd=%d] Successfully detached %s"
|
||||||
|
+iommufd_cdev_fail_attach_existing_container(const char *msg) " %s"
|
||||||
|
+iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD container with ioasid=%d"
|
||||||
|
+iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 24ecc0e7ee..3dac5c167e 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow {
|
||||||
|
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
|
||||||
|
} VFIOHostDMAWindow;
|
||||||
|
|
||||||
|
+typedef struct IOMMUFDBackend IOMMUFDBackend;
|
||||||
|
+
|
||||||
|
+typedef struct VFIOIOMMUFDContainer {
|
||||||
|
+ VFIOContainerBase bcontainer;
|
||||||
|
+ IOMMUFDBackend *be;
|
||||||
|
+ uint32_t ioas_id;
|
||||||
|
+} VFIOIOMMUFDContainer;
|
||||||
|
+
|
||||||
|
typedef struct VFIODeviceOps VFIODeviceOps;
|
||||||
|
|
||||||
|
typedef struct VFIODevice {
|
||||||
|
@@ -116,6 +124,8 @@ typedef struct VFIODevice {
|
||||||
|
OnOffAuto pre_copy_dirty_page_tracking;
|
||||||
|
bool dirty_pages_supported;
|
||||||
|
bool dirty_tracking;
|
||||||
|
+ int devid;
|
||||||
|
+ IOMMUFDBackend *iommufd;
|
||||||
|
} VFIODevice;
|
||||||
|
|
||||||
|
struct VFIODeviceOps {
|
||||||
|
@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
|
||||||
|
extern VFIOGroupList vfio_group_list;
|
||||||
|
extern VFIODeviceList vfio_device_list;
|
||||||
|
extern const VFIOIOMMUOps vfio_legacy_ops;
|
||||||
|
+extern const VFIOIOMMUOps vfio_iommufd_ops;
|
||||||
|
extern const MemoryListener vfio_memory_listener;
|
||||||
|
extern int vfio_kvm_device_fd;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
155
kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch
Normal file
155
kvm-vfio-iommufd-Introduce-a-VFIOIOMMU-iommufd-QOM-inter.patch
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
From f98defd6fe081bc44f5bd823d187d7d3b12832ac Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:23 +0100
|
||||||
|
Subject: [PATCH 056/101] vfio/iommufd: Introduce a VFIOIOMMU iommufd QOM
|
||||||
|
interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [55/67] 789ecf74ace326b0df5d494fd558d7d0b6294a85 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
As previously done for the sPAPR and legacy IOMMU backends, convert
|
||||||
|
the VFIOIOMMUOps struct to a QOM interface. The set of operations
|
||||||
|
for this backend can be referenced with a literal typename instead of
|
||||||
|
a C struct.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit ce5f6d49f5845c3b9955cc377a5223c3f8d7ba1e)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 2 +-
|
||||||
|
hw/vfio/iommufd.c | 35 ++++++++++++++++++++-------
|
||||||
|
include/hw/vfio/vfio-common.h | 1 -
|
||||||
|
include/hw/vfio/vfio-container-base.h | 2 +-
|
||||||
|
4 files changed, 28 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 2329d0efc8..89ff1c7aed 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -1508,7 +1508,7 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
if (vbasedev->iommufd) {
|
||||||
|
- ops = &vfio_iommufd_ops;
|
||||||
|
+ ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index 87a561c545..d4c586e842 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -319,6 +319,8 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
int ret, devfd;
|
||||||
|
uint32_t ioas_id;
|
||||||
|
Error *err = NULL;
|
||||||
|
+ const VFIOIOMMUClass *iommufd_vioc =
|
||||||
|
+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
|
||||||
|
|
||||||
|
if (vbasedev->fd < 0) {
|
||||||
|
devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||||||
|
@@ -340,7 +342,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
/* try to attach to an existing container in this space */
|
||||||
|
QLIST_FOREACH(bcontainer, &space->containers, next) {
|
||||||
|
container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
|
||||||
|
- if (bcontainer->ops != &vfio_iommufd_ops ||
|
||||||
|
+ if (bcontainer->ops != iommufd_vioc ||
|
||||||
|
vbasedev->iommufd != container->be) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
@@ -374,7 +376,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
container->ioas_id = ioas_id;
|
||||||
|
|
||||||
|
bcontainer = &container->bcontainer;
|
||||||
|
- vfio_container_init(bcontainer, space, &vfio_iommufd_ops);
|
||||||
|
+ vfio_container_init(bcontainer, space, iommufd_vioc);
|
||||||
|
QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
|
||||||
|
|
||||||
|
ret = iommufd_cdev_attach_container(vbasedev, container, errp);
|
||||||
|
@@ -476,9 +478,11 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
|
||||||
|
static VFIODevice *iommufd_cdev_pci_find_by_devid(__u32 devid)
|
||||||
|
{
|
||||||
|
VFIODevice *vbasedev_iter;
|
||||||
|
+ const VFIOIOMMUClass *iommufd_vioc =
|
||||||
|
+ VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
|
||||||
|
|
||||||
|
QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
|
||||||
|
- if (vbasedev_iter->bcontainer->ops != &vfio_iommufd_ops) {
|
||||||
|
+ if (vbasedev_iter->bcontainer->ops != iommufd_vioc) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (devid == vbasedev_iter->devid) {
|
||||||
|
@@ -621,10 +625,23 @@ out_single:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
-const VFIOIOMMUOps vfio_iommufd_ops = {
|
||||||
|
- .dma_map = iommufd_cdev_map,
|
||||||
|
- .dma_unmap = iommufd_cdev_unmap,
|
||||||
|
- .attach_device = iommufd_cdev_attach,
|
||||||
|
- .detach_device = iommufd_cdev_detach,
|
||||||
|
- .pci_hot_reset = iommufd_cdev_pci_hot_reset,
|
||||||
|
+static void vfio_iommu_iommufd_class_init(ObjectClass *klass, void *data)
|
||||||
|
+{
|
||||||
|
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
|
||||||
|
+
|
||||||
|
+ vioc->dma_map = iommufd_cdev_map;
|
||||||
|
+ vioc->dma_unmap = iommufd_cdev_unmap;
|
||||||
|
+ vioc->attach_device = iommufd_cdev_attach;
|
||||||
|
+ vioc->detach_device = iommufd_cdev_detach;
|
||||||
|
+ vioc->pci_hot_reset = iommufd_cdev_pci_hot_reset;
|
||||||
|
};
|
||||||
|
+
|
||||||
|
+static const TypeInfo types[] = {
|
||||||
|
+ {
|
||||||
|
+ .name = TYPE_VFIO_IOMMU_IOMMUFD,
|
||||||
|
+ .parent = TYPE_VFIO_IOMMU,
|
||||||
|
+ .class_init = vfio_iommu_iommufd_class_init,
|
||||||
|
+ },
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+DEFINE_TYPES(types)
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 14c497b6b0..9b7ef7d02b 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -210,7 +210,6 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
|
||||||
|
typedef QLIST_HEAD(VFIODeviceList, VFIODevice) VFIODeviceList;
|
||||||
|
extern VFIOGroupList vfio_group_list;
|
||||||
|
extern VFIODeviceList vfio_device_list;
|
||||||
|
-extern const VFIOIOMMUOps vfio_iommufd_ops;
|
||||||
|
extern const MemoryListener vfio_memory_listener;
|
||||||
|
extern int vfio_kvm_device_fd;
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 9e21d7811f..b2813b0c11 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -17,7 +17,6 @@
|
||||||
|
|
||||||
|
typedef struct VFIODevice VFIODevice;
|
||||||
|
typedef struct VFIOIOMMUClass VFIOIOMMUClass;
|
||||||
|
-#define VFIOIOMMUOps VFIOIOMMUClass /* To remove */
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
unsigned long *bitmap;
|
||||||
|
@@ -96,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
#define TYPE_VFIO_IOMMU "vfio-iommu"
|
||||||
|
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
|
||||||
|
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
|
||||||
|
+#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VFIOContainerBase is not an abstract QOM object because it felt
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,71 @@
|
|||||||
|
From 5a49c5bb690d55fc88b6fb12f059ae932de0a716 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:04 +0800
|
||||||
|
Subject: [PATCH 025/101] vfio/iommufd: Relax assert check for iommufd backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [24/67] 2c9e41e9ca0b67ebf807d1643a98866a0cb75768 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Currently iommufd doesn't support dirty page sync yet,
|
||||||
|
but it will not block us doing live migration if VFIO
|
||||||
|
migration is force enabled.
|
||||||
|
|
||||||
|
So in this case we allow set_dirty_page_tracking to be NULL.
|
||||||
|
Note we don't need same change for query_dirty_bitmap because
|
||||||
|
when dirty page sync isn't supported, query_dirty_bitmap will
|
||||||
|
never be called.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 36e84d0c17102fa1c887d8c650a13ec08fca0ec0)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container-base.c | 4 ++++
|
||||||
|
hw/vfio/container.c | 4 ----
|
||||||
|
2 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 71f7274973..eee2dcfe76 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -55,6 +55,10 @@ void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
bool start)
|
||||||
|
{
|
||||||
|
+ if (!bcontainer->dirty_pages_supported) {
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
g_assert(bcontainer->ops->set_dirty_page_tracking);
|
||||||
|
return bcontainer->ops->set_dirty_page_tracking(bcontainer, start);
|
||||||
|
}
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index 6bacf38222..ed2d721b2b 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -216,10 +216,6 @@ static int vfio_legacy_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
.argsz = sizeof(dirty),
|
||||||
|
};
|
||||||
|
|
||||||
|
- if (!bcontainer->dirty_pages_supported) {
|
||||||
|
- return 0;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
if (start) {
|
||||||
|
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
|
||||||
|
} else {
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
55
kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch
Normal file
55
kvm-vfio-iommufd-Remove-CONFIG_IOMMUFD-usage.patch
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
From 5549bf1b2e07213c23e280a43ab2ab67d5b7304a Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:25 +0100
|
||||||
|
Subject: [PATCH 058/101] vfio/iommufd: Remove CONFIG_IOMMUFD usage
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [57/67] 3a6a45d379241d9412e0b8bcfeb9be0b4add59a5 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Availability of the IOMMUFD backend can now be fully determined at
|
||||||
|
runtime and the ifdef check was a build time protection (for PPC not
|
||||||
|
supporting it mostly).
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit c1139fa4feba8c320e4bd0a4e34af55caa5ffbb9)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 3 ---
|
||||||
|
1 file changed, 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 89ff1c7aed..0d4d8b8416 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -19,7 +19,6 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
-#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#ifdef CONFIG_KVM
|
||||||
|
#include <linux/kvm.h>
|
||||||
|
@@ -1506,11 +1505,9 @@ int vfio_attach_device(char *name, VFIODevice *vbasedev,
|
||||||
|
const VFIOIOMMUClass *ops =
|
||||||
|
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_LEGACY));
|
||||||
|
|
||||||
|
-#ifdef CONFIG_IOMMUFD
|
||||||
|
if (vbasedev->iommufd) {
|
||||||
|
ops = VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
|
||||||
|
}
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
assert(ops);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,56 @@
|
|||||||
|
From 6b36dc2a305af856af03aad2e315eea96a349153 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Thu, 21 Dec 2023 09:09:57 +0100
|
||||||
|
Subject: [PATCH 061/101] vfio/iommufd: Remove the use of stat() to check file
|
||||||
|
existence
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [60/67] 485770e45c1a6399780939bfb8b01b615d9213c6 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Using stat() before opening a file or a directory can lead to a
|
||||||
|
time-of-check to time-of-use (TOCTOU) filesystem race, which is
|
||||||
|
reported by Coverity as a security best-practices violation. The
|
||||||
|
sequence could be replaced by open and fdopendir but it doesn't add
|
||||||
|
much in this case. Simply use opendir to avoid the race.
|
||||||
|
|
||||||
|
Fixes: CID 1531551
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Zhenzhong Duan <Zhenzhong.duan@intel.com>
|
||||||
|
(cherry picked from commit 6ba254801f6bc7f3ef68a6414f1b107237c7eb26)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/iommufd.c | 6 ------
|
||||||
|
1 file changed, 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index d4c586e842..9bfddc1360 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -121,17 +121,11 @@ static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
|
||||||
|
DIR *dir = NULL;
|
||||||
|
struct dirent *dent;
|
||||||
|
gchar *contents;
|
||||||
|
- struct stat st;
|
||||||
|
gsize length;
|
||||||
|
int major, minor;
|
||||||
|
dev_t vfio_devt;
|
||||||
|
|
||||||
|
path = g_strdup_printf("%s/vfio-dev", sysfs_path);
|
||||||
|
- if (stat(path, &st) < 0) {
|
||||||
|
- error_setg_errno(errp, errno, "no such host device");
|
||||||
|
- goto out_free_path;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
dir = opendir(path);
|
||||||
|
if (!dir) {
|
||||||
|
error_setg_errno(errp, errno, "couldn't open directory %s", path);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
115
kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch
Normal file
115
kvm-vfio-migration-Add-helper-function-to-set-state-or-r.patch
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
From 0c0435e7210b99a6bf7b8f8205f7af8277b7525b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Avihai Horon <avihaih@nvidia.com>
|
||||||
|
Date: Sun, 31 Dec 2023 12:48:18 +0200
|
||||||
|
Subject: [PATCH 063/101] vfio/migration: Add helper function to set state or
|
||||||
|
reset device
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [62/67] 1a63eea289561a05a6a8527c2a9da0289a7836d9 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
There are several places where failure in setting the device state leads
|
||||||
|
to a device reset, which is done by setting ERROR as the recover state.
|
||||||
|
|
||||||
|
Add a helper function that sets the device state and resets the device
|
||||||
|
in case of failure. This will make the code cleaner and remove duplicate
|
||||||
|
comments.
|
||||||
|
|
||||||
|
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
(cherry picked from commit c817e5a377a334241eed149e35760aca58bdeb34)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/migration.c | 41 +++++++++++++++++------------------------
|
||||||
|
1 file changed, 17 insertions(+), 24 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
|
||||||
|
index 28d422b39f..70e6b1a709 100644
|
||||||
|
--- a/hw/vfio/migration.c
|
||||||
|
+++ b/hw/vfio/migration.c
|
||||||
|
@@ -163,6 +163,19 @@ reset_device:
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*
|
||||||
|
+ * Some device state transitions require resetting the device if they fail.
|
||||||
|
+ * This function sets the device in new_state and resets the device if that
|
||||||
|
+ * fails. Reset is done by using ERROR as the recover state.
|
||||||
|
+ */
|
||||||
|
+static int
|
||||||
|
+vfio_migration_set_state_or_reset(VFIODevice *vbasedev,
|
||||||
|
+ enum vfio_device_mig_state new_state)
|
||||||
|
+{
|
||||||
|
+ return vfio_migration_set_state(vbasedev, new_state,
|
||||||
|
+ VFIO_DEVICE_STATE_ERROR);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
|
||||||
|
uint64_t data_size)
|
||||||
|
{
|
||||||
|
@@ -422,12 +435,7 @@ static void vfio_save_cleanup(void *opaque)
|
||||||
|
* after migration has completed, so it won't increase downtime.
|
||||||
|
*/
|
||||||
|
if (migration->device_state == VFIO_DEVICE_STATE_STOP_COPY) {
|
||||||
|
- /*
|
||||||
|
- * If setting the device in STOP state fails, the device should be
|
||||||
|
- * reset. To do so, use ERROR state as a recover state.
|
||||||
|
- */
|
||||||
|
- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
|
||||||
|
- VFIO_DEVICE_STATE_ERROR);
|
||||||
|
+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_STOP);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_free(migration->data_buffer);
|
||||||
|
@@ -699,12 +707,7 @@ static void vfio_vmstate_change_prepare(void *opaque, bool running,
|
||||||
|
VFIO_DEVICE_STATE_PRE_COPY_P2P :
|
||||||
|
VFIO_DEVICE_STATE_RUNNING_P2P;
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * If setting the device in new_state fails, the device should be reset.
|
||||||
|
- * To do so, use ERROR state as a recover state.
|
||||||
|
- */
|
||||||
|
- ret = vfio_migration_set_state(vbasedev, new_state,
|
||||||
|
- VFIO_DEVICE_STATE_ERROR);
|
||||||
|
+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
|
||||||
|
if (ret) {
|
||||||
|
/*
|
||||||
|
* Migration should be aborted in this case, but vm_state_notify()
|
||||||
|
@@ -736,12 +739,7 @@ static void vfio_vmstate_change(void *opaque, bool running, RunState state)
|
||||||
|
VFIO_DEVICE_STATE_STOP;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * If setting the device in new_state fails, the device should be reset.
|
||||||
|
- * To do so, use ERROR state as a recover state.
|
||||||
|
- */
|
||||||
|
- ret = vfio_migration_set_state(vbasedev, new_state,
|
||||||
|
- VFIO_DEVICE_STATE_ERROR);
|
||||||
|
+ ret = vfio_migration_set_state_or_reset(vbasedev, new_state);
|
||||||
|
if (ret) {
|
||||||
|
/*
|
||||||
|
* Migration should be aborted in this case, but vm_state_notify()
|
||||||
|
@@ -770,12 +768,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data)
|
||||||
|
case MIGRATION_STATUS_CANCELLING:
|
||||||
|
case MIGRATION_STATUS_CANCELLED:
|
||||||
|
case MIGRATION_STATUS_FAILED:
|
||||||
|
- /*
|
||||||
|
- * If setting the device in RUNNING state fails, the device should
|
||||||
|
- * be reset. To do so, use ERROR state as a recover state.
|
||||||
|
- */
|
||||||
|
- vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RUNNING,
|
||||||
|
- VFIO_DEVICE_STATE_ERROR);
|
||||||
|
+ vfio_migration_set_state_or_reset(vbasedev, VFIO_DEVICE_STATE_RUNNING);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,81 @@
|
|||||||
|
From 7788fdc2375e01ead0c8a705c3b3d7467dd93d67 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:09 +0800
|
||||||
|
Subject: [PATCH 030/101] vfio/pci: Allow the selection of a given iommu
|
||||||
|
backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [29/67] 363c62607a11093ea0062489e11a708117d8ffb9 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Now we support two types of iommu backends, let's add the capability
|
||||||
|
to select one of them. This depends on whether an iommufd object has
|
||||||
|
been linked with the vfio-pci device:
|
||||||
|
|
||||||
|
If the user wants to use the legacy backend, it shall not
|
||||||
|
link the vfio-pci device with any iommufd object:
|
||||||
|
|
||||||
|
-device vfio-pci,host=0000:02:00.0
|
||||||
|
|
||||||
|
This is called the legacy mode/backend.
|
||||||
|
|
||||||
|
If the user wants to use the iommufd backend (/dev/iommu) it
|
||||||
|
shall pass an iommufd object id in the vfio-pci device options:
|
||||||
|
|
||||||
|
-object iommufd,id=iommufd0
|
||||||
|
-device vfio-pci,host=0000:02:00.0,iommufd=iommufd0
|
||||||
|
|
||||||
|
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit ee42b261b0a2e465ae003ddcaf1caf117c201f74)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/pci.c | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index 83b2561908..39e6a6678e 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -19,6 +19,7 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include <linux/vfio.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
|
||||||
|
@@ -42,6 +43,7 @@
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "migration/blocker.h"
|
||||||
|
#include "migration/qemu-file.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
|
||||||
|
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
|
||||||
|
|
||||||
|
@@ -3415,6 +3417,10 @@ static Property vfio_pci_dev_properties[] = {
|
||||||
|
* DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
|
||||||
|
* DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
|
||||||
|
*/
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
|
||||||
|
+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||||
|
+#endif
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
139
kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch
Normal file
139
kvm-vfio-pci-Extract-out-a-helper-vfio_pci_get_pci_hot_r.patch
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
From fe5ecedd452754eeb238b23eb0544ed3c5086157 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:06 +0800
|
||||||
|
Subject: [PATCH 027/101] vfio/pci: Extract out a helper
|
||||||
|
vfio_pci_get_pci_hot_reset_info
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [26/67] 730b7f1496f4f21310fa13c79cb87f8d5e2ad2a8 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This helper will be used by both legacy and iommufd backends.
|
||||||
|
|
||||||
|
No functional changes intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 4d36ec23a75eb387492f4d68ff1b8eeee5d68142)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/pci.c | 54 +++++++++++++++++++++++++++++++++++----------------
|
||||||
|
hw/vfio/pci.h | 3 +++
|
||||||
|
2 files changed, 40 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index ec98080f28..b482e5479f 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -2448,22 +2448,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||||
|
return (strcmp(tmp, name) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||||
|
+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||||
|
+ struct vfio_pci_hot_reset_info **info_p)
|
||||||
|
{
|
||||||
|
- VFIOGroup *group;
|
||||||
|
struct vfio_pci_hot_reset_info *info;
|
||||||
|
- struct vfio_pci_dependent_device *devices;
|
||||||
|
- struct vfio_pci_hot_reset *reset;
|
||||||
|
- int32_t *fds;
|
||||||
|
- int ret, i, count;
|
||||||
|
- bool multi = false;
|
||||||
|
+ int ret, count;
|
||||||
|
|
||||||
|
- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||||
|
-
|
||||||
|
- if (!single) {
|
||||||
|
- vfio_pci_pre_reset(vdev);
|
||||||
|
- }
|
||||||
|
- vdev->vbasedev.needs_reset = false;
|
||||||
|
+ assert(info_p && !*info_p);
|
||||||
|
|
||||||
|
info = g_malloc0(sizeof(*info));
|
||||||
|
info->argsz = sizeof(*info);
|
||||||
|
@@ -2471,24 +2462,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||||
|
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
|
||||||
|
if (ret && errno != ENOSPC) {
|
||||||
|
ret = -errno;
|
||||||
|
+ g_free(info);
|
||||||
|
if (!vdev->has_pm_reset) {
|
||||||
|
error_report("vfio: Cannot reset device %s, "
|
||||||
|
"no available reset mechanism.", vdev->vbasedev.name);
|
||||||
|
}
|
||||||
|
- goto out_single;
|
||||||
|
+ return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
count = info->count;
|
||||||
|
- info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
|
||||||
|
- info->argsz = sizeof(*info) + (count * sizeof(*devices));
|
||||||
|
- devices = &info->devices[0];
|
||||||
|
+ info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0])));
|
||||||
|
+ info->argsz = sizeof(*info) + (count * sizeof(info->devices[0]));
|
||||||
|
|
||||||
|
ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
|
||||||
|
if (ret) {
|
||||||
|
ret = -errno;
|
||||||
|
+ g_free(info);
|
||||||
|
error_report("vfio: hot reset info failed: %m");
|
||||||
|
+ return ret;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ *info_p = info;
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||||
|
+{
|
||||||
|
+ VFIOGroup *group;
|
||||||
|
+ struct vfio_pci_hot_reset_info *info = NULL;
|
||||||
|
+ struct vfio_pci_dependent_device *devices;
|
||||||
|
+ struct vfio_pci_hot_reset *reset;
|
||||||
|
+ int32_t *fds;
|
||||||
|
+ int ret, i, count;
|
||||||
|
+ bool multi = false;
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||||
|
+
|
||||||
|
+ if (!single) {
|
||||||
|
+ vfio_pci_pre_reset(vdev);
|
||||||
|
+ }
|
||||||
|
+ vdev->vbasedev.needs_reset = false;
|
||||||
|
+
|
||||||
|
+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
goto out_single;
|
||||||
|
}
|
||||||
|
+ devices = &info->devices[0];
|
||||||
|
|
||||||
|
trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
|
||||||
|
index eb74d9de2d..3568a6135d 100644
|
||||||
|
--- a/hw/vfio/pci.h
|
||||||
|
+++ b/hw/vfio/pci.h
|
||||||
|
@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
|
||||||
|
|
||||||
|
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
|
||||||
|
|
||||||
|
+int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||||
|
+ struct vfio_pci_hot_reset_info **info_p);
|
||||||
|
+
|
||||||
|
int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
|
||||||
|
|
||||||
|
int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
466
kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch
Normal file
466
kvm-vfio-pci-Introduce-a-vfio-pci-hot-reset-interface.patch
Normal file
@ -0,0 +1,466 @@
|
|||||||
|
From acc3e5306e184567006bc45e7f36f2473e75d08a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:07 +0800
|
||||||
|
Subject: [PATCH 028/101] vfio/pci: Introduce a vfio pci hot reset interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [27/67] 192088dbf2cf88663acd2416f69b7eeb175b2525 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Legacy vfio pci and iommufd cdev have different processes to hot reset
|
||||||
|
vfio device, expand current code to abstract out pci_hot_reset callback
|
||||||
|
for legacy vfio, this same interface will also be used by iommufd
|
||||||
|
cdev vfio device.
|
||||||
|
|
||||||
|
Rename vfio_pci_hot_reset to vfio_legacy_pci_hot_reset and move it
|
||||||
|
into container.c.
|
||||||
|
|
||||||
|
vfio_pci_[pre/post]_reset and vfio_pci_host_match are exported so
|
||||||
|
they could be called in legacy and iommufd pci_hot_reset callback.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit c328e7e8ad1c969dbcbe90ee76afcd3cfec5e945)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 170 ++++++++++++++++++++++++++
|
||||||
|
hw/vfio/pci.c | 168 +------------------------
|
||||||
|
hw/vfio/pci.h | 3 +
|
||||||
|
include/hw/vfio/vfio-container-base.h | 3 +
|
||||||
|
4 files changed, 182 insertions(+), 162 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index ed2d721b2b..1dbf9b9a17 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -33,6 +33,7 @@
|
||||||
|
#include "trace.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "migration/migration.h"
|
||||||
|
+#include "pci.h"
|
||||||
|
|
||||||
|
VFIOGroupList vfio_group_list =
|
||||||
|
QLIST_HEAD_INITIALIZER(vfio_group_list);
|
||||||
|
@@ -922,6 +923,174 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev)
|
||||||
|
vfio_put_group(group);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
|
||||||
|
+{
|
||||||
|
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
|
||||||
|
+ VFIOGroup *group;
|
||||||
|
+ struct vfio_pci_hot_reset_info *info = NULL;
|
||||||
|
+ struct vfio_pci_dependent_device *devices;
|
||||||
|
+ struct vfio_pci_hot_reset *reset;
|
||||||
|
+ int32_t *fds;
|
||||||
|
+ int ret, i, count;
|
||||||
|
+ bool multi = false;
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||||
|
+
|
||||||
|
+ if (!single) {
|
||||||
|
+ vfio_pci_pre_reset(vdev);
|
||||||
|
+ }
|
||||||
|
+ vdev->vbasedev.needs_reset = false;
|
||||||
|
+
|
||||||
|
+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
|
||||||
|
+
|
||||||
|
+ if (ret) {
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+ devices = &info->devices[0];
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||||
|
+
|
||||||
|
+ /* Verify that we have all the groups required */
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ PCIHostDeviceAddress host;
|
||||||
|
+ VFIOPCIDevice *tmp;
|
||||||
|
+ VFIODevice *vbasedev_iter;
|
||||||
|
+
|
||||||
|
+ host.domain = devices[i].segment;
|
||||||
|
+ host.bus = devices[i].bus;
|
||||||
|
+ host.slot = PCI_SLOT(devices[i].devfn);
|
||||||
|
+ host.function = PCI_FUNC(devices[i].devfn);
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset_dep_devices(host.domain,
|
||||||
|
+ host.bus, host.slot, host.function, devices[i].group_id);
|
||||||
|
+
|
||||||
|
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
+ if (group->groupid == devices[i].group_id) {
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!group) {
|
||||||
|
+ if (!vdev->has_pm_reset) {
|
||||||
|
+ error_report("vfio: Cannot reset device %s, "
|
||||||
|
+ "depends on group %d which is not owned.",
|
||||||
|
+ vdev->vbasedev.name, devices[i].group_id);
|
||||||
|
+ }
|
||||||
|
+ ret = -EPERM;
|
||||||
|
+ goto out;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Prep dependent devices for reset and clear our marker. */
|
||||||
|
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||||
|
+ if (!vbasedev_iter->dev->realized ||
|
||||||
|
+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||||
|
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||||
|
+ if (single) {
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+ vfio_pci_pre_reset(tmp);
|
||||||
|
+ tmp->vbasedev.needs_reset = false;
|
||||||
|
+ multi = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!single && !multi) {
|
||||||
|
+ ret = -EINVAL;
|
||||||
|
+ goto out_single;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Determine how many group fds need to be passed */
|
||||||
|
+ count = 0;
|
||||||
|
+ QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ if (group->groupid == devices[i].group_id) {
|
||||||
|
+ count++;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
|
||||||
|
+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
|
||||||
|
+ fds = &reset->group_fds[0];
|
||||||
|
+
|
||||||
|
+ /* Fill in group fds */
|
||||||
|
+ QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ if (group->groupid == devices[i].group_id) {
|
||||||
|
+ fds[reset->count++] = group->fd;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Bus reset! */
|
||||||
|
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
|
||||||
|
+ g_free(reset);
|
||||||
|
+ if (ret) {
|
||||||
|
+ ret = -errno;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
|
||||||
|
+ ret ? strerror(errno) : "Success");
|
||||||
|
+
|
||||||
|
+out:
|
||||||
|
+ /* Re-enable INTx on affected devices */
|
||||||
|
+ for (i = 0; i < info->count; i++) {
|
||||||
|
+ PCIHostDeviceAddress host;
|
||||||
|
+ VFIOPCIDevice *tmp;
|
||||||
|
+ VFIODevice *vbasedev_iter;
|
||||||
|
+
|
||||||
|
+ host.domain = devices[i].segment;
|
||||||
|
+ host.bus = devices[i].bus;
|
||||||
|
+ host.slot = PCI_SLOT(devices[i].devfn);
|
||||||
|
+ host.function = PCI_FUNC(devices[i].devfn);
|
||||||
|
+
|
||||||
|
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
+ if (group->groupid == devices[i].group_id) {
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!group) {
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||||
|
+ if (!vbasedev_iter->dev->realized ||
|
||||||
|
+ vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||||
|
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||||
|
+ vfio_pci_post_reset(tmp);
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+out_single:
|
||||||
|
+ if (!single) {
|
||||||
|
+ vfio_pci_post_reset(vdev);
|
||||||
|
+ }
|
||||||
|
+ g_free(info);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
.dma_map = vfio_legacy_dma_map,
|
||||||
|
.dma_unmap = vfio_legacy_dma_unmap,
|
||||||
|
@@ -929,4 +1098,5 @@ const VFIOIOMMUOps vfio_legacy_ops = {
|
||||||
|
.detach_device = vfio_legacy_detach_device,
|
||||||
|
.set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking,
|
||||||
|
.query_dirty_bitmap = vfio_legacy_query_dirty_bitmap,
|
||||||
|
+ .pci_hot_reset = vfio_legacy_pci_hot_reset,
|
||||||
|
};
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index b482e5479f..83b2561908 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -2377,7 +2377,7 @@ static int vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||||
|
+void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||||
|
{
|
||||||
|
PCIDevice *pdev = &vdev->pdev;
|
||||||
|
uint16_t cmd;
|
||||||
|
@@ -2414,7 +2414,7 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
|
||||||
|
vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||||
|
+void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||||
|
{
|
||||||
|
Error *err = NULL;
|
||||||
|
int nr;
|
||||||
|
@@ -2438,7 +2438,7 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
|
||||||
|
vfio_quirk_reset(vdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||||
|
+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
|
||||||
|
{
|
||||||
|
char tmp[13];
|
||||||
|
|
||||||
|
@@ -2488,166 +2488,10 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||||
|
|
||||||
|
static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
|
||||||
|
{
|
||||||
|
- VFIOGroup *group;
|
||||||
|
- struct vfio_pci_hot_reset_info *info = NULL;
|
||||||
|
- struct vfio_pci_dependent_device *devices;
|
||||||
|
- struct vfio_pci_hot_reset *reset;
|
||||||
|
- int32_t *fds;
|
||||||
|
- int ret, i, count;
|
||||||
|
- bool multi = false;
|
||||||
|
-
|
||||||
|
- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
|
||||||
|
-
|
||||||
|
- if (!single) {
|
||||||
|
- vfio_pci_pre_reset(vdev);
|
||||||
|
- }
|
||||||
|
- vdev->vbasedev.needs_reset = false;
|
||||||
|
-
|
||||||
|
- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
|
||||||
|
-
|
||||||
|
- if (ret) {
|
||||||
|
- goto out_single;
|
||||||
|
- }
|
||||||
|
- devices = &info->devices[0];
|
||||||
|
-
|
||||||
|
- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
|
||||||
|
-
|
||||||
|
- /* Verify that we have all the groups required */
|
||||||
|
- for (i = 0; i < info->count; i++) {
|
||||||
|
- PCIHostDeviceAddress host;
|
||||||
|
- VFIOPCIDevice *tmp;
|
||||||
|
- VFIODevice *vbasedev_iter;
|
||||||
|
-
|
||||||
|
- host.domain = devices[i].segment;
|
||||||
|
- host.bus = devices[i].bus;
|
||||||
|
- host.slot = PCI_SLOT(devices[i].devfn);
|
||||||
|
- host.function = PCI_FUNC(devices[i].devfn);
|
||||||
|
-
|
||||||
|
- trace_vfio_pci_hot_reset_dep_devices(host.domain,
|
||||||
|
- host.bus, host.slot, host.function, devices[i].group_id);
|
||||||
|
-
|
||||||
|
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
- if (group->groupid == devices[i].group_id) {
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (!group) {
|
||||||
|
- if (!vdev->has_pm_reset) {
|
||||||
|
- error_report("vfio: Cannot reset device %s, "
|
||||||
|
- "depends on group %d which is not owned.",
|
||||||
|
- vdev->vbasedev.name, devices[i].group_id);
|
||||||
|
- }
|
||||||
|
- ret = -EPERM;
|
||||||
|
- goto out;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Prep dependent devices for reset and clear our marker. */
|
||||||
|
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||||
|
- if (!vbasedev_iter->dev->realized ||
|
||||||
|
- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||||
|
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||||
|
- if (single) {
|
||||||
|
- ret = -EINVAL;
|
||||||
|
- goto out_single;
|
||||||
|
- }
|
||||||
|
- vfio_pci_pre_reset(tmp);
|
||||||
|
- tmp->vbasedev.needs_reset = false;
|
||||||
|
- multi = true;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (!single && !multi) {
|
||||||
|
- ret = -EINVAL;
|
||||||
|
- goto out_single;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Determine how many group fds need to be passed */
|
||||||
|
- count = 0;
|
||||||
|
- QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
- for (i = 0; i < info->count; i++) {
|
||||||
|
- if (group->groupid == devices[i].group_id) {
|
||||||
|
- count++;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
|
||||||
|
- reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
|
||||||
|
- fds = &reset->group_fds[0];
|
||||||
|
-
|
||||||
|
- /* Fill in group fds */
|
||||||
|
- QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
- for (i = 0; i < info->count; i++) {
|
||||||
|
- if (group->groupid == devices[i].group_id) {
|
||||||
|
- fds[reset->count++] = group->fd;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Bus reset! */
|
||||||
|
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
|
||||||
|
- g_free(reset);
|
||||||
|
-
|
||||||
|
- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
|
||||||
|
- ret ? strerror(errno) : "Success");
|
||||||
|
-
|
||||||
|
-out:
|
||||||
|
- /* Re-enable INTx on affected devices */
|
||||||
|
- for (i = 0; i < info->count; i++) {
|
||||||
|
- PCIHostDeviceAddress host;
|
||||||
|
- VFIOPCIDevice *tmp;
|
||||||
|
- VFIODevice *vbasedev_iter;
|
||||||
|
-
|
||||||
|
- host.domain = devices[i].segment;
|
||||||
|
- host.bus = devices[i].bus;
|
||||||
|
- host.slot = PCI_SLOT(devices[i].devfn);
|
||||||
|
- host.function = PCI_FUNC(devices[i].devfn);
|
||||||
|
-
|
||||||
|
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- QLIST_FOREACH(group, &vfio_group_list, next) {
|
||||||
|
- if (group->groupid == devices[i].group_id) {
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- if (!group) {
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
||||||
|
- if (!vbasedev_iter->dev->realized ||
|
||||||
|
- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
|
||||||
|
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
|
||||||
|
- vfio_pci_post_reset(tmp);
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-out_single:
|
||||||
|
- if (!single) {
|
||||||
|
- vfio_pci_post_reset(vdev);
|
||||||
|
- }
|
||||||
|
- g_free(info);
|
||||||
|
+ VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
+ const VFIOIOMMUOps *ops = vbasedev->bcontainer->ops;
|
||||||
|
|
||||||
|
- return ret;
|
||||||
|
+ return ops->pci_hot_reset(vbasedev, single);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
|
||||||
|
index 3568a6135d..b7de39c010 100644
|
||||||
|
--- a/hw/vfio/pci.h
|
||||||
|
+++ b/hw/vfio/pci.h
|
||||||
|
@@ -219,6 +219,9 @@ void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
|
||||||
|
|
||||||
|
extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
|
||||||
|
|
||||||
|
+void vfio_pci_pre_reset(VFIOPCIDevice *vdev);
|
||||||
|
+void vfio_pci_post_reset(VFIOPCIDevice *vdev);
|
||||||
|
+bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name);
|
||||||
|
int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
|
||||||
|
struct vfio_pci_hot_reset_info **info_p);
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 4b6f017c6f..45bb19c767 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -106,6 +106,9 @@ struct VFIOIOMMUOps {
|
||||||
|
int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start);
|
||||||
|
int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size);
|
||||||
|
+ /* PCI specific */
|
||||||
|
+ int (*pci_hot_reset)(VFIODevice *vbasedev, bool single);
|
||||||
|
+
|
||||||
|
/* SPAPR specific */
|
||||||
|
int (*add_window)(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
237
kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch
Normal file
237
kvm-vfio-pci-Make-vfio-cdev-pre-openable-by-passing-a-fi.patch
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
From 965a44793806fef2094906947bd3b428638bf89a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:10 +0800
|
||||||
|
Subject: [PATCH 031/101] vfio/pci: Make vfio cdev pre-openable by passing a
|
||||||
|
file handle
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [30/67] a14b824b700e8fb36633cd159bcc422d992a316f (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Conflicts: contextual conflict in hw/vfio/pci.c due to
|
||||||
|
RHEL-only f73562144e492 vfio: cap number of devices that can be assigned
|
||||||
|
|
||||||
|
This gives management tools like libvirt a chance to open the vfio
|
||||||
|
cdev with privilege and pass FD to qemu. This way qemu never needs
|
||||||
|
to have privilege to open a VFIO or iommu cdev node.
|
||||||
|
|
||||||
|
Together with the earlier support of pre-opening /dev/iommu device,
|
||||||
|
now we have full support of passing a vfio device to unprivileged
|
||||||
|
qemu by a management tool. This mode is no longer considered for the
|
||||||
|
legacy backend. So let's remove the "TODO" comment.
|
||||||
|
|
||||||
|
Add helper functions vfio_device_set_fd() and vfio_device_get_name()
|
||||||
|
to set fd and get device name, they will also be used by other vfio
|
||||||
|
devices.
|
||||||
|
|
||||||
|
There is no easy way to check if a device is mdev with FD passing,
|
||||||
|
so fail the x-balloon-allowed check unconditionally in this case.
|
||||||
|
|
||||||
|
There is also no easy way to get BDF as name with FD passing, so
|
||||||
|
we fake a name of the form VFIO_FD<fd> (e.g. VFIO_FD7).
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit da3e04b26fd8d15b344944504d5ffa9c5f20b54b)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/helpers.c | 43 +++++++++++++++++++++++++++++++++++
|
||||||
|
hw/vfio/iommufd.c | 12 ++++++----
|
||||||
|
hw/vfio/pci.c | 28 +++++++++++++----------
|
||||||
|
include/hw/vfio/vfio-common.h | 4 ++++
|
||||||
|
4 files changed, 71 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
|
||||||
|
index 168847e7c5..3592c3d54e 100644
|
||||||
|
--- a/hw/vfio/helpers.c
|
||||||
|
+++ b/hw/vfio/helpers.c
|
||||||
|
@@ -27,6 +27,7 @@
|
||||||
|
#include "trace.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
+#include "monitor/monitor.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Common VFIO interrupt disable
|
||||||
|
@@ -609,3 +610,45 @@ bool vfio_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
|
||||||
|
+{
|
||||||
|
+ struct stat st;
|
||||||
|
+
|
||||||
|
+ if (vbasedev->fd < 0) {
|
||||||
|
+ if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||||
|
+ error_setg_errno(errp, errno, "no such host device");
|
||||||
|
+ error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||||||
|
+ return -errno;
|
||||||
|
+ }
|
||||||
|
+ /* User may specify a name, e.g: VFIO platform device */
|
||||||
|
+ if (!vbasedev->name) {
|
||||||
|
+ vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||||
|
+ }
|
||||||
|
+ } else {
|
||||||
|
+ if (!vbasedev->iommufd) {
|
||||||
|
+ error_setg(errp, "Use FD passing only with iommufd backend");
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+ /*
|
||||||
|
+ * Give a name with fd so any function printing out vbasedev->name
|
||||||
|
+ * will not break.
|
||||||
|
+ */
|
||||||
|
+ if (!vbasedev->name) {
|
||||||
|
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ int fd = monitor_fd_param(monitor_cur(), str, errp);
|
||||||
|
+
|
||||||
|
+ if (fd < 0) {
|
||||||
|
+ error_prepend(errp, "Could not parse remote object fd %s:", str);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ vbasedev->fd = fd;
|
||||||
|
+}
|
||||||
|
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
|
||||||
|
index 6e53e013ef..5accd26484 100644
|
||||||
|
--- a/hw/vfio/iommufd.c
|
||||||
|
+++ b/hw/vfio/iommufd.c
|
||||||
|
@@ -320,11 +320,15 @@ static int iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
|
||||||
|
uint32_t ioas_id;
|
||||||
|
Error *err = NULL;
|
||||||
|
|
||||||
|
- devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||||||
|
- if (devfd < 0) {
|
||||||
|
- return devfd;
|
||||||
|
+ if (vbasedev->fd < 0) {
|
||||||
|
+ devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
|
||||||
|
+ if (devfd < 0) {
|
||||||
|
+ return devfd;
|
||||||
|
+ }
|
||||||
|
+ vbasedev->fd = devfd;
|
||||||
|
+ } else {
|
||||||
|
+ devfd = vbasedev->fd;
|
||||||
|
}
|
||||||
|
- vbasedev->fd = devfd;
|
||||||
|
|
||||||
|
ret = iommufd_cdev_connect_and_bind(vbasedev, errp);
|
||||||
|
if (ret) {
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index 39e6a6678e..3412a63bb1 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -2949,7 +2949,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
|
VFIOGroup *group;
|
||||||
|
char *tmp, *subsys;
|
||||||
|
Error *err = NULL;
|
||||||
|
- struct stat st;
|
||||||
|
int ret, i = 0;
|
||||||
|
bool is_mdev;
|
||||||
|
char uuid[UUID_STR_LEN];
|
||||||
|
@@ -2976,11 +2975,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!vbasedev->sysfsdev) {
|
||||||
|
+ if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
|
||||||
|
if (!(~vdev->host.domain || ~vdev->host.bus ||
|
||||||
|
~vdev->host.slot || ~vdev->host.function)) {
|
||||||
|
error_setg(errp, "No provided host device");
|
||||||
|
error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ "or -device vfio-pci,fd=DEVICE_FD "
|
||||||
|
+#endif
|
||||||
|
"or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -2990,13 +2992,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
|
vdev->host.slot, vdev->host.function);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||||
|
- error_setg_errno(errp, errno, "no such host device");
|
||||||
|
- error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->sysfsdev);
|
||||||
|
+ if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||||
|
vbasedev->ops = &vfio_pci_ops;
|
||||||
|
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||||||
|
vbasedev->dev = DEVICE(vdev);
|
||||||
|
@@ -3356,6 +3354,7 @@ static void vfio_instance_init(Object *obj)
|
||||||
|
vdev->host.bus = ~0U;
|
||||||
|
vdev->host.slot = ~0U;
|
||||||
|
vdev->host.function = ~0U;
|
||||||
|
+ vdev->vbasedev.fd = -1;
|
||||||
|
|
||||||
|
vdev->nv_gpudirect_clique = 0xFF;
|
||||||
|
|
||||||
|
@@ -3412,11 +3411,6 @@ static Property vfio_pci_dev_properties[] = {
|
||||||
|
qdev_prop_nv_gpudirect_clique, uint8_t),
|
||||||
|
DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
|
||||||
|
OFF_AUTOPCIBAR_OFF),
|
||||||
|
- /*
|
||||||
|
- * TODO - support passed fds... is this necessary?
|
||||||
|
- * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
|
||||||
|
- * DEFINE_PROP_STRING("vfiogroupfd, VFIOPCIDevice, vfiogroupfd_name),
|
||||||
|
- */
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
DEFINE_PROP_LINK("iommufd", VFIOPCIDevice, vbasedev.iommufd,
|
||||||
|
TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||||
|
@@ -3424,6 +3418,13 @@ static Property vfio_pci_dev_properties[] = {
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ vfio_device_set_fd(&VFIO_PCI(obj)->vbasedev, str, errp);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||||
|
{
|
||||||
|
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||||
|
@@ -3431,6 +3432,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
|
||||||
|
|
||||||
|
dc->reset = vfio_pci_reset;
|
||||||
|
device_class_set_props(dc, vfio_pci_dev_properties);
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
|
||||||
|
+#endif
|
||||||
|
dc->desc = "VFIO-based PCI device assignment";
|
||||||
|
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
|
||||||
|
pdc->realize = vfio_realize;
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 3dac5c167e..697bf24a35 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -251,4 +251,8 @@ int vfio_devices_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr size);
|
||||||
|
int vfio_get_dirty_bitmap(VFIOContainerBase *bcontainer, uint64_t iova,
|
||||||
|
uint64_t size, ram_addr_t ram_addr);
|
||||||
|
+
|
||||||
|
+/* Returns 0 on success, or a negative errno. */
|
||||||
|
+int vfio_device_get_name(VFIODevice *vbasedev, Error **errp);
|
||||||
|
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp);
|
||||||
|
#endif /* HW_VFIO_VFIO_COMMON_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,70 @@
|
|||||||
|
From 942bd7251d166f558e0e6acf7ba853e940e2fb52 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:21 +0800
|
||||||
|
Subject: [PATCH 042/101] vfio/pci: Move VFIODevice initializations in
|
||||||
|
vfio_instance_init
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [41/67] 67392d7a92a6ec2155697a355c88d295338a0785 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some of the VFIODevice initializations are in vfio_realize;
|
||||||
|
move all of them to vfio_instance_init.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit dd2fcb1716be9b89c726b3446f38446bb99d6b3a)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/pci.c | 10 ++++++----
|
||||||
|
1 file changed, 6 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
||||||
|
index 3412a63bb1..3f5900cc46 100644
|
||||||
|
--- a/hw/vfio/pci.c
|
||||||
|
+++ b/hw/vfio/pci.c
|
||||||
|
@@ -2995,9 +2995,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
||||||
|
if (vfio_device_get_name(vbasedev, errp) < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
- vbasedev->ops = &vfio_pci_ops;
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||||||
|
- vbasedev->dev = DEVICE(vdev);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mediated devices *might* operate compatibly with discarding of RAM, but
|
||||||
|
@@ -3346,6 +3343,7 @@ static void vfio_instance_init(Object *obj)
|
||||||
|
{
|
||||||
|
PCIDevice *pci_dev = PCI_DEVICE(obj);
|
||||||
|
VFIOPCIDevice *vdev = VFIO_PCI(obj);
|
||||||
|
+ VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
|
||||||
|
device_add_bootindex_property(obj, &vdev->bootindex,
|
||||||
|
"bootindex", NULL,
|
||||||
|
@@ -3354,7 +3352,11 @@ static void vfio_instance_init(Object *obj)
|
||||||
|
vdev->host.bus = ~0U;
|
||||||
|
vdev->host.slot = ~0U;
|
||||||
|
vdev->host.function = ~0U;
|
||||||
|
- vdev->vbasedev.fd = -1;
|
||||||
|
+
|
||||||
|
+ vbasedev->type = VFIO_DEVICE_TYPE_PCI;
|
||||||
|
+ vbasedev->ops = &vfio_pci_ops;
|
||||||
|
+ vbasedev->dev = DEVICE(vdev);
|
||||||
|
+ vbasedev->fd = -1;
|
||||||
|
|
||||||
|
vdev->nv_gpudirect_clique = 0xFF;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,77 @@
|
|||||||
|
From ede579d6d5fe5be9235d6a218efdb237192aee0e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:11 +0800
|
||||||
|
Subject: [PATCH 032/101] vfio/platform: Allow the selection of a given iommu
|
||||||
|
backend
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [31/67] aba1dc16cada602edd7be1a28b0f57991131e6f7 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Now that we support two types of iommu backends, let's add the capability
|
||||||
|
to select one of them. This depends on whether an iommufd object has
|
||||||
|
been linked with the vfio-platform device:
|
||||||
|
|
||||||
|
If the user wants to use the legacy backend, it shall not
|
||||||
|
link the vfio-platform device with any iommufd object:
|
||||||
|
|
||||||
|
-device vfio-platform,host=XXX
|
||||||
|
|
||||||
|
This is called the legacy mode/backend.
|
||||||
|
|
||||||
|
If the user wants to use the iommufd backend (/dev/iommu) it
|
||||||
|
shall pass an iommufd object id in the vfio-platform device options:
|
||||||
|
|
||||||
|
-object iommufd,id=iommufd0
|
||||||
|
-device vfio-platform,host=XXX,iommufd=iommufd0
|
||||||
|
|
||||||
|
Suggested-by: Alex Williamson <alex.williamson@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Reviewed-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit a6c50e1c3f8d0eb77edaea392e61508bb3c516f8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/platform.c | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
|
||||||
|
index 8e3d4ac458..98ae4bc655 100644
|
||||||
|
--- a/hw/vfio/platform.c
|
||||||
|
+++ b/hw/vfio/platform.c
|
||||||
|
@@ -15,11 +15,13 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "qemu/osdep.h"
|
||||||
|
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <linux/vfio.h>
|
||||||
|
|
||||||
|
#include "hw/vfio/vfio-platform.h"
|
||||||
|
+#include "sysemu/iommufd.h"
|
||||||
|
#include "migration/vmstate.h"
|
||||||
|
#include "qemu/error-report.h"
|
||||||
|
#include "qemu/lockable.h"
|
||||||
|
@@ -649,6 +651,10 @@ static Property vfio_platform_dev_properties[] = {
|
||||||
|
DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
|
||||||
|
mmap_timeout, 1100),
|
||||||
|
DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
|
||||||
|
+ TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
|
||||||
|
+#endif
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
108
kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch
Normal file
108
kvm-vfio-platform-Make-vfio-cdev-pre-openable-by-passing.patch
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
From 22664f4115d9b297ef4276e48f8ba0bc195ec99e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:12 +0800
|
||||||
|
Subject: [PATCH 033/101] vfio/platform: Make vfio cdev pre-openable by passing
|
||||||
|
a file handle
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [32/67] 069867dce64b826e92dc2051405a4ded5261981f (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This gives management tools like libvirt a chance to open the vfio
|
||||||
|
cdev with privilege and pass FD to qemu. This way qemu never needs
|
||||||
|
to have privilege to open a VFIO or iommu cdev node.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 3016e60f8f715d2058a48e4956be994482c5e218)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/platform.c | 32 ++++++++++++++++++++++++--------
|
||||||
|
1 file changed, 24 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
|
||||||
|
index 98ae4bc655..a97d9c6234 100644
|
||||||
|
--- a/hw/vfio/platform.c
|
||||||
|
+++ b/hw/vfio/platform.c
|
||||||
|
@@ -531,14 +531,13 @@ static VFIODeviceOps vfio_platform_ops = {
|
||||||
|
*/
|
||||||
|
static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
|
||||||
|
{
|
||||||
|
- struct stat st;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
- /* @sysfsdev takes precedence over @host */
|
||||||
|
- if (vbasedev->sysfsdev) {
|
||||||
|
+ /* @fd takes precedence over @sysfsdev which takes precedence over @host */
|
||||||
|
+ if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
|
||||||
|
g_free(vbasedev->name);
|
||||||
|
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
|
||||||
|
- } else {
|
||||||
|
+ } else if (vbasedev->fd < 0) {
|
||||||
|
if (!vbasedev->name || strchr(vbasedev->name, '/')) {
|
||||||
|
error_setg(errp, "wrong host device name");
|
||||||
|
return -EINVAL;
|
||||||
|
@@ -548,10 +547,9 @@ static int vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
|
||||||
|
vbasedev->name);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (stat(vbasedev->sysfsdev, &st) < 0) {
|
||||||
|
- error_setg_errno(errp, errno,
|
||||||
|
- "failed to get the sysfs host device file status");
|
||||||
|
- return -errno;
|
||||||
|
+ ret = vfio_device_get_name(vbasedev, errp);
|
||||||
|
+ if (ret) {
|
||||||
|
+ return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = vfio_attach_device(vbasedev->name, vbasedev,
|
||||||
|
@@ -658,6 +656,20 @@ static Property vfio_platform_dev_properties[] = {
|
||||||
|
DEFINE_PROP_END_OF_LIST(),
|
||||||
|
};
|
||||||
|
|
||||||
|
+static void vfio_platform_instance_init(Object *obj)
|
||||||
|
+{
|
||||||
|
+ VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
|
||||||
|
+
|
||||||
|
+ vdev->vbasedev.fd = -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
|
||||||
|
+{
|
||||||
|
+ vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static void vfio_platform_class_init(ObjectClass *klass, void *data)
|
||||||
|
{
|
||||||
|
DeviceClass *dc = DEVICE_CLASS(klass);
|
||||||
|
@@ -665,6 +677,9 @@ static void vfio_platform_class_init(ObjectClass *klass, void *data)
|
||||||
|
|
||||||
|
dc->realize = vfio_platform_realize;
|
||||||
|
device_class_set_props(dc, vfio_platform_dev_properties);
|
||||||
|
+#ifdef CONFIG_IOMMUFD
|
||||||
|
+ object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
|
||||||
|
+#endif
|
||||||
|
dc->vmsd = &vfio_platform_vmstate;
|
||||||
|
dc->desc = "VFIO-based platform device assignment";
|
||||||
|
sbc->connect_irq_notifier = vfio_start_irqfd_injection;
|
||||||
|
@@ -677,6 +692,7 @@ static const TypeInfo vfio_platform_dev_info = {
|
||||||
|
.name = TYPE_VFIO_PLATFORM,
|
||||||
|
.parent = TYPE_SYS_BUS_DEVICE,
|
||||||
|
.instance_size = sizeof(VFIOPlatformDevice),
|
||||||
|
+ .instance_init = vfio_platform_instance_init,
|
||||||
|
.class_init = vfio_platform_class_init,
|
||||||
|
.class_size = sizeof(VFIOPlatformDeviceClass),
|
||||||
|
};
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,64 @@
|
|||||||
|
From 2417020283532030f424fe07dfeb7477e6489640 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Tue, 21 Nov 2023 16:44:22 +0800
|
||||||
|
Subject: [PATCH 043/101] vfio/platform: Move VFIODevice initializations in
|
||||||
|
vfio_platform_instance_init
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [42/67] 53a459b6246d7d7bdc7a62ac92f02f1e775a54a6 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Some of the VFIODevice initializations are in vfio_platform_realize,
|
||||||
|
move all of them into vfio_platform_instance_init.
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Suggested-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
|
||||||
|
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit a0cf44c8d618578843a65ea7f6d3db8ce52185bc)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/platform.c | 10 +++++-----
|
||||||
|
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
|
||||||
|
index a97d9c6234..506eb8193f 100644
|
||||||
|
--- a/hw/vfio/platform.c
|
||||||
|
+++ b/hw/vfio/platform.c
|
||||||
|
@@ -581,10 +581,6 @@ static void vfio_platform_realize(DeviceState *dev, Error **errp)
|
||||||
|
VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
int i, ret;
|
||||||
|
|
||||||
|
- vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
|
||||||
|
- vbasedev->dev = dev;
|
||||||
|
- vbasedev->ops = &vfio_platform_ops;
|
||||||
|
-
|
||||||
|
qemu_mutex_init(&vdev->intp_mutex);
|
||||||
|
|
||||||
|
trace_vfio_platform_realize(vbasedev->sysfsdev ?
|
||||||
|
@@ -659,8 +655,12 @@ static Property vfio_platform_dev_properties[] = {
|
||||||
|
static void vfio_platform_instance_init(Object *obj)
|
||||||
|
{
|
||||||
|
VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
|
||||||
|
+ VFIODevice *vbasedev = &vdev->vbasedev;
|
||||||
|
|
||||||
|
- vdev->vbasedev.fd = -1;
|
||||||
|
+ vbasedev->type = VFIO_DEVICE_TYPE_PLATFORM;
|
||||||
|
+ vbasedev->ops = &vfio_platform_ops;
|
||||||
|
+ vbasedev->dev = DEVICE(vdev);
|
||||||
|
+ vbasedev->fd = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_IOMMUFD
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
129
kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch
Normal file
129
kvm-vfio-spapr-Extend-VFIOIOMMUOps-with-a-release-handle.patch
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
From e75ec2aca351daabe597ca6322c1589885f30d7a Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:16 +0100
|
||||||
|
Subject: [PATCH 049/101] vfio/spapr: Extend VFIOIOMMUOps with a release
|
||||||
|
handler
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [48/67] 1c4d22a6f69324805d050767fcf178d8566f2030 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
This allows abstracting the sPAPR IOMMU support a bit more in the
|
||||||
|
legacy IOMMU backend.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 001a013ea3f125d2ec0e709b5765754149d8d968)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 10 +++-----
|
||||||
|
hw/vfio/spapr.c | 35 +++++++++++++++------------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
3 files changed, 24 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index b22feb8ded..1e77a2929e 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -632,9 +632,8 @@ listener_release_exit:
|
||||||
|
QLIST_REMOVE(bcontainer, next);
|
||||||
|
vfio_kvm_device_del_group(group);
|
||||||
|
memory_listener_unregister(&bcontainer->listener);
|
||||||
|
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||||
|
- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||||
|
- vfio_spapr_container_deinit(container);
|
||||||
|
+ if (bcontainer->ops->release) {
|
||||||
|
+ bcontainer->ops->release(bcontainer);
|
||||||
|
}
|
||||||
|
|
||||||
|
enable_discards_exit:
|
||||||
|
@@ -667,9 +666,8 @@ static void vfio_disconnect_container(VFIOGroup *group)
|
||||||
|
*/
|
||||||
|
if (QLIST_EMPTY(&container->group_list)) {
|
||||||
|
memory_listener_unregister(&bcontainer->listener);
|
||||||
|
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU ||
|
||||||
|
- container->iommu_type == VFIO_SPAPR_TCE_IOMMU) {
|
||||||
|
- vfio_spapr_container_deinit(container);
|
||||||
|
+ if (bcontainer->ops->release) {
|
||||||
|
+ bcontainer->ops->release(bcontainer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 5c6426e697..44617dfc6b 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -440,6 +440,24 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
|
||||||
|
+{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
+ VFIOHostDMAWindow *hostwin, *next;
|
||||||
|
+
|
||||||
|
+ if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
+ memory_listener_unregister(&scontainer->prereg_listener);
|
||||||
|
+ }
|
||||||
|
+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next,
|
||||||
|
+ next) {
|
||||||
|
+ QLIST_REMOVE(hostwin, hostwin_next);
|
||||||
|
+ g_free(hostwin);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static VFIOIOMMUOps vfio_iommu_spapr_ops;
|
||||||
|
|
||||||
|
static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
@@ -447,6 +465,7 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
vfio_iommu_spapr_ops = *bcontainer->ops;
|
||||||
|
vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window;
|
||||||
|
vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window;
|
||||||
|
+ vfio_iommu_spapr_ops.release = vfio_spapr_container_release;
|
||||||
|
bcontainer->ops = &vfio_iommu_spapr_ops;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -527,19 +546,3 @@ listener_unregister_exit:
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
-void vfio_spapr_container_deinit(VFIOContainer *container)
|
||||||
|
-{
|
||||||
|
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
- container);
|
||||||
|
- VFIOHostDMAWindow *hostwin, *next;
|
||||||
|
-
|
||||||
|
- if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
- memory_listener_unregister(&scontainer->prereg_listener);
|
||||||
|
- }
|
||||||
|
- QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next,
|
||||||
|
- next) {
|
||||||
|
- QLIST_REMOVE(hostwin, hostwin_next);
|
||||||
|
- g_free(hostwin);
|
||||||
|
- }
|
||||||
|
-}
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 2ae297ccda..5c9594b6c7 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -117,5 +117,6 @@ struct VFIOIOMMUOps {
|
||||||
|
Error **errp);
|
||||||
|
void (*del_window)(VFIOContainerBase *bcontainer,
|
||||||
|
MemoryRegionSection *section);
|
||||||
|
+ void (*release)(VFIOContainerBase *bcontainer);
|
||||||
|
};
|
||||||
|
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
150
kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch
Normal file
150
kvm-vfio-spapr-Introduce-a-sPAPR-VFIOIOMMU-QOM-interface.patch
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
From 645ed97633935712edcc2c56f252738b38f15e3a Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:22 +0100
|
||||||
|
Subject: [PATCH 055/101] vfio/spapr: Introduce a sPAPR VFIOIOMMU QOM interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [54/67] 2ceac3c07d71790dc3852fbbbd4084a7affb9373 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Move vfio_spapr_container_setup() to a VFIOIOMMUClass::setup handler
|
||||||
|
and convert the sPAPR VFIOIOMMUOps struct to a QOM interface. The
|
||||||
|
sPAPR QOM interface inherits from the legacy QOM interface because
|
||||||
|
both have the same basic needs. The sPAPR interface is then
|
||||||
|
extended with the handlers specific to the sPAPR IOMMU.
|
||||||
|
|
||||||
|
This allows reuse and provides better abstraction of the backends. It
|
||||||
|
will be useful to avoid compiling the sPAPR IOMMU backend on targets
|
||||||
|
not supporting it.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit f221f641a2fe69c2ca3857759551470664b0bec8)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/container.c | 18 +++++--------
|
||||||
|
hw/vfio/spapr.c | 39 ++++++++++++++++-----------
|
||||||
|
include/hw/vfio/vfio-container-base.h | 1 +
|
||||||
|
3 files changed, 31 insertions(+), 27 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
|
||||||
|
index c22bdd3216..688cf23bab 100644
|
||||||
|
--- a/hw/vfio/container.c
|
||||||
|
+++ b/hw/vfio/container.c
|
||||||
|
@@ -381,6 +381,10 @@ static const VFIOIOMMUClass *vfio_get_iommu_class(int iommu_type, Error **errp)
|
||||||
|
case VFIO_TYPE1_IOMMU:
|
||||||
|
klass = object_class_by_name(TYPE_VFIO_IOMMU_LEGACY);
|
||||||
|
break;
|
||||||
|
+ case VFIO_SPAPR_TCE_v2_IOMMU:
|
||||||
|
+ case VFIO_SPAPR_TCE_IOMMU:
|
||||||
|
+ klass = object_class_by_name(TYPE_VFIO_IOMMU_SPAPR);
|
||||||
|
+ break;
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
};
|
||||||
|
@@ -623,19 +627,9 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
||||||
|
goto free_container_exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
- switch (container->iommu_type) {
|
||||||
|
- case VFIO_TYPE1v2_IOMMU:
|
||||||
|
- case VFIO_TYPE1_IOMMU:
|
||||||
|
- ret = vfio_legacy_setup(bcontainer, errp);
|
||||||
|
- break;
|
||||||
|
- case VFIO_SPAPR_TCE_v2_IOMMU:
|
||||||
|
- case VFIO_SPAPR_TCE_IOMMU:
|
||||||
|
- ret = vfio_spapr_container_init(container, errp);
|
||||||
|
- break;
|
||||||
|
- default:
|
||||||
|
- g_assert_not_reached();
|
||||||
|
- }
|
||||||
|
+ assert(bcontainer->ops->setup);
|
||||||
|
|
||||||
|
+ ret = bcontainer->ops->setup(bcontainer, errp);
|
||||||
|
if (ret) {
|
||||||
|
goto enable_discards_exit;
|
||||||
|
}
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 44617dfc6b..0d949bb728 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -458,20 +458,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-static VFIOIOMMUOps vfio_iommu_spapr_ops;
|
||||||
|
-
|
||||||
|
-static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
-{
|
||||||
|
- vfio_iommu_spapr_ops = *bcontainer->ops;
|
||||||
|
- vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window;
|
||||||
|
- vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window;
|
||||||
|
- vfio_iommu_spapr_ops.release = vfio_spapr_container_release;
|
||||||
|
- bcontainer->ops = &vfio_iommu_spapr_ops;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
+static int vfio_spapr_container_setup(VFIOContainerBase *bcontainer,
|
||||||
|
+ Error **errp)
|
||||||
|
{
|
||||||
|
- VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
container);
|
||||||
|
struct vfio_iommu_spapr_tce_info info;
|
||||||
|
@@ -536,8 +527,6 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
0x1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
- setup_spapr_ops(bcontainer);
|
||||||
|
-
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
listener_unregister_exit:
|
||||||
|
@@ -546,3 +535,23 @@ listener_unregister_exit:
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+static void vfio_iommu_spapr_class_init(ObjectClass *klass, void *data)
|
||||||
|
+{
|
||||||
|
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
|
||||||
|
+
|
||||||
|
+ vioc->add_window = vfio_spapr_container_add_section_window;
|
||||||
|
+ vioc->del_window = vfio_spapr_container_del_section_window;
|
||||||
|
+ vioc->release = vfio_spapr_container_release;
|
||||||
|
+ vioc->setup = vfio_spapr_container_setup;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const TypeInfo types[] = {
|
||||||
|
+ {
|
||||||
|
+ .name = TYPE_VFIO_IOMMU_SPAPR,
|
||||||
|
+ .parent = TYPE_VFIO_IOMMU_LEGACY,
|
||||||
|
+ .class_init = vfio_iommu_spapr_class_init,
|
||||||
|
+ },
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+DEFINE_TYPES(types)
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index ce8b1fba88..9e21d7811f 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -95,6 +95,7 @@ void vfio_container_destroy(VFIOContainerBase *bcontainer);
|
||||||
|
|
||||||
|
#define TYPE_VFIO_IOMMU "vfio-iommu"
|
||||||
|
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
|
||||||
|
+#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* VFIOContainerBase is not an abstract QOM object because it felt
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,91 @@
|
|||||||
|
From ff0c13c22878eed0f3879c0805bef5b9f9d83e04 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:42 +0800
|
||||||
|
Subject: [PATCH 017/101] vfio/spapr: Introduce spapr backend and target
|
||||||
|
interface
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [16/67] e35cda157a2a1afeded3305622c861abd07edb51 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Introduce an empty spapr backend which will hold spapr specific
|
||||||
|
content, currently only prereg_listener and hostwin_list.
|
||||||
|
|
||||||
|
Also introduce two spapr specific callbacks add/del_window into
|
||||||
|
VFIOIOMMUOps. Instantiate a spapr ops with a helper setup_spapr_ops
|
||||||
|
and assign it to bcontainer->ops.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 9b7d38bf5a2c1054bfe6de08806954cdc45d8d98)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/spapr.c | 14 ++++++++++++++
|
||||||
|
include/hw/vfio/vfio-container-base.h | 6 ++++++
|
||||||
|
2 files changed, 20 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 7a50975f25..e1a6b35563 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -24,6 +24,10 @@
|
||||||
|
#include "qapi/error.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
|
+typedef struct VFIOSpaprContainer {
|
||||||
|
+ VFIOContainer container;
|
||||||
|
+} VFIOSpaprContainer;
|
||||||
|
+
|
||||||
|
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
if (memory_region_is_iommu(section->mr)) {
|
||||||
|
@@ -421,6 +425,14 @@ void vfio_container_del_section_window(VFIOContainer *container,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static VFIOIOMMUOps vfio_iommu_spapr_ops;
|
||||||
|
+
|
||||||
|
+static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
+{
|
||||||
|
+ vfio_iommu_spapr_ops = *bcontainer->ops;
|
||||||
|
+ bcontainer->ops = &vfio_iommu_spapr_ops;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
{
|
||||||
|
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
@@ -486,6 +498,8 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
0x1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ setup_spapr_ops(bcontainer);
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
listener_unregister_exit:
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index 9658ffb526..f62a14ac73 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -101,5 +101,11 @@ struct VFIOIOMMUOps {
|
||||||
|
int (*set_dirty_page_tracking)(VFIOContainerBase *bcontainer, bool start);
|
||||||
|
int (*query_dirty_bitmap)(VFIOContainerBase *bcontainer, VFIOBitmap *vbmap,
|
||||||
|
hwaddr iova, hwaddr size);
|
||||||
|
+ /* SPAPR specific */
|
||||||
|
+ int (*add_window)(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section,
|
||||||
|
+ Error **errp);
|
||||||
|
+ void (*del_window)(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section);
|
||||||
|
};
|
||||||
|
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
188
kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch
Normal file
188
kvm-vfio-spapr-Move-hostwin_list-into-spapr-container.patch
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
From 3e9e7b57b15ac328f5d663b4e04df546d49f5fa6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:45 +0800
|
||||||
|
Subject: [PATCH 020/101] vfio/spapr: Move hostwin_list into spapr container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [19/67] 87cfeaa32ad32a260a89b2bb1866d59e20c0fe30 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional changes intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit dbb9d0c9691d145338686d3e0920da047f2ab3da)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/spapr.c | 36 +++++++++++++++++++----------------
|
||||||
|
include/hw/vfio/vfio-common.h | 1 -
|
||||||
|
2 files changed, 20 insertions(+), 17 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 68c3dd6c75..5c6426e697 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -27,6 +27,7 @@
|
||||||
|
typedef struct VFIOSpaprContainer {
|
||||||
|
VFIOContainer container;
|
||||||
|
MemoryListener prereg_listener;
|
||||||
|
+ QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
} VFIOSpaprContainer;
|
||||||
|
|
||||||
|
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
|
||||||
|
@@ -154,12 +155,12 @@ static const MemoryListener vfio_prereg_listener = {
|
||||||
|
.region_del = vfio_prereg_listener_region_del,
|
||||||
|
};
|
||||||
|
|
||||||
|
-static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
|
||||||
|
+static void vfio_host_win_add(VFIOSpaprContainer *scontainer, hwaddr min_iova,
|
||||||
|
hwaddr max_iova, uint64_t iova_pgsizes)
|
||||||
|
{
|
||||||
|
VFIOHostDMAWindow *hostwin;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||||
|
+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||||
|
if (ranges_overlap(hostwin->min_iova,
|
||||||
|
hostwin->max_iova - hostwin->min_iova + 1,
|
||||||
|
min_iova,
|
||||||
|
@@ -173,15 +174,15 @@ static void vfio_host_win_add(VFIOContainer *container, hwaddr min_iova,
|
||||||
|
hostwin->min_iova = min_iova;
|
||||||
|
hostwin->max_iova = max_iova;
|
||||||
|
hostwin->iova_pgsizes = iova_pgsizes;
|
||||||
|
- QLIST_INSERT_HEAD(&container->hostwin_list, hostwin, hostwin_next);
|
||||||
|
+ QLIST_INSERT_HEAD(&scontainer->hostwin_list, hostwin, hostwin_next);
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int vfio_host_win_del(VFIOContainer *container,
|
||||||
|
+static int vfio_host_win_del(VFIOSpaprContainer *scontainer,
|
||||||
|
hwaddr min_iova, hwaddr max_iova)
|
||||||
|
{
|
||||||
|
VFIOHostDMAWindow *hostwin;
|
||||||
|
|
||||||
|
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||||
|
+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||||
|
if (hostwin->min_iova == min_iova && hostwin->max_iova == max_iova) {
|
||||||
|
QLIST_REMOVE(hostwin, hostwin_next);
|
||||||
|
g_free(hostwin);
|
||||||
|
@@ -192,7 +193,7 @@ static int vfio_host_win_del(VFIOContainer *container,
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static VFIOHostDMAWindow *vfio_find_hostwin(VFIOContainer *container,
|
||||||
|
+static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container,
|
||||||
|
hwaddr iova, hwaddr end)
|
||||||
|
{
|
||||||
|
VFIOHostDMAWindow *hostwin;
|
||||||
|
@@ -329,6 +330,8 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
bcontainer);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
VFIOHostDMAWindow *hostwin;
|
||||||
|
hwaddr pgsize = 0;
|
||||||
|
int ret;
|
||||||
|
@@ -344,7 +347,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
iova = section->offset_within_address_space;
|
||||||
|
end = iova + int128_get64(section->size) - 1;
|
||||||
|
|
||||||
|
- if (!vfio_find_hostwin(container, iova, end)) {
|
||||||
|
+ if (!vfio_find_hostwin(scontainer, iova, end)) {
|
||||||
|
error_setg(errp, "Container %p can't map guest IOVA region"
|
||||||
|
" 0x%"HWADDR_PRIx"..0x%"HWADDR_PRIx, container,
|
||||||
|
iova, end);
|
||||||
|
@@ -358,7 +361,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* For now intersections are not allowed, we may relax this later */
|
||||||
|
- QLIST_FOREACH(hostwin, &container->hostwin_list, hostwin_next) {
|
||||||
|
+ QLIST_FOREACH(hostwin, &scontainer->hostwin_list, hostwin_next) {
|
||||||
|
if (ranges_overlap(hostwin->min_iova,
|
||||||
|
hostwin->max_iova - hostwin->min_iova + 1,
|
||||||
|
section->offset_within_address_space,
|
||||||
|
@@ -380,7 +383,7 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
- vfio_host_win_add(container, section->offset_within_address_space,
|
||||||
|
+ vfio_host_win_add(scontainer, section->offset_within_address_space,
|
||||||
|
section->offset_within_address_space +
|
||||||
|
int128_get64(section->size) - 1, pgsize);
|
||||||
|
#ifdef CONFIG_KVM
|
||||||
|
@@ -419,6 +422,8 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
{
|
||||||
|
VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
bcontainer);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
|
||||||
|
if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
return;
|
||||||
|
@@ -426,7 +431,7 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
|
||||||
|
vfio_spapr_remove_window(container,
|
||||||
|
section->offset_within_address_space);
|
||||||
|
- if (vfio_host_win_del(container,
|
||||||
|
+ if (vfio_host_win_del(scontainer,
|
||||||
|
section->offset_within_address_space,
|
||||||
|
section->offset_within_address_space +
|
||||||
|
int128_get64(section->size) - 1) < 0) {
|
||||||
|
@@ -454,7 +459,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
|
||||||
|
int ret, fd = container->fd;
|
||||||
|
|
||||||
|
- QLIST_INIT(&container->hostwin_list);
|
||||||
|
+ QLIST_INIT(&scontainer->hostwin_list);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The host kernel code implementing VFIO_IOMMU_DISABLE is called
|
||||||
|
@@ -506,7 +511,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
} else {
|
||||||
|
/* The default table uses 4K pages */
|
||||||
|
bcontainer->pgsizes = 0x1000;
|
||||||
|
- vfio_host_win_add(container, info.dma32_window_start,
|
||||||
|
+ vfio_host_win_add(scontainer, info.dma32_window_start,
|
||||||
|
info.dma32_window_start +
|
||||||
|
info.dma32_window_size - 1,
|
||||||
|
0x1000);
|
||||||
|
@@ -525,15 +530,14 @@ listener_unregister_exit:
|
||||||
|
|
||||||
|
void vfio_spapr_container_deinit(VFIOContainer *container)
|
||||||
|
{
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
VFIOHostDMAWindow *hostwin, *next;
|
||||||
|
|
||||||
|
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
- VFIOSpaprContainer *scontainer = container_of(container,
|
||||||
|
- VFIOSpaprContainer,
|
||||||
|
- container);
|
||||||
|
memory_listener_unregister(&scontainer->prereg_listener);
|
||||||
|
}
|
||||||
|
- QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
|
||||||
|
+ QLIST_FOREACH_SAFE(hostwin, &scontainer->hostwin_list, hostwin_next,
|
||||||
|
next) {
|
||||||
|
QLIST_REMOVE(hostwin, hostwin_next);
|
||||||
|
g_free(hostwin);
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index ed6148c058..24ecc0e7ee 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -79,7 +79,6 @@ typedef struct VFIOContainer {
|
||||||
|
VFIOContainerBase bcontainer;
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
unsigned iommu_type;
|
||||||
|
- QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
} VFIOContainer;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
120
kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch
Normal file
120
kvm-vfio-spapr-Move-prereg_listener-into-spapr-container.patch
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
From 17e6dad3e43e173147c0ca33f6f1f4f317a77d0b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:44 +0800
|
||||||
|
Subject: [PATCH 019/101] vfio/spapr: Move prereg_listener into spapr container
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [18/67] dbea1b0b759e91b953271da92bba4ca6853bec82 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional changes intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 6ad359ec29af7f21dcb206c8edb26905a4925f80)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/spapr.c | 24 ++++++++++++++++--------
|
||||||
|
include/hw/vfio/vfio-common.h | 1 -
|
||||||
|
2 files changed, 16 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index 5be1911aad..68c3dd6c75 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -26,6 +26,7 @@
|
||||||
|
|
||||||
|
typedef struct VFIOSpaprContainer {
|
||||||
|
VFIOContainer container;
|
||||||
|
+ MemoryListener prereg_listener;
|
||||||
|
} VFIOSpaprContainer;
|
||||||
|
|
||||||
|
static bool vfio_prereg_listener_skipped_section(MemoryRegionSection *section)
|
||||||
|
@@ -48,8 +49,9 @@ static void *vfio_prereg_gpa_to_vaddr(MemoryRegionSection *section, hwaddr gpa)
|
||||||
|
static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||||
|
- prereg_listener);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
|
||||||
|
+ prereg_listener);
|
||||||
|
+ VFIOContainer *container = &scontainer->container;
|
||||||
|
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
const hwaddr gpa = section->offset_within_address_space;
|
||||||
|
hwaddr end;
|
||||||
|
@@ -107,8 +109,9 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
|
||||||
|
static void vfio_prereg_listener_region_del(MemoryListener *listener,
|
||||||
|
MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
- VFIOContainer *container = container_of(listener, VFIOContainer,
|
||||||
|
- prereg_listener);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
|
||||||
|
+ prereg_listener);
|
||||||
|
+ VFIOContainer *container = &scontainer->container;
|
||||||
|
const hwaddr gpa = section->offset_within_address_space;
|
||||||
|
hwaddr end;
|
||||||
|
int ret;
|
||||||
|
@@ -445,6 +448,8 @@ static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
{
|
||||||
|
VFIOContainerBase *bcontainer = &container->bcontainer;
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
struct vfio_iommu_spapr_tce_info info;
|
||||||
|
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
|
||||||
|
int ret, fd = container->fd;
|
||||||
|
@@ -463,9 +468,9 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
return -errno;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
- container->prereg_listener = vfio_prereg_listener;
|
||||||
|
+ scontainer->prereg_listener = vfio_prereg_listener;
|
||||||
|
|
||||||
|
- memory_listener_register(&container->prereg_listener,
|
||||||
|
+ memory_listener_register(&scontainer->prereg_listener,
|
||||||
|
&address_space_memory);
|
||||||
|
if (bcontainer->error) {
|
||||||
|
ret = -1;
|
||||||
|
@@ -513,7 +518,7 @@ int vfio_spapr_container_init(VFIOContainer *container, Error **errp)
|
||||||
|
|
||||||
|
listener_unregister_exit:
|
||||||
|
if (v2) {
|
||||||
|
- memory_listener_unregister(&container->prereg_listener);
|
||||||
|
+ memory_listener_unregister(&scontainer->prereg_listener);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
@@ -523,7 +528,10 @@ void vfio_spapr_container_deinit(VFIOContainer *container)
|
||||||
|
VFIOHostDMAWindow *hostwin, *next;
|
||||||
|
|
||||||
|
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
- memory_listener_unregister(&container->prereg_listener);
|
||||||
|
+ VFIOSpaprContainer *scontainer = container_of(container,
|
||||||
|
+ VFIOSpaprContainer,
|
||||||
|
+ container);
|
||||||
|
+ memory_listener_unregister(&scontainer->prereg_listener);
|
||||||
|
}
|
||||||
|
QLIST_FOREACH_SAFE(hostwin, &container->hostwin_list, hostwin_next,
|
||||||
|
next) {
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index 055f679363..ed6148c058 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -78,7 +78,6 @@ struct VFIOGroup;
|
||||||
|
typedef struct VFIOContainer {
|
||||||
|
VFIOContainerBase bcontainer;
|
||||||
|
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
|
||||||
|
- MemoryListener prereg_listener;
|
||||||
|
unsigned iommu_type;
|
||||||
|
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
|
||||||
|
QLIST_HEAD(, VFIOGroup) group_list;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,46 @@
|
|||||||
|
From 5d485eb1442a81b51688124ce30024e96490acbf Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
|
||||||
|
Date: Tue, 19 Dec 2023 07:58:24 +0100
|
||||||
|
Subject: [PATCH 057/101] vfio/spapr: Only compile sPAPR IOMMU support when
|
||||||
|
needed
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [56/67] 4dc0cbde470f877a8aac2bf6fab6923f2f919285 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
sPAPR IOMMU support is only needed for pseries machines. Compile out
|
||||||
|
support when CONFIG_PSERIES is not set. This saves ~7K of text.
|
||||||
|
|
||||||
|
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Tested-by: Eric Farman <farman@linux.ibm.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 10164df6ed3d41cbf67105dcd954a663ef4cc3e9)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/meson.build | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
|
||||||
|
index e5d98b6adc..bb98493b53 100644
|
||||||
|
--- a/hw/vfio/meson.build
|
||||||
|
+++ b/hw/vfio/meson.build
|
||||||
|
@@ -4,9 +4,9 @@ vfio_ss.add(files(
|
||||||
|
'common.c',
|
||||||
|
'container-base.c',
|
||||||
|
'container.c',
|
||||||
|
- 'spapr.c',
|
||||||
|
'migration.c',
|
||||||
|
))
|
||||||
|
+vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
|
||||||
|
vfio_ss.add(when: 'CONFIG_IOMMUFD', if_true: files(
|
||||||
|
'iommufd.c',
|
||||||
|
))
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
184
kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch
Normal file
184
kvm-vfio-spapr-switch-to-spapr-IOMMU-BE-add-del_section_.patch
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
From 3b7f044f15b4a9daf4ad7eda58777aba6dbe3fc0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Date: Thu, 2 Nov 2023 15:12:43 +0800
|
||||||
|
Subject: [PATCH 018/101] vfio/spapr: switch to spapr IOMMU BE
|
||||||
|
add/del_section_window
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Eric Auger <eric.auger@redhat.com>
|
||||||
|
RH-MergeRequest: 211: IOMMUFD backend backport
|
||||||
|
RH-Jira: RHEL-19302 RHEL-21057
|
||||||
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
RH-Acked-by: Sebastian Ott <sebott@redhat.com>
|
||||||
|
RH-Commit: [17/67] a0d9f1f2d4d2592f3d9fc2ee5b2c38236a986e38 (eauger1/centos-qemu-kvm)
|
||||||
|
|
||||||
|
No functional change intended.
|
||||||
|
|
||||||
|
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
|
||||||
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
Signed-off-by: Cédric Le Goater <clg@redhat.com>
|
||||||
|
(cherry picked from commit 233309e8e4c158af6c6b126d5ad021bae40a918a)
|
||||||
|
Signed-off-by: Eric Auger <eric.auger@redhat.com>
|
||||||
|
---
|
||||||
|
hw/vfio/common.c | 8 ++------
|
||||||
|
hw/vfio/container-base.c | 21 +++++++++++++++++++++
|
||||||
|
hw/vfio/spapr.c | 19 ++++++++++++++-----
|
||||||
|
include/hw/vfio/vfio-common.h | 5 -----
|
||||||
|
include/hw/vfio/vfio-container-base.h | 5 +++++
|
||||||
|
5 files changed, 42 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
||||||
|
index 483ba82089..572ae7c934 100644
|
||||||
|
--- a/hw/vfio/common.c
|
||||||
|
+++ b/hw/vfio/common.c
|
||||||
|
@@ -571,8 +571,6 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
{
|
||||||
|
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
listener);
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
void *vaddr;
|
||||||
|
@@ -595,7 +593,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (vfio_container_add_section_window(container, section, &err)) {
|
||||||
|
+ if (vfio_container_add_section_window(bcontainer, section, &err)) {
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -738,8 +736,6 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
{
|
||||||
|
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
|
||||||
|
listener);
|
||||||
|
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
- bcontainer);
|
||||||
|
hwaddr iova, end;
|
||||||
|
Int128 llend, llsize;
|
||||||
|
int ret;
|
||||||
|
@@ -818,7 +814,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
|
||||||
|
|
||||||
|
memory_region_unref(section->mr);
|
||||||
|
|
||||||
|
- vfio_container_del_section_window(container, section);
|
||||||
|
+ vfio_container_del_section_window(bcontainer, section);
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct VFIODirtyRanges {
|
||||||
|
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
|
||||||
|
index 0177f43741..71f7274973 100644
|
||||||
|
--- a/hw/vfio/container-base.c
|
||||||
|
+++ b/hw/vfio/container-base.c
|
||||||
|
@@ -31,6 +31,27 @@ int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
return bcontainer->ops->dma_unmap(bcontainer, iova, size, iotlb);
|
||||||
|
}
|
||||||
|
|
||||||
|
+int vfio_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section,
|
||||||
|
+ Error **errp)
|
||||||
|
+{
|
||||||
|
+ if (!bcontainer->ops->add_window) {
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return bcontainer->ops->add_window(bcontainer, section, errp);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section)
|
||||||
|
+{
|
||||||
|
+ if (!bcontainer->ops->del_window) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return bcontainer->ops->del_window(bcontainer, section);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
bool start)
|
||||||
|
{
|
||||||
|
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
|
||||||
|
index e1a6b35563..5be1911aad 100644
|
||||||
|
--- a/hw/vfio/spapr.c
|
||||||
|
+++ b/hw/vfio/spapr.c
|
||||||
|
@@ -319,10 +319,13 @@ static int vfio_spapr_create_window(VFIOContainer *container,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-int vfio_container_add_section_window(VFIOContainer *container,
|
||||||
|
- MemoryRegionSection *section,
|
||||||
|
- Error **errp)
|
||||||
|
+static int
|
||||||
|
+vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section,
|
||||||
|
+ Error **errp)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
VFIOHostDMAWindow *hostwin;
|
||||||
|
hwaddr pgsize = 0;
|
||||||
|
int ret;
|
||||||
|
@@ -407,9 +410,13 @@ int vfio_container_add_section_window(VFIOContainer *container,
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-void vfio_container_del_section_window(VFIOContainer *container,
|
||||||
|
- MemoryRegionSection *section)
|
||||||
|
+static void
|
||||||
|
+vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section)
|
||||||
|
{
|
||||||
|
+ VFIOContainer *container = container_of(bcontainer, VFIOContainer,
|
||||||
|
+ bcontainer);
|
||||||
|
+
|
||||||
|
if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
@@ -430,6 +437,8 @@ static VFIOIOMMUOps vfio_iommu_spapr_ops;
|
||||||
|
static void setup_spapr_ops(VFIOContainerBase *bcontainer)
|
||||||
|
{
|
||||||
|
vfio_iommu_spapr_ops = *bcontainer->ops;
|
||||||
|
+ vfio_iommu_spapr_ops.add_window = vfio_spapr_container_add_section_window;
|
||||||
|
+ vfio_iommu_spapr_ops.del_window = vfio_spapr_container_del_section_window;
|
||||||
|
bcontainer->ops = &vfio_iommu_spapr_ops;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
||||||
|
index b9e5a0e64b..055f679363 100644
|
||||||
|
--- a/include/hw/vfio/vfio-common.h
|
||||||
|
+++ b/include/hw/vfio/vfio-common.h
|
||||||
|
@@ -169,11 +169,6 @@ VFIOAddressSpace *vfio_get_address_space(AddressSpace *as);
|
||||||
|
void vfio_put_address_space(VFIOAddressSpace *space);
|
||||||
|
|
||||||
|
/* SPAPR specific */
|
||||||
|
-int vfio_container_add_section_window(VFIOContainer *container,
|
||||||
|
- MemoryRegionSection *section,
|
||||||
|
- Error **errp);
|
||||||
|
-void vfio_container_del_section_window(VFIOContainer *container,
|
||||||
|
- MemoryRegionSection *section);
|
||||||
|
int vfio_spapr_container_init(VFIOContainer *container, Error **errp);
|
||||||
|
void vfio_spapr_container_deinit(VFIOContainer *container);
|
||||||
|
|
||||||
|
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
|
||||||
|
index f62a14ac73..4b6f017c6f 100644
|
||||||
|
--- a/include/hw/vfio/vfio-container-base.h
|
||||||
|
+++ b/include/hw/vfio/vfio-container-base.h
|
||||||
|
@@ -75,6 +75,11 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
|
||||||
|
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
|
||||||
|
hwaddr iova, ram_addr_t size,
|
||||||
|
IOMMUTLBEntry *iotlb);
|
||||||
|
+int vfio_container_add_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section,
|
||||||
|
+ Error **errp);
|
||||||
|
+void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
|
||||||
|
+ MemoryRegionSection *section);
|
||||||
|
int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
|
||||||
|
bool start);
|
||||||
|
int vfio_container_query_dirty_bitmap(VFIOContainerBase *bcontainer,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
177
kvm-virtio-blk-add-lock-to-protect-s-rq.patch
Normal file
177
kvm-virtio-blk-add-lock-to-protect-s-rq.patch
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
From d54e88103aa76f3bf755b3f4308d8ab60367c6ef Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 14 Sep 2023 10:00:59 -0400
|
||||||
|
Subject: [PATCH 074/101] virtio-blk: add lock to protect s->rq
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [5/26] 17dcd5ba18c03e5633a014d8d62d34d8dd7b43bf (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
s->rq is accessed from IO_CODE and GLOBAL_STATE_CODE. Introduce a lock
|
||||||
|
to protect s->rq and eliminate reliance on the AioContext lock.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20230914140101.1065008-3-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/block/virtio-blk.c | 67 +++++++++++++++++++++++-----------
|
||||||
|
include/hw/virtio/virtio-blk.h | 3 +-
|
||||||
|
2 files changed, 47 insertions(+), 23 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
|
||||||
|
index a1f8e15522..ee38e089bc 100644
|
||||||
|
--- a/hw/block/virtio-blk.c
|
||||||
|
+++ b/hw/block/virtio-blk.c
|
||||||
|
@@ -82,8 +82,11 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
|
||||||
|
/* Break the link as the next request is going to be parsed from the
|
||||||
|
* ring again. Otherwise we may end up doing a double completion! */
|
||||||
|
req->mr_next = NULL;
|
||||||
|
- req->next = s->rq;
|
||||||
|
- s->rq = req;
|
||||||
|
+
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
|
||||||
|
+ req->next = s->rq;
|
||||||
|
+ s->rq = req;
|
||||||
|
+ }
|
||||||
|
} else if (action == BLOCK_ERROR_ACTION_REPORT) {
|
||||||
|
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
|
||||||
|
if (acct_failed) {
|
||||||
|
@@ -1183,10 +1186,13 @@ static void virtio_blk_dma_restart_bh(void *opaque)
|
||||||
|
{
|
||||||
|
VirtIOBlock *s = opaque;
|
||||||
|
|
||||||
|
- VirtIOBlockReq *req = s->rq;
|
||||||
|
+ VirtIOBlockReq *req;
|
||||||
|
MultiReqBuffer mrb = {};
|
||||||
|
|
||||||
|
- s->rq = NULL;
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
|
||||||
|
+ req = s->rq;
|
||||||
|
+ s->rq = NULL;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
while (req) {
|
||||||
|
@@ -1238,22 +1244,29 @@ static void virtio_blk_reset(VirtIODevice *vdev)
|
||||||
|
AioContext *ctx;
|
||||||
|
VirtIOBlockReq *req;
|
||||||
|
|
||||||
|
+ /* Dataplane has stopped... */
|
||||||
|
+ assert(!s->dataplane_started);
|
||||||
|
+
|
||||||
|
+ /* ...but requests may still be in flight. */
|
||||||
|
ctx = blk_get_aio_context(s->blk);
|
||||||
|
aio_context_acquire(ctx);
|
||||||
|
blk_drain(s->blk);
|
||||||
|
+ aio_context_release(ctx);
|
||||||
|
|
||||||
|
/* We drop queued requests after blk_drain() because blk_drain() itself can
|
||||||
|
* produce them. */
|
||||||
|
- while (s->rq) {
|
||||||
|
- req = s->rq;
|
||||||
|
- s->rq = req->next;
|
||||||
|
- virtqueue_detach_element(req->vq, &req->elem, 0);
|
||||||
|
- virtio_blk_free_request(req);
|
||||||
|
- }
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
|
||||||
|
+ while (s->rq) {
|
||||||
|
+ req = s->rq;
|
||||||
|
+ s->rq = req->next;
|
||||||
|
|
||||||
|
- aio_context_release(ctx);
|
||||||
|
+ /* No other threads can access req->vq here */
|
||||||
|
+ virtqueue_detach_element(req->vq, &req->elem, 0);
|
||||||
|
+
|
||||||
|
+ virtio_blk_free_request(req);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- assert(!s->dataplane_started);
|
||||||
|
blk_set_enable_write_cache(s->blk, s->original_wce);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1443,18 +1456,22 @@ static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
|
||||||
|
static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
|
||||||
|
{
|
||||||
|
VirtIOBlock *s = VIRTIO_BLK(vdev);
|
||||||
|
- VirtIOBlockReq *req = s->rq;
|
||||||
|
|
||||||
|
- while (req) {
|
||||||
|
- qemu_put_sbyte(f, 1);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
|
||||||
|
+ VirtIOBlockReq *req = s->rq;
|
||||||
|
|
||||||
|
- if (s->conf.num_queues > 1) {
|
||||||
|
- qemu_put_be32(f, virtio_get_queue_index(req->vq));
|
||||||
|
- }
|
||||||
|
+ while (req) {
|
||||||
|
+ qemu_put_sbyte(f, 1);
|
||||||
|
|
||||||
|
- qemu_put_virtqueue_element(vdev, f, &req->elem);
|
||||||
|
- req = req->next;
|
||||||
|
+ if (s->conf.num_queues > 1) {
|
||||||
|
+ qemu_put_be32(f, virtio_get_queue_index(req->vq));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ qemu_put_virtqueue_element(vdev, f, &req->elem);
|
||||||
|
+ req = req->next;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
+
|
||||||
|
qemu_put_sbyte(f, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1480,8 +1497,11 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
|
||||||
|
|
||||||
|
req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
|
||||||
|
virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
|
||||||
|
- req->next = s->rq;
|
||||||
|
- s->rq = req;
|
||||||
|
+
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->rq_lock) {
|
||||||
|
+ req->next = s->rq;
|
||||||
|
+ s->rq = req;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
@@ -1628,6 +1648,8 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
s->host_features);
|
||||||
|
virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
|
||||||
|
|
||||||
|
+ qemu_mutex_init(&s->rq_lock);
|
||||||
|
+
|
||||||
|
s->blk = conf->conf.blk;
|
||||||
|
s->rq = NULL;
|
||||||
|
s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
|
||||||
|
@@ -1679,6 +1701,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
|
||||||
|
virtio_del_queue(vdev, i);
|
||||||
|
}
|
||||||
|
qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
|
||||||
|
+ qemu_mutex_destroy(&s->rq_lock);
|
||||||
|
blk_ram_registrar_destroy(&s->blk_ram_registrar);
|
||||||
|
qemu_del_vm_change_state_handler(s->change);
|
||||||
|
blockdev_mark_auto_del(s->blk);
|
||||||
|
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
|
||||||
|
index dafec432ce..9881009c22 100644
|
||||||
|
--- a/include/hw/virtio/virtio-blk.h
|
||||||
|
+++ b/include/hw/virtio/virtio-blk.h
|
||||||
|
@@ -54,7 +54,8 @@ struct VirtIOBlockReq;
|
||||||
|
struct VirtIOBlock {
|
||||||
|
VirtIODevice parent_obj;
|
||||||
|
BlockBackend *blk;
|
||||||
|
- void *rq;
|
||||||
|
+ QemuMutex rq_lock;
|
||||||
|
+ void *rq; /* protected by rq_lock */
|
||||||
|
VirtIOBlkConf conf;
|
||||||
|
unsigned short sector_mask;
|
||||||
|
bool original_wce;
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
167
kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch
Normal file
167
kvm-virtio-blk-don-t-lock-AioContext-in-the-completion-c.patch
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
From a2069ff76637365cacf5b96f9427b98a6ca2c9ba Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 14 Sep 2023 10:01:00 -0400
|
||||||
|
Subject: [PATCH 075/101] virtio-blk: don't lock AioContext in the completion
|
||||||
|
code path
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [6/26] 3426f62c2156f6967bb4ffbce75a4ff46d3312a3 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Nothing in the completion code path relies on the AioContext lock
|
||||||
|
anymore. Virtqueues are only accessed from one thread at any moment and
|
||||||
|
the s->rq global state is protected by its own lock now.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20230914140101.1065008-4-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/block/virtio-blk.c | 34 ++++------------------------------
|
||||||
|
1 file changed, 4 insertions(+), 30 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
|
||||||
|
index ee38e089bc..f5315df042 100644
|
||||||
|
--- a/hw/block/virtio-blk.c
|
||||||
|
+++ b/hw/block/virtio-blk.c
|
||||||
|
@@ -105,7 +105,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
|
||||||
|
VirtIOBlock *s = next->dev;
|
||||||
|
VirtIODevice *vdev = VIRTIO_DEVICE(s);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
while (next) {
|
||||||
|
VirtIOBlockReq *req = next;
|
||||||
|
next = req->mr_next;
|
||||||
|
@@ -138,7 +137,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
|
||||||
|
block_acct_done(blk_get_stats(s->blk), &req->acct);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
}
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_blk_flush_complete(void *opaque, int ret)
|
||||||
|
@@ -146,19 +144,13 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
|
||||||
|
VirtIOBlockReq *req = opaque;
|
||||||
|
VirtIOBlock *s = req->dev;
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
- if (ret) {
|
||||||
|
- if (virtio_blk_handle_rw_error(req, -ret, 0, true)) {
|
||||||
|
- goto out;
|
||||||
|
- }
|
||||||
|
+ if (ret && virtio_blk_handle_rw_error(req, -ret, 0, true)) {
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
|
||||||
|
block_acct_done(blk_get_stats(s->blk), &req->acct);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
-
|
||||||
|
-out:
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
|
||||||
|
@@ -168,11 +160,8 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
|
||||||
|
bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), &req->out.type) &
|
||||||
|
~VIRTIO_BLK_T_BARRIER) == VIRTIO_BLK_T_WRITE_ZEROES;
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
- if (ret) {
|
||||||
|
- if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
|
||||||
|
- goto out;
|
||||||
|
- }
|
||||||
|
+ if (ret && virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
|
||||||
|
+ return;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
|
||||||
|
@@ -180,9 +169,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
|
||||||
|
block_acct_done(blk_get_stats(s->blk), &req->acct);
|
||||||
|
}
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
-
|
||||||
|
-out:
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
@@ -229,10 +215,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int status)
|
||||||
|
virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len);
|
||||||
|
|
||||||
|
out:
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
virtio_blk_req_complete(req, status);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
g_free(ioctl_req);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -672,7 +656,6 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret)
|
||||||
|
{
|
||||||
|
ZoneCmdData *data = opaque;
|
||||||
|
VirtIOBlockReq *req = data->req;
|
||||||
|
- VirtIOBlock *s = req->dev;
|
||||||
|
VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
|
||||||
|
struct iovec *in_iov = data->in_iov;
|
||||||
|
unsigned in_num = data->in_num;
|
||||||
|
@@ -763,10 +746,8 @@ static void virtio_blk_zone_report_complete(void *opaque, int ret)
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
virtio_blk_req_complete(req, err_status);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
g_free(data->zone_report_data.zones);
|
||||||
|
g_free(data);
|
||||||
|
}
|
||||||
|
@@ -829,10 +810,8 @@ static void virtio_blk_zone_mgmt_complete(void *opaque, int ret)
|
||||||
|
err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD;
|
||||||
|
}
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
virtio_blk_req_complete(req, err_status);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int virtio_blk_handle_zone_mgmt(VirtIOBlockReq *req, BlockZoneOp op)
|
||||||
|
@@ -882,7 +861,6 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret)
|
||||||
|
{
|
||||||
|
ZoneCmdData *data = opaque;
|
||||||
|
VirtIOBlockReq *req = data->req;
|
||||||
|
- VirtIOBlock *s = req->dev;
|
||||||
|
VirtIODevice *vdev = VIRTIO_DEVICE(req->dev);
|
||||||
|
int64_t append_sector, n;
|
||||||
|
uint8_t err_status = VIRTIO_BLK_S_OK;
|
||||||
|
@@ -905,10 +883,8 @@ static void virtio_blk_zone_append_complete(void *opaque, int ret)
|
||||||
|
trace_virtio_blk_zone_append_complete(vdev, req, append_sector, ret);
|
||||||
|
|
||||||
|
out:
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
virtio_blk_req_complete(req, err_status);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
g_free(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -944,10 +920,8 @@ static int virtio_blk_handle_zone_append(VirtIOBlockReq *req,
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
out:
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
virtio_blk_req_complete(req, err_status);
|
||||||
|
virtio_blk_free_request(req);
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
return err_status;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,67 @@
|
|||||||
|
From 2816f6ce20c496e21947f215112be34a5cb93606 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Thu, 14 Sep 2023 10:01:01 -0400
|
||||||
|
Subject: [PATCH 076/101] virtio-blk: don't lock AioContext in the submission
|
||||||
|
code path
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [7/26] e0de2744cb319569ea008334e45ee5fc2ba9b6d7 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
There is no need to acquire the AioContext lock around blk_aio_*() or
|
||||||
|
blk_get_geometry() anymore. I/O plugging (defer_call()) also does not
|
||||||
|
require the AioContext lock anymore.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Message-ID: <20230914140101.1065008-5-stefanha@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/block/virtio-blk.c | 5 -----
|
||||||
|
1 file changed, 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
|
||||||
|
index f5315df042..e110f9718b 100644
|
||||||
|
--- a/hw/block/virtio-blk.c
|
||||||
|
+++ b/hw/block/virtio-blk.c
|
||||||
|
@@ -1111,7 +1111,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
|
||||||
|
MultiReqBuffer mrb = {};
|
||||||
|
bool suppress_notifications = virtio_queue_get_notification(vq);
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->blk));
|
||||||
|
defer_call_begin();
|
||||||
|
|
||||||
|
do {
|
||||||
|
@@ -1137,7 +1136,6 @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
|
||||||
|
}
|
||||||
|
|
||||||
|
defer_call_end();
|
||||||
|
- aio_context_release(blk_get_aio_context(s->blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
|
||||||
|
@@ -1168,7 +1166,6 @@ static void virtio_blk_dma_restart_bh(void *opaque)
|
||||||
|
s->rq = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
- aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
while (req) {
|
||||||
|
VirtIOBlockReq *next = req->next;
|
||||||
|
if (virtio_blk_handle_request(req, &mrb)) {
|
||||||
|
@@ -1192,8 +1189,6 @@ static void virtio_blk_dma_restart_bh(void *opaque)
|
||||||
|
|
||||||
|
/* Paired with inc in virtio_blk_dma_restart_cb() */
|
||||||
|
blk_dec_in_flight(s->conf.conf.blk);
|
||||||
|
-
|
||||||
|
- aio_context_release(blk_get_aio_context(s->conf.conf.blk));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_blk_dma_restart_cb(void *opaque, bool running,
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
87
kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch
Normal file
87
kvm-virtio-gpu-block-migration-of-VMs-with-blob-true.patch
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
From 5db0b4131c56d96760b3300298f4bedab99d35cb Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= <marcandre.lureau@redhat.com>
|
||||||
|
Date: Wed, 6 Sep 2023 17:00:22 +0400
|
||||||
|
Subject: [PATCH 100/101] virtio-gpu: block migration of VMs with blob=true
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
RH-Author: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
RH-MergeRequest: 217: virtio-gpu: block migration of VMs with blob=true
|
||||||
|
RH-Jira: RHEL-7565
|
||||||
|
RH-Commit: [1/1] f978ca697d574b1419eb027a1007c060dfb83298
|
||||||
|
|
||||||
|
JIRA: https://issues.redhat.com/browse/RHEL-7565
|
||||||
|
|
||||||
|
commit 9c549ab6895a43ad0cb33e684e11cdb0b5400897
|
||||||
|
Author: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
Date: Wed Sep 6 17:00:22 2023 +0400
|
||||||
|
|
||||||
|
virtio-gpu: block migration of VMs with blob=true
|
||||||
|
|
||||||
|
"blob" resources don't have an associated pixman image:
|
||||||
|
|
||||||
|
#0 pixman_image_get_stride (image=0x0) at ../pixman/pixman-image.c:921
|
||||||
|
#1 0x0000562327c25236 in virtio_gpu_save (f=0x56232bb13b00, opaque=0x56232b555a60, size=0, field=0x5623289ab6c8 <__compound_literal.3+104>, vmdesc=0x56232ab59fe0) at ../hw/display/virtio-gpu.c:1225
|
||||||
|
|
||||||
|
Related to:
|
||||||
|
https://bugzilla.redhat.com/show_bug.cgi?id=2236353
|
||||||
|
|
||||||
|
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
Acked-by: Peter Xu <peterx@redhat.com>
|
||||||
|
|
||||||
|
[ rhel backport - fix Error* vs Error** argument ]
|
||||||
|
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
|
||||||
|
---
|
||||||
|
hw/display/virtio-gpu.c | 14 ++++++++++++++
|
||||||
|
1 file changed, 14 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
|
||||||
|
index b016d3bac8..1702190ead 100644
|
||||||
|
--- a/hw/display/virtio-gpu.c
|
||||||
|
+++ b/hw/display/virtio-gpu.c
|
||||||
|
@@ -27,6 +27,7 @@
|
||||||
|
#include "hw/virtio/virtio-gpu-pixman.h"
|
||||||
|
#include "hw/virtio/virtio-bus.h"
|
||||||
|
#include "hw/qdev-properties.h"
|
||||||
|
+#include "migration/blocker.h"
|
||||||
|
#include "qemu/log.h"
|
||||||
|
#include "qemu/module.h"
|
||||||
|
#include "qapi/error.h"
|
||||||
|
@@ -41,6 +42,8 @@ virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id,
|
||||||
|
|
||||||
|
static void virtio_gpu_reset_bh(void *opaque);
|
||||||
|
|
||||||
|
+static Error *blob_mig_blocker;
|
||||||
|
+
|
||||||
|
void virtio_gpu_update_cursor_data(VirtIOGPU *g,
|
||||||
|
struct virtio_gpu_scanout *s,
|
||||||
|
uint32_t resource_id)
|
||||||
|
@@ -1452,6 +1455,14 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
|
||||||
|
error_setg(errp, "blobs and virgl are not compatible (yet)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ if (!blob_mig_blocker) {
|
||||||
|
+ error_setg(&blob_mig_blocker,
|
||||||
|
+ "virtio-gpu blob VMs are currently not migratable.");
|
||||||
|
+ }
|
||||||
|
+ if (migrate_add_blocker(&blob_mig_blocker, errp)) {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!virtio_gpu_base_device_realize(qdev,
|
||||||
|
@@ -1478,6 +1489,9 @@ static void virtio_gpu_device_unrealize(DeviceState *qdev)
|
||||||
|
{
|
||||||
|
VirtIOGPU *g = VIRTIO_GPU(qdev);
|
||||||
|
|
||||||
|
+ if (virtio_gpu_blob_enabled(g->parent_obj.conf)) {
|
||||||
|
+ migrate_del_blocker(&blob_mig_blocker);
|
||||||
|
+ }
|
||||||
|
g_clear_pointer(&g->ctrl_bh, qemu_bh_delete);
|
||||||
|
g_clear_pointer(&g->cursor_bh, qemu_bh_delete);
|
||||||
|
g_clear_pointer(&g->reset_bh, qemu_bh_delete);
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
@ -0,0 +1,58 @@
|
|||||||
|
From 1ee3f919a51135a0798a14c734ca80d74d30025d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Mon, 4 Dec 2023 11:42:57 -0500
|
||||||
|
Subject: [PATCH 078/101] virtio-scsi: don't lock AioContext around
|
||||||
|
virtio_queue_aio_attach_host_notifier()
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [9/26] 5e1179e617d05bf765b285ba42393ec1ddbeba28 (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
virtio_queue_aio_attach_host_notifier() does not require the AioContext
|
||||||
|
lock. Stop taking the lock and add an explicit smp_wmb() because we were
|
||||||
|
relying on the implicit barrier in the AioContext lock before.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231204164259.1515217-3-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/virtio-scsi-dataplane.c | 8 +-------
|
||||||
|
1 file changed, 1 insertion(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
|
||||||
|
index 1e684beebe..135e23fe54 100644
|
||||||
|
--- a/hw/scsi/virtio-scsi-dataplane.c
|
||||||
|
+++ b/hw/scsi/virtio-scsi-dataplane.c
|
||||||
|
@@ -149,23 +149,17 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
|
||||||
|
|
||||||
|
memory_region_transaction_commit();
|
||||||
|
|
||||||
|
- /*
|
||||||
|
- * These fields are visible to the IOThread so we rely on implicit barriers
|
||||||
|
- * in aio_context_acquire() on the write side and aio_notify_accept() on
|
||||||
|
- * the read side.
|
||||||
|
- */
|
||||||
|
s->dataplane_starting = false;
|
||||||
|
s->dataplane_started = true;
|
||||||
|
+ smp_wmb(); /* paired with aio_notify_accept() */
|
||||||
|
|
||||||
|
if (s->bus.drain_count == 0) {
|
||||||
|
- aio_context_acquire(s->ctx);
|
||||||
|
virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
|
||||||
|
virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
|
||||||
|
|
||||||
|
for (i = 0; i < vs->conf.num_queues; i++) {
|
||||||
|
virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
|
||||||
|
}
|
||||||
|
- aio_context_release(s->ctx);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
173
kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch
Normal file
173
kvm-virtio-scsi-replace-AioContext-lock-with-tmf_bh_lock.patch
Normal file
@ -0,0 +1,173 @@
|
|||||||
|
From c2d7633ead6e19d4b6af5552ca907ae071b8734b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Date: Tue, 5 Dec 2023 13:19:58 -0500
|
||||||
|
Subject: [PATCH 081/101] virtio-scsi: replace AioContext lock with tmf_bh_lock
|
||||||
|
|
||||||
|
RH-Author: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
RH-MergeRequest: 214: Remove AioContext lock
|
||||||
|
RH-Jira: RHEL-15965
|
||||||
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
||||||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
RH-Commit: [12/26] 8fb375bfd72a491d47321c78078577071a4e90fb (kmwolf/centos-qemu-kvm)
|
||||||
|
|
||||||
|
Protect the Task Management Function BH state with a lock. The TMF BH
|
||||||
|
runs in the main loop thread. An IOThread might process a TMF at the
|
||||||
|
same time as the TMF BH is running. Therefore tmf_bh_list and tmf_bh
|
||||||
|
must be protected by a lock.
|
||||||
|
|
||||||
|
Run TMF request completion in the IOThread using aio_wait_bh_oneshot().
|
||||||
|
This avoids more locking to protect the virtqueue and SCSI layer state.
|
||||||
|
|
||||||
|
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||||||
|
Reviewed-by: Eric Blake <eblake@redhat.com>
|
||||||
|
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
Message-ID: <20231205182011.1976568-2-stefanha@redhat.com>
|
||||||
|
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
|
||||||
|
---
|
||||||
|
hw/scsi/virtio-scsi.c | 62 ++++++++++++++++++++++-----------
|
||||||
|
include/hw/virtio/virtio-scsi.h | 3 +-
|
||||||
|
2 files changed, 43 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
|
||||||
|
index 9c751bf296..4f8d35facc 100644
|
||||||
|
--- a/hw/scsi/virtio-scsi.c
|
||||||
|
+++ b/hw/scsi/virtio-scsi.c
|
||||||
|
@@ -123,6 +123,30 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
|
||||||
|
virtio_scsi_free_req(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void virtio_scsi_complete_req_bh(void *opaque)
|
||||||
|
+{
|
||||||
|
+ VirtIOSCSIReq *req = opaque;
|
||||||
|
+
|
||||||
|
+ virtio_scsi_complete_req(req);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
|
||||||
|
+ * thread cannot touch the virtqueue since that could race with an IOThread.
|
||||||
|
+ */
|
||||||
|
+static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
|
||||||
|
+{
|
||||||
|
+ VirtIOSCSI *s = req->dev;
|
||||||
|
+
|
||||||
|
+ if (!s->ctx || s->ctx == qemu_get_aio_context()) {
|
||||||
|
+ /* No need to schedule a BH when there is no IOThread */
|
||||||
|
+ virtio_scsi_complete_req(req);
|
||||||
|
+ } else {
|
||||||
|
+ /* Run request completion in the IOThread */
|
||||||
|
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
|
||||||
|
{
|
||||||
|
virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
|
||||||
|
@@ -338,10 +362,7 @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
|
||||||
|
|
||||||
|
out:
|
||||||
|
object_unref(OBJECT(d));
|
||||||
|
-
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
- virtio_scsi_complete_req(req);
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
+ virtio_scsi_complete_req_from_main_loop(req);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Some TMFs must be processed from the main loop thread */
|
||||||
|
@@ -354,18 +375,16 @@ static void virtio_scsi_do_tmf_bh(void *opaque)
|
||||||
|
|
||||||
|
GLOBAL_STATE_CODE();
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
|
||||||
|
+ QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
|
||||||
|
+ QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
|
||||||
|
+ QTAILQ_INSERT_TAIL(&reqs, req, next);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
|
||||||
|
- QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
|
||||||
|
- QTAILQ_INSERT_TAIL(&reqs, req, next);
|
||||||
|
+ qemu_bh_delete(s->tmf_bh);
|
||||||
|
+ s->tmf_bh = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
- qemu_bh_delete(s->tmf_bh);
|
||||||
|
- s->tmf_bh = NULL;
|
||||||
|
-
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
-
|
||||||
|
QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
|
||||||
|
QTAILQ_REMOVE(&reqs, req, next);
|
||||||
|
virtio_scsi_do_one_tmf_bh(req);
|
||||||
|
@@ -379,8 +398,7 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
|
||||||
|
|
||||||
|
GLOBAL_STATE_CODE();
|
||||||
|
|
||||||
|
- virtio_scsi_acquire(s);
|
||||||
|
-
|
||||||
|
+ /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
|
||||||
|
if (s->tmf_bh) {
|
||||||
|
qemu_bh_delete(s->tmf_bh);
|
||||||
|
s->tmf_bh = NULL;
|
||||||
|
@@ -393,19 +411,19 @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
|
||||||
|
req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
|
||||||
|
virtio_scsi_complete_req(req);
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- virtio_scsi_release(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
|
||||||
|
{
|
||||||
|
VirtIOSCSI *s = req->dev;
|
||||||
|
|
||||||
|
- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
|
||||||
|
+ WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
|
||||||
|
+ QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
|
||||||
|
|
||||||
|
- if (!s->tmf_bh) {
|
||||||
|
- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
|
||||||
|
- qemu_bh_schedule(s->tmf_bh);
|
||||||
|
+ if (!s->tmf_bh) {
|
||||||
|
+ s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
|
||||||
|
+ qemu_bh_schedule(s->tmf_bh);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1235,6 +1253,7 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
|
||||||
|
Error *err = NULL;
|
||||||
|
|
||||||
|
QTAILQ_INIT(&s->tmf_bh_list);
|
||||||
|
+ qemu_mutex_init(&s->tmf_bh_lock);
|
||||||
|
|
||||||
|
virtio_scsi_common_realize(dev,
|
||||||
|
virtio_scsi_handle_ctrl,
|
||||||
|
@@ -1277,6 +1296,7 @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
|
||||||
|
|
||||||
|
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
|
||||||
|
virtio_scsi_common_unrealize(dev);
|
||||||
|
+ qemu_mutex_destroy(&s->tmf_bh_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Property virtio_scsi_properties[] = {
|
||||||
|
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
|
||||||
|
index 779568ab5d..da8cb928d9 100644
|
||||||
|
--- a/include/hw/virtio/virtio-scsi.h
|
||||||
|
+++ b/include/hw/virtio/virtio-scsi.h
|
||||||
|
@@ -85,8 +85,9 @@ struct VirtIOSCSI {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* TMFs deferred to main loop BH. These fields are protected by
|
||||||
|
- * virtio_scsi_acquire().
|
||||||
|
+ * tmf_bh_lock.
|
||||||
|
*/
|
||||||
|
+ QemuMutex tmf_bh_lock;
|
||||||
|
QEMUBH *tmf_bh;
|
||||||
|
QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.39.3
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user