- kvm-target-i386-Make-invtsc-migratable-when-user-sets-ts.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-kvm-i386-make-kvm_filter_msr-and-related-definitions.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-kvm-remove-unnecessary-ifdef.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-crypto-Define-macros-for-hash-algorithm-digest-lengt.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Drop-the-check-of-phys_bits-in-host_cpu_rea.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Extract-a-common-fucntion-to-setup-value-of.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Drop-the-variable-smp_cores-and-smp_threads.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Drop-cores_per_pkg-in-cpu_x86_cpuid.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-topology-Update-the-comment-of-x86_apicid_from_.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-topology-Introduce-helpers-for-various-topology.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Track-a-X86CPUTopoInfo-directly-in-CPUX86St.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Hoist-check-of-CPUID_EXT3_TOPOEXT-against-t.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-cpu-Remove-nr_cores-from-struct-CPUState.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Set-up-CPUID_HT-in-x86_cpu_expand_features-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Set-and-track-CPUID_EXT3_CMP_LEG-in-env-fea.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-Remove-unused-parameter-uint32_t-bit-in-feature.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-Print-CPUID-subleaf-info-for-unsupported.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-sev-Reduce-system-specific-declarations.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-physmem-replace-assertion-with-error.patch [RHEL-15710 RHEL-20798 RHEL-49728] - 
kvm-redhat-target-i386-add-CPUID-and-MSR-bits-from-Clear.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-qom-reverse-order-of-instance_post_init-calls.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-Remove-AccelCPUClass-cpu_class_init-need.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Consolidate-the-helper-to-get-Host-s-vendor.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-rocker-do-not-pollute-the-namespace.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-linux-headers-Update-to-Linux-v6.14-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-linux-headers-Update-to-Linux-v6.15-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-linux-headers-update-from-6.15-kvm-next.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-update-Linux-headers-to-v6.16-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-update-Linux-headers-to-KVM-tree-master.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-Introduce-tdx-guest-object.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Implement-tdx_kvm_type-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Implement-tdx_kvm_init-to-initialize-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Get-tdx_capabilities-via-KVM_TDX_CAPABILITI.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Introduce-is_tdx_vm-helper-and-cache-tdx_gu.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-kvm-Introduce-kvm_arch_pre_create_vcpu.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Initialize-TDX-before-creating-TD-vcpus.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-property-sept-ve-disable-for-tdx-guest-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Make-sept_ve_disable-set-by-default.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Wire-CPU-features-up-with-attributes-of-TD-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Validate-TD-attributes.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Support-user-configurable-mrconfigid-mrowne.patch [RHEL-15710 
RHEL-20798 RHEL-49728] - kvm-i386-tdx-Set-APIC-bus-rate-to-match-with-what-TDX-mo.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Implement-user-specified-tsc-frequency.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-load-TDVF-for-TD-guest.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdvf-Introduce-function-to-parse-TDVF-metadata.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Parse-TDVF-metadata-for-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Don-t-initialize-pc.rom-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Track-mem_ptr-for-each-firmware-entry-of-TD.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Track-RAM-entries-for-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-headers-Add-definitions-from-UEFI-spec-for-volumes-r.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Setup-the-TD-HOB-list.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-TDVF-memory-via-KVM_TDX_INIT_MEM_REGION.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Call-KVM_TDX_INIT_VCPU-to-initialize-TDX-vc.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Finalize-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Enable-user-exit-on-KVM_HC_MAP_GPA_RANGE.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Handle-KVM_SYSTEM_EVENT_TDX_FATAL.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Wire-TDX_REPORT_FATAL_ERROR-with-GuestPanic.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-kvm-Check-KVM_CAP_MAX_VCPUS-at-vm-level.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-introduce-x86_confidential_guest_cpu_instan.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-implement-tdx_cpu_instance_init.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Introduce-enable_cpuid_0x1f-to-force-exposi.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Force-exposing-CPUID-0x1f.patch [RHEL-15710 RHEL-20798 RHEL-49728] - 
kvm-i386-tdx-Set-kvm_readonly_mem_enabled-to-false-for-T.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Disable-SMM-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Disable-PIC-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Set-and-check-kernel_irqchip-mode-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Don-t-synchronize-guest-tsc-for-TDs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Only-configure-MSR_IA32_UCODE_REV-in-kvm_in.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-apic-Skip-kvm_apic_put-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-cpu-Don-t-set-vcpu_dirty-when-guest_state_protected.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cgs-Rename-mask_cpuid_features-to-adjust_cpuid_.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Implement-adjust_cpuid_features-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-TDX-fixed1-bits-to-supported-CPUIDs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-supported-CPUID-bits-related-to-TD-Attr.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-supported-CPUID-bits-relates-to-XFAM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Add-XFD-to-supported-bit-of-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Define-supported-KVM-features-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cgs-Introduce-x86_confidential_guest_check_feat.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Fetch-and-validate-CPUID-of-TD-guest.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Don-t-treat-SYSCALL-as-unavailable.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Make-invtsc-default-on.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Validate-phys_bits-against-host-value.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-docs-Add-TDX-documentation.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Fix-build-on-32-bit-host.patch [RHEL-15710 
RHEL-20798 RHEL-49728] - kvm-i386-tdvf-Fix-build-on-32-bit-host.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Move-adjustment-of-CPUID_EXT_PDCM-before-fe.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Error-and-exit-when-named-cpu-model-is-requ.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Rename-enable_cpuid_0x1f-to-force_cpuid_0x1.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Fix-the-typo-of-the-comment-of-struct-TdxGu.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Clarify-the-error-message-of-mrconfigid-mro.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-handle-TDG.VP.VMCALL-GetTdVmCallInfo.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-handle-TDG.VP.VMCALL-GetQuote.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-move-max_features-to-class.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-nvmm-whpx-add-accel-CPU-class-that-sets-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-allow-reordering-max_x86_cpu_initfn-vs-a.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-move-accel_cpu_instance_init-to-.instanc.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-target-i386-merge-host_cpu_instance_init-and-host_cp.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Remove-enumeration-of-GetQuote-in-tdx_handl.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Set-value-of-GetTdVmCallInfo-based-on-capab.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-handle-TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUP.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Fix-the-report-of-gpa-in-QAPI.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Remove-task-watch-only-when-it-s-valid.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Don-t-mask-off-CPUID_EXT_PDCM.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Move-x86_ext_save_areas-initialization-to-..patch [RHEL-15710 RHEL-20798 RHEL-49728] - 
kvm-target-i386-tdx-fix-locking-for-interrupt-injection.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-cpu-Cleanup-host_cpu_max_instance_init.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-i386-tdx-Remove-the-redundant-qemu_mutex_init-tdx-lo.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-redhat-enable-CONFIG_TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-redhat-allow-5-level-paging-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-memory-Export-a-helper-to-get-intersection-of-a-Memo.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-memory-Change-memory_region_set_ram_discard_manager-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-ram-block-attributes-Introduce-RamBlockAttributes-to.patch [RHEL-15710 RHEL-20798 RHEL-49728] - kvm-physmem-Support-coordinated-discarding-of-RAM-with-g.patch [RHEL-15710 RHEL-20798 RHEL-49728] - Resolves: RHEL-15710 ([Intel 9.7 FEAT] TDX: QEMU Support) - Resolves: RHEL-20798 ([Intel 9.6 FEAT] TDX: host: Virt-QEMU: Add safe device pass-through for TD) - Resolves: RHEL-49728 ([Intel 9.7 FEAT] Virt-QEMU: TDX: Allow to configure apic bus clock)
614 lines
22 KiB
Diff
614 lines
22 KiB
Diff
From 13a29003f5a5502fc2cd13cb22f3fd6318e80196 Mon Sep 17 00:00:00 2001
|
|
From: Paolo Bonzini <pbonzini@redhat.com>
|
|
Date: Fri, 18 Jul 2025 18:11:28 +0200
|
|
Subject: [PATCH 114/115] ram-block-attributes: Introduce RamBlockAttributes to
|
|
manage RAMBlock with guest_memfd
|
|
|
|
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
|
RH-MergeRequest: 391: TDX support, including attestation and device assignment
|
|
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
|
|
RH-Acked-by: Yash Mankad <None>
|
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
|
RH-Commit: [114/115] eca60c04c6204ee91e4aedb84c993155605e6f5a (bonzini/rhel-qemu-kvm)
|
|
|
|
Commit 852f0048f3 ("RAMBlock: make guest_memfd require uncoordinated
|
|
discard") highlighted that subsystems like VFIO may disable RAM block
|
|
discard. However, guest_memfd relies on discard operations for page
|
|
conversion between private and shared memory, potentially leading to
|
|
the stale IOMMU mapping issue when assigning hardware devices to
|
|
confidential VMs via shared memory. To address this and allow shared
|
|
device assignment, it is crucial to ensure the VFIO system refreshes
|
|
its IOMMU mappings.
|
|
|
|
RamDiscardManager is an existing interface (used by virtio-mem) to
|
|
adjust VFIO mappings in relation to VM page assignment. Effectively page
|
|
conversion is similar to hot-removing a page in one mode and adding it
|
|
back in the other. Therefore, similar actions are required for page
|
|
conversion events. Introduce the RamDiscardManager to guest_memfd to
|
|
facilitate this process.
|
|
|
|
Since guest_memfd is not an object, it cannot directly implement the
|
|
RamDiscardManager interface. Implementing it in HostMemoryBackend is
|
|
not appropriate because guest_memfd is per RAMBlock, and some RAMBlocks
|
|
have a memory backend while others do not. Notably, virtual BIOS
|
|
RAMBlocks using memory_region_init_ram_guest_memfd() do not have a
|
|
backend.
|
|
|
|
To manage RAMBlocks with guest_memfd, define a new object named
|
|
RamBlockAttributes to implement the RamDiscardManager interface. This
|
|
object can store the guest_memfd information such as the bitmap for
|
|
shared memory and the registered listeners for event notifications. A
|
|
new state_change() helper function is provided to notify listeners, such
|
|
as VFIO, allowing VFIO to dynamically DMA map and unmap the shared
|
|
memory according to conversion events. Note that in the current context
|
|
of RamDiscardManager for guest_memfd, the shared state is analogous to
|
|
being populated, while the private state can be considered discarded for
|
|
simplicity. In the future, things will become more complicated when considering
|
|
more states like private/shared/discarded at the same time.
|
|
|
|
In the current implementation, memory state tracking is performed at the
|
|
host page size granularity, as the minimum conversion size can be one
|
|
page per request. Additionally, VFIO expects the DMA mapping for a
|
|
specific IOVA to be mapped and unmapped with the same granularity.
|
|
Confidential VMs may perform partial conversions, such as conversions on
|
|
small regions within a larger one. To prevent such invalid cases and
|
|
until support for DMA mapping cut operations is available, all
|
|
operations are performed with 4K granularity.
|
|
|
|
In addition, memory conversion failures cause QEMU to quit rather than
|
|
resuming the guest or retrying the operation at present. It would be
|
|
future work to add more error handling or rollback mechanisms once
|
|
conversion failures are allowed. For example, in-place conversion of
|
|
guest_memfd could retry the unmap operation during the conversion from
|
|
shared to private. For now, keep the complex error handling out of the
|
|
picture as it is not required.
|
|
|
|
Tested-by: Alexey Kardashevskiy <aik@amd.com>
|
|
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
|
|
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
|
|
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
|
|
Link: https://lore.kernel.org/r/20250612082747.51539-5-chenyi.qiang@intel.com
|
|
[peterx: squash fixup from Chenyi to fix builds]
|
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
(cherry picked from commit 5d6483edaa9232d8f3709f68c8eab4bc2033fb70)
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
|
Conflicts: system/->sysemu/ or exec/, context, class_init argument is not const
|
|
---
|
|
MAINTAINERS | 1 +
|
|
include/exec/ramblock.h | 22 ++
|
|
system/meson.build | 1 +
|
|
system/ram-block-attributes.c | 444 ++++++++++++++++++++++++++++++++++
|
|
system/trace-events | 3 +
|
|
5 files changed, 471 insertions(+)
|
|
create mode 100644 system/ram-block-attributes.c
|
|
|
|
diff --git a/MAINTAINERS b/MAINTAINERS
|
|
index f7b7ceffc4..87ba88da84 100644
|
|
--- a/MAINTAINERS
|
|
+++ b/MAINTAINERS
|
|
@@ -3056,6 +3056,7 @@ F: system/memory.c
|
|
F: system/memory_mapping.c
|
|
F: system/physmem.c
|
|
F: include/exec/memory-internal.h
|
|
+F: system/ram-block-attributes.c
|
|
F: scripts/coccinelle/memory-region-housekeeping.cocci
|
|
|
|
Memory devices
|
|
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
|
|
index 0babd105c0..9ae774d268 100644
|
|
--- a/include/exec/ramblock.h
|
|
+++ b/include/exec/ramblock.h
|
|
@@ -23,6 +23,10 @@
|
|
#include "cpu-common.h"
|
|
#include "qemu/rcu.h"
|
|
#include "exec/ramlist.h"
|
|
+#include "sysemu/hostmem.h"
|
|
+
|
|
+#define TYPE_RAM_BLOCK_ATTRIBUTES "ram-block-attributes"
|
|
+OBJECT_DECLARE_SIMPLE_TYPE(RamBlockAttributes, RAM_BLOCK_ATTRIBUTES)
|
|
|
|
struct RAMBlock {
|
|
struct rcu_head rcu;
|
|
@@ -90,5 +94,23 @@ struct RAMBlock {
|
|
*/
|
|
ram_addr_t postcopy_length;
|
|
};
|
|
+
|
|
+struct RamBlockAttributes {
|
|
+ Object parent;
|
|
+
|
|
+ RAMBlock *ram_block;
|
|
+
|
|
+ /* 1-setting of the bitmap represents ram is populated (shared) */
|
|
+ unsigned bitmap_size;
|
|
+ unsigned long *bitmap;
|
|
+
|
|
+ QLIST_HEAD(, RamDiscardListener) rdl_list;
|
|
+};
|
|
+
|
|
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block);
|
|
+void ram_block_attributes_destroy(RamBlockAttributes *attr);
|
|
+int ram_block_attributes_state_change(RamBlockAttributes *attr, uint64_t offset,
|
|
+ uint64_t size, bool to_discard);
|
|
+
|
|
#endif
|
|
#endif
|
|
diff --git a/system/meson.build b/system/meson.build
|
|
index a296270cb0..b13d9e71ff 100644
|
|
--- a/system/meson.build
|
|
+++ b/system/meson.build
|
|
@@ -16,6 +16,7 @@ system_ss.add(files(
|
|
'dirtylimit.c',
|
|
'dma-helpers.c',
|
|
'globals.c',
|
|
+ 'ram-block-attributes.c',
|
|
'memory_mapping.c',
|
|
'qdev-monitor.c',
|
|
'qtest.c',
|
|
diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c
|
|
new file mode 100644
|
|
index 0000000000..0bded54e9c
|
|
--- /dev/null
|
|
+++ b/system/ram-block-attributes.c
|
|
@@ -0,0 +1,444 @@
|
|
+/*
|
|
+ * QEMU ram block attributes
|
|
+ *
|
|
+ * Copyright Intel
|
|
+ *
|
|
+ * Author:
|
|
+ * Chenyi Qiang <chenyi.qiang@intel.com>
|
|
+ *
|
|
+ * SPDX-License-Identifier: GPL-2.0-or-later
|
|
+ */
|
|
+
|
|
+#include "qemu/osdep.h"
|
|
+#include "qemu/error-report.h"
|
|
+#include "exec/ramblock.h"
|
|
+#include "trace.h"
|
|
+
|
|
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
|
|
+ ram_block_attributes,
|
|
+ RAM_BLOCK_ATTRIBUTES,
|
|
+ OBJECT,
|
|
+ { TYPE_RAM_DISCARD_MANAGER },
|
|
+ { })
|
|
+
|
|
+static size_t
|
|
+ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
|
|
+{
|
|
+ /*
|
|
+ * Because page conversion could be manipulated in the size of at least 4K
|
|
+ * or 4K aligned, Use the host page size as the granularity to track the
|
|
+ * memory attribute.
|
|
+ */
|
|
+ g_assert(attr && attr->ram_block);
|
|
+ g_assert(attr->ram_block->page_size == qemu_real_host_page_size());
|
|
+ return attr->ram_block->page_size;
|
|
+}
|
|
+
|
|
+
|
|
+static bool
|
|
+ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
|
|
+ const MemoryRegionSection *section)
|
|
+{
|
|
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
|
|
+ const uint64_t first_bit = section->offset_within_region / block_size;
|
|
+ const uint64_t last_bit =
|
|
+ first_bit + int128_get64(section->size) / block_size - 1;
|
|
+ unsigned long first_discarded_bit;
|
|
+
|
|
+ first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1,
|
|
+ first_bit);
|
|
+ return first_discarded_bit > last_bit;
|
|
+}
|
|
+
|
|
+typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
|
|
+ void *arg);
|
|
+
|
|
+static int
|
|
+ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
|
|
+ void *arg)
|
|
+{
|
|
+ RamDiscardListener *rdl = arg;
|
|
+
|
|
+ return rdl->notify_populate(rdl, section);
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
|
|
+ void *arg)
|
|
+{
|
|
+ RamDiscardListener *rdl = arg;
|
|
+
|
|
+ rdl->notify_discard(rdl, section);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
|
|
+ MemoryRegionSection *section,
|
|
+ void *arg,
|
|
+ ram_block_attributes_section_cb cb)
|
|
+{
|
|
+ unsigned long first_bit, last_bit;
|
|
+ uint64_t offset, size;
|
|
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
|
|
+ int ret = 0;
|
|
+
|
|
+ first_bit = section->offset_within_region / block_size;
|
|
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
|
|
+ first_bit);
|
|
+
|
|
+ while (first_bit < attr->bitmap_size) {
|
|
+ MemoryRegionSection tmp = *section;
|
|
+
|
|
+ offset = first_bit * block_size;
|
|
+ last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
|
|
+ first_bit + 1) - 1;
|
|
+ size = (last_bit - first_bit + 1) * block_size;
|
|
+
|
|
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ret = cb(&tmp, arg);
|
|
+ if (ret) {
|
|
+ error_report("%s: Failed to notify RAM discard listener: %s",
|
|
+ __func__, strerror(-ret));
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
|
|
+ last_bit + 2);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
|
|
+ MemoryRegionSection *section,
|
|
+ void *arg,
|
|
+ ram_block_attributes_section_cb cb)
|
|
+{
|
|
+ unsigned long first_bit, last_bit;
|
|
+ uint64_t offset, size;
|
|
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
|
|
+ int ret = 0;
|
|
+
|
|
+ first_bit = section->offset_within_region / block_size;
|
|
+ first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
|
|
+ first_bit);
|
|
+
|
|
+ while (first_bit < attr->bitmap_size) {
|
|
+ MemoryRegionSection tmp = *section;
|
|
+
|
|
+ offset = first_bit * block_size;
|
|
+ last_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
|
|
+ first_bit + 1) - 1;
|
|
+ size = (last_bit - first_bit + 1) * block_size;
|
|
+
|
|
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ret = cb(&tmp, arg);
|
|
+ if (ret) {
|
|
+ error_report("%s: Failed to notify RAM discard listener: %s",
|
|
+ __func__, strerror(-ret));
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ first_bit = find_next_zero_bit(attr->bitmap,
|
|
+ attr->bitmap_size,
|
|
+ last_bit + 2);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static uint64_t
|
|
+ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
|
|
+ const MemoryRegion *mr)
|
|
+{
|
|
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+
|
|
+ g_assert(mr == attr->ram_block->mr);
|
|
+ return ram_block_attributes_get_block_size(attr);
|
|
+}
|
|
+
|
|
+static void
|
|
+ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
|
|
+ RamDiscardListener *rdl,
|
|
+ MemoryRegionSection *section)
|
|
+{
|
|
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+ int ret;
|
|
+
|
|
+ g_assert(section->mr == attr->ram_block->mr);
|
|
+ rdl->section = memory_region_section_new_copy(section);
|
|
+
|
|
+ QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
|
|
+
|
|
+ ret = ram_block_attributes_for_each_populated_section(attr, section, rdl,
|
|
+ ram_block_attributes_notify_populate_cb);
|
|
+ if (ret) {
|
|
+ error_report("%s: Failed to register RAM discard listener: %s",
|
|
+ __func__, strerror(-ret));
|
|
+ exit(1);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
|
|
+ RamDiscardListener *rdl)
|
|
+{
|
|
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+ int ret;
|
|
+
|
|
+ g_assert(rdl->section);
|
|
+ g_assert(rdl->section->mr == attr->ram_block->mr);
|
|
+
|
|
+ if (rdl->double_discard_supported) {
|
|
+ rdl->notify_discard(rdl, rdl->section);
|
|
+ } else {
|
|
+ ret = ram_block_attributes_for_each_populated_section(attr,
|
|
+ rdl->section, rdl, ram_block_attributes_notify_discard_cb);
|
|
+ if (ret) {
|
|
+ error_report("%s: Failed to unregister RAM discard listener: %s",
|
|
+ __func__, strerror(-ret));
|
|
+ exit(1);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ memory_region_section_free_copy(rdl->section);
|
|
+ rdl->section = NULL;
|
|
+ QLIST_REMOVE(rdl, next);
|
|
+}
|
|
+
|
|
+typedef struct RamBlockAttributesReplayData {
|
|
+ ReplayRamDiscardState fn;
|
|
+ void *opaque;
|
|
+} RamBlockAttributesReplayData;
|
|
+
|
|
+static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
|
|
+ void *arg)
|
|
+{
|
|
+ RamBlockAttributesReplayData *data = arg;
|
|
+
|
|
+ return data->fn(section, data->opaque);
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
|
|
+ MemoryRegionSection *section,
|
|
+ ReplayRamDiscardState replay_fn,
|
|
+ void *opaque)
|
|
+{
|
|
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
|
|
+
|
|
+ g_assert(section->mr == attr->ram_block->mr);
|
|
+ return ram_block_attributes_for_each_populated_section(attr, section, &data,
|
|
+ ram_block_attributes_rdm_replay_cb);
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
|
|
+ MemoryRegionSection *section,
|
|
+ ReplayRamDiscardState replay_fn,
|
|
+ void *opaque)
|
|
+{
|
|
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
|
|
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
|
|
+
|
|
+ g_assert(section->mr == attr->ram_block->mr);
|
|
+ return ram_block_attributes_for_each_discarded_section(attr, section, &data,
|
|
+ ram_block_attributes_rdm_replay_cb);
|
|
+}
|
|
+
|
|
+static bool
|
|
+ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
|
|
+ uint64_t size)
|
|
+{
|
|
+ MemoryRegion *mr = attr->ram_block->mr;
|
|
+
|
|
+ g_assert(mr);
|
|
+
|
|
+ uint64_t region_size = memory_region_size(mr);
|
|
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
|
|
+
|
|
+ if (!QEMU_IS_ALIGNED(offset, block_size) ||
|
|
+ !QEMU_IS_ALIGNED(size, block_size)) {
|
|
+ return false;
|
|
+ }
|
|
+ if (offset + size <= offset) {
|
|
+ return false;
|
|
+ }
|
|
+ if (offset + size > region_size) {
|
|
+ return false;
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
|
|
+ uint64_t offset,
|
|
+ uint64_t size)
|
|
+{
|
|
+ RamDiscardListener *rdl;
|
|
+
|
|
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
|
|
+ MemoryRegionSection tmp = *rdl->section;
|
|
+
|
|
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
|
|
+ continue;
|
|
+ }
|
|
+ rdl->notify_discard(rdl, &tmp);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int
|
|
+ram_block_attributes_notify_populate(RamBlockAttributes *attr,
|
|
+ uint64_t offset, uint64_t size)
|
|
+{
|
|
+ RamDiscardListener *rdl;
|
|
+ int ret = 0;
|
|
+
|
|
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
|
|
+ MemoryRegionSection tmp = *rdl->section;
|
|
+
|
|
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
|
|
+ continue;
|
|
+ }
|
|
+ ret = rdl->notify_populate(rdl, &tmp);
|
|
+ if (ret) {
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int ram_block_attributes_state_change(RamBlockAttributes *attr,
|
|
+ uint64_t offset, uint64_t size,
|
|
+ bool to_discard)
|
|
+{
|
|
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
|
|
+ const unsigned long first_bit = offset / block_size;
|
|
+ const unsigned long nbits = size / block_size;
|
|
+ const unsigned long last_bit = first_bit + nbits - 1;
|
|
+ const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
|
|
+ first_bit) > last_bit;
|
|
+ const bool is_populated = find_next_zero_bit(attr->bitmap,
|
|
+ attr->bitmap_size, first_bit) > last_bit;
|
|
+ unsigned long bit;
|
|
+ int ret = 0;
|
|
+
|
|
+ if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
|
|
+ error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
|
|
+ "0x%" PRIx64, __func__, offset, size);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ trace_ram_block_attributes_state_change(offset, size,
|
|
+ is_discarded ? "discarded" :
|
|
+ is_populated ? "populated" :
|
|
+ "mixture",
|
|
+ to_discard ? "discarded" :
|
|
+ "populated");
|
|
+ if (to_discard) {
|
|
+ if (is_discarded) {
|
|
+ /* Already private */
|
|
+ } else if (is_populated) {
|
|
+ /* Completely shared */
|
|
+ bitmap_clear(attr->bitmap, first_bit, nbits);
|
|
+ ram_block_attributes_notify_discard(attr, offset, size);
|
|
+ } else {
|
|
+ /* Unexpected mixture: process individual blocks */
|
|
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
|
|
+ if (!test_bit(bit, attr->bitmap)) {
|
|
+ continue;
|
|
+ }
|
|
+ clear_bit(bit, attr->bitmap);
|
|
+ ram_block_attributes_notify_discard(attr, bit * block_size,
|
|
+ block_size);
|
|
+ }
|
|
+ }
|
|
+ } else {
|
|
+ if (is_populated) {
|
|
+ /* Already shared */
|
|
+ } else if (is_discarded) {
|
|
+ /* Completely private */
|
|
+ bitmap_set(attr->bitmap, first_bit, nbits);
|
|
+ ret = ram_block_attributes_notify_populate(attr, offset, size);
|
|
+ } else {
|
|
+ /* Unexpected mixture: process individual blocks */
|
|
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
|
|
+ if (test_bit(bit, attr->bitmap)) {
|
|
+ continue;
|
|
+ }
|
|
+ set_bit(bit, attr->bitmap);
|
|
+ ret = ram_block_attributes_notify_populate(attr,
|
|
+ bit * block_size,
|
|
+ block_size);
|
|
+ if (ret) {
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
|
|
+{
|
|
+ const int block_size = qemu_real_host_page_size();
|
|
+ RamBlockAttributes *attr;
|
|
+ MemoryRegion *mr = ram_block->mr;
|
|
+
|
|
+ attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
|
|
+
|
|
+ attr->ram_block = ram_block;
|
|
+ if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) {
|
|
+ object_unref(OBJECT(attr));
|
|
+ return NULL;
|
|
+ }
|
|
+ attr->bitmap_size =
|
|
+ ROUND_UP(int128_get64(mr->size), block_size) / block_size;
|
|
+ attr->bitmap = bitmap_new(attr->bitmap_size);
|
|
+
|
|
+ return attr;
|
|
+}
|
|
+
|
|
+void ram_block_attributes_destroy(RamBlockAttributes *attr)
|
|
+{
|
|
+ g_assert(attr);
|
|
+
|
|
+ g_free(attr->bitmap);
|
|
+ memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL);
|
|
+ object_unref(OBJECT(attr));
|
|
+}
|
|
+
|
|
+static void ram_block_attributes_init(Object *obj)
|
|
+{
|
|
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj);
|
|
+
|
|
+ QLIST_INIT(&attr->rdl_list);
|
|
+}
|
|
+
|
|
+static void ram_block_attributes_finalize(Object *obj)
|
|
+{
|
|
+}
|
|
+
|
|
+static void ram_block_attributes_class_init(ObjectClass *klass,
|
|
+ void *data)
|
|
+{
|
|
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
|
|
+
|
|
+ rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;
|
|
+ rdmc->register_listener = ram_block_attributes_rdm_register_listener;
|
|
+ rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;
|
|
+ rdmc->is_populated = ram_block_attributes_rdm_is_populated;
|
|
+ rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
|
|
+ rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
|
|
+}
|
|
diff --git a/system/trace-events b/system/trace-events
|
|
index 2ed1d59b1f..9fd7217472 100644
|
|
--- a/system/trace-events
|
|
+++ b/system/trace-events
|
|
@@ -44,3 +44,6 @@ dirtylimit_state_finalize(void)
|
|
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
|
|
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
|
|
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
|
|
+
|
|
+# ram-block-attributes.c
|
|
+ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'"
|
|
--
|
|
2.50.1
|
|
|