* Tue Sep 09 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-27

- kvm-target-i386-Make-invtsc-migratable-when-user-sets-ts.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-Enable-fdp-excptn-only-and-zero-fcs-fds.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-kvm-i386-make-kvm_filter_msr-and-related-definitions.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-kvm-remove-unnecessary-ifdef.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-crypto-Define-macros-for-hash-algorithm-digest-lengt.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Drop-the-check-of-phys_bits-in-host_cpu_rea.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Extract-a-common-fucntion-to-setup-value-of.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Drop-the-variable-smp_cores-and-smp_threads.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Drop-cores_per_pkg-in-cpu_x86_cpuid.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-topology-Update-the-comment-of-x86_apicid_from_.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-topology-Introduce-helpers-for-various-topology.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Track-a-X86CPUTopoInfo-directly-in-CPUX86St.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Hoist-check-of-CPUID_EXT3_TOPOEXT-against-t.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-cpu-Remove-nr_cores-from-struct-CPUState.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Set-up-CPUID_HT-in-x86_cpu_expand_features-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Set-and-track-CPUID_EXT3_CMP_LEG-in-env-fea.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-Remove-unused-parameter-uint32_t-bit-in-feature.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-Print-CPUID-subleaf-info-for-unsupported.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-sev-Reduce-system-specific-declarations.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-physmem-replace-assertion-with-error.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-redhat-target-i386-add-CPUID-and-MSR-bits-from-Clear.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-qom-reverse-order-of-instance_post_init-calls.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-Remove-AccelCPUClass-cpu_class_init-need.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Consolidate-the-helper-to-get-Host-s-vendor.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-rocker-do-not-pollute-the-namespace.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-linux-headers-Update-to-Linux-v6.14-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-linux-headers-Update-to-Linux-v6.15-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-linux-headers-update-from-6.15-kvm-next.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-update-Linux-headers-to-v6.16-rc3.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-update-Linux-headers-to-KVM-tree-master.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-Introduce-tdx-guest-object.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Implement-tdx_kvm_type-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Implement-tdx_kvm_init-to-initialize-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Get-tdx_capabilities-via-KVM_TDX_CAPABILITI.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Introduce-is_tdx_vm-helper-and-cache-tdx_gu.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-kvm-Introduce-kvm_arch_pre_create_vcpu.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Initialize-TDX-before-creating-TD-vcpus.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-property-sept-ve-disable-for-tdx-guest-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Make-sept_ve_disable-set-by-default.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Wire-CPU-features-up-with-attributes-of-TD-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Validate-TD-attributes.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Support-user-configurable-mrconfigid-mrowne.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Set-APIC-bus-rate-to-match-with-what-TDX-mo.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Implement-user-specified-tsc-frequency.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-load-TDVF-for-TD-guest.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdvf-Introduce-function-to-parse-TDVF-metadata.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Parse-TDVF-metadata-for-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Don-t-initialize-pc.rom-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Track-mem_ptr-for-each-firmware-entry-of-TD.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Track-RAM-entries-for-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-headers-Add-definitions-from-UEFI-spec-for-volumes-r.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Setup-the-TD-HOB-list.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-TDVF-memory-via-KVM_TDX_INIT_MEM_REGION.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Call-KVM_TDX_INIT_VCPU-to-initialize-TDX-vc.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Finalize-TDX-VM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Enable-user-exit-on-KVM_HC_MAP_GPA_RANGE.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Handle-KVM_SYSTEM_EVENT_TDX_FATAL.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Wire-TDX_REPORT_FATAL_ERROR-with-GuestPanic.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-kvm-Check-KVM_CAP_MAX_VCPUS-at-vm-level.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-introduce-x86_confidential_guest_cpu_instan.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-implement-tdx_cpu_instance_init.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Introduce-enable_cpuid_0x1f-to-force-exposi.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Force-exposing-CPUID-0x1f.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Set-kvm_readonly_mem_enabled-to-false-for-T.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Disable-SMM-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Disable-PIC-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Set-and-check-kernel_irqchip-mode-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Don-t-synchronize-guest-tsc-for-TDs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Only-configure-MSR_IA32_UCODE_REV-in-kvm_in.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-apic-Skip-kvm_apic_put-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-cpu-Don-t-set-vcpu_dirty-when-guest_state_protected.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cgs-Rename-mask_cpuid_features-to-adjust_cpuid_.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Implement-adjust_cpuid_features-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-TDX-fixed1-bits-to-supported-CPUIDs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-supported-CPUID-bits-related-to-TD-Attr.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-supported-CPUID-bits-relates-to-XFAM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Add-XFD-to-supported-bit-of-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Define-supported-KVM-features-for-TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cgs-Introduce-x86_confidential_guest_check_feat.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Fetch-and-validate-CPUID-of-TD-guest.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Don-t-treat-SYSCALL-as-unavailable.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Make-invtsc-default-on.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Validate-phys_bits-against-host-value.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-docs-Add-TDX-documentation.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Fix-build-on-32-bit-host.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdvf-Fix-build-on-32-bit-host.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Move-adjustment-of-CPUID_EXT_PDCM-before-fe.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Error-and-exit-when-named-cpu-model-is-requ.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Rename-enable_cpuid_0x1f-to-force_cpuid_0x1.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Fix-the-typo-of-the-comment-of-struct-TdxGu.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Clarify-the-error-message-of-mrconfigid-mro.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-handle-TDG.VP.VMCALL-GetTdVmCallInfo.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-handle-TDG.VP.VMCALL-GetQuote.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-move-max_features-to-class.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-nvmm-whpx-add-accel-CPU-class-that-sets-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-allow-reordering-max_x86_cpu_initfn-vs-a.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-move-accel_cpu_instance_init-to-.instanc.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-merge-host_cpu_instance_init-and-host_cp.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Remove-enumeration-of-GetQuote-in-tdx_handl.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Set-value-of-GetTdVmCallInfo-based-on-capab.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-handle-TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUP.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Fix-the-report-of-gpa-in-QAPI.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Remove-task-watch-only-when-it-s-valid.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Don-t-mask-off-CPUID_EXT_PDCM.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Move-x86_ext_save_areas-initialization-to-..patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-target-i386-tdx-fix-locking-for-interrupt-injection.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-cpu-Cleanup-host_cpu_max_instance_init.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-i386-tdx-Remove-the-redundant-qemu_mutex_init-tdx-lo.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-redhat-enable-CONFIG_TDX.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-redhat-allow-5-level-paging-for-TDX-VMs.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-memory-Export-a-helper-to-get-intersection-of-a-Memo.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-memory-Change-memory_region_set_ram_discard_manager-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-memory-Unify-the-definiton-of-ReplayRamPopulate-and-.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-ram-block-attributes-Introduce-RamBlockAttributes-to.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- kvm-physmem-Support-coordinated-discarding-of-RAM-with-g.patch [RHEL-15710 RHEL-20798 RHEL-49728]
- Resolves: RHEL-15710
  ([Intel 9.7 FEAT] TDX: QEMU Support)
- Resolves: RHEL-20798
  ([Intel 9.6 FEAT] TDX: host: Virt-QEMU: Add safe device pass-through for TD)
- Resolves: RHEL-49728
  ([Intel 9.7 FEAT] Virt-QEMU: TDX: Allow to configure apic bus clock)
This commit is contained in:
Jon Maloy 2025-09-09 16:41:47 -04:00
parent 0d036c6762
commit a92c51d39e
116 changed files with 15788 additions and 1 deletions

View File

@ -0,0 +1,48 @@
From dd4ab64754a52f1e50273cb8153567b0d2f382de Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 071/115] cpu: Don't set vcpu_dirty when guest_state_protected
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [71/115] c876a59ee5bbfccea9837ac373a2e664354db3ae (bonzini/rhel-qemu-kvm)
QEMU calls kvm_arch_put_registers() when vcpu_dirty is true in
kvm_vcpu_exec(). However, for confidential guest, like TDX, putting
registers is disallowed due to guest state is protected.
Only set vcpu_dirty to true with guest state is not protected when
creating the vcpu.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-43-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b4b7fb5a773e1d2215c2aaa99789eca51914b78f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c1605bc4fa..43c10c82f6 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -456,7 +456,9 @@ int kvm_create_vcpu(CPUState *cpu)
cpu->kvm_fd = kvm_fd;
cpu->kvm_state = s;
- cpu->vcpu_dirty = true;
+ if (!s->guest_state_protected) {
+ cpu->vcpu_dirty = true;
+ }
cpu->dirty_pages = 0;
cpu->throttle_us_per_full = 0;
--
2.50.1

View File

@ -0,0 +1,76 @@
From c57b5e38fd95a68f36a342e19ba7ccb6cbb07948 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 014/115] cpu: Remove nr_cores from struct CPUState
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [14/115] 80f6414c5f8f5e963b0f2251147b8a1ca04f55e4 (bonzini/rhel-qemu-kvm)
There is no user of it now, remove it.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-9-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6e090ffe0d188e1f09d4efcd10d82158f92abfbb)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d4c699c310519b99bedf1bdb516cab230d5d846c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/core/cpu-common.c | 1 -
include/hw/core/cpu.h | 2 --
system/cpus.c | 1 -
3 files changed, 4 deletions(-)
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 7982ecd39a..1ac8ab488f 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -242,7 +242,6 @@ static void cpu_common_initfn(Object *obj)
cpu->cluster_index = UNASSIGNED_CLUSTER_INDEX;
/* user-mode doesn't have configurable SMP topology */
/* the default value is changed by qemu_init_vcpu() for system-mode */
- cpu->nr_cores = 1;
cpu->nr_threads = 1;
cpu->cflags_next_tb = -1;
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 1c9c775df6..d90e3b3f2c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -402,7 +402,6 @@ struct qemu_work_item;
* Under TCG this value is propagated to @tcg_cflags.
* See TranslationBlock::TCG CF_CLUSTER_MASK.
* @tcg_cflags: Pre-computed cflags for this cpu.
- * @nr_cores: Number of cores within this CPU package.
* @nr_threads: Number of threads within this CPU core.
* @thread: Host thread details, only live once @created is #true
* @sem: WIN32 only semaphore used only for qtest
@@ -461,7 +460,6 @@ struct CPUState {
CPUClass *cc;
/*< public >*/
- int nr_cores;
int nr_threads;
struct QemuThread *thread;
diff --git a/system/cpus.c b/system/cpus.c
index 1c818ff682..909d8128e8 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -666,7 +666,6 @@ void qemu_init_vcpu(CPUState *cpu)
{
MachineState *ms = MACHINE(qdev_get_machine());
- cpu->nr_cores = machine_topo_get_cores_per_socket(ms);
cpu->nr_threads = ms->smp.threads;
cpu->stopped = true;
cpu->random_seed = qemu_guest_random_seed_thread_part1();
--
2.50.1

View File

@ -0,0 +1,74 @@
From 4df071fec89ab867f8e2d970de48256034e4b286 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:29:04 +0200
Subject: [PATCH 005/115] crypto: Define macros for hash algorithm digest
lengths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [5/115] f5f6e0c3cd10baf7a490b67dd6c0542bddba8dfe (bonzini/rhel-qemu-kvm)
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 5d04de7de54e163b056980be10ee1c281a600276)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
crypto/hash.c | 14 +++++++-------
include/crypto/hash.h | 8 ++++++++
2 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/crypto/hash.c b/crypto/hash.c
index b0f8228bdc..8087f5dae6 100644
--- a/crypto/hash.c
+++ b/crypto/hash.c
@@ -23,13 +23,13 @@
#include "hashpriv.h"
static size_t qcrypto_hash_alg_size[QCRYPTO_HASH_ALG__MAX] = {
- [QCRYPTO_HASH_ALG_MD5] = 16,
- [QCRYPTO_HASH_ALG_SHA1] = 20,
- [QCRYPTO_HASH_ALG_SHA224] = 28,
- [QCRYPTO_HASH_ALG_SHA256] = 32,
- [QCRYPTO_HASH_ALG_SHA384] = 48,
- [QCRYPTO_HASH_ALG_SHA512] = 64,
- [QCRYPTO_HASH_ALG_RIPEMD160] = 20,
+ [QCRYPTO_HASH_ALG_MD5] = QCRYPTO_HASH_DIGEST_LEN_MD5,
+ [QCRYPTO_HASH_ALG_SHA1] = QCRYPTO_HASH_DIGEST_LEN_SHA1,
+ [QCRYPTO_HASH_ALG_SHA224] = QCRYPTO_HASH_DIGEST_LEN_SHA224,
+ [QCRYPTO_HASH_ALG_SHA256] = QCRYPTO_HASH_DIGEST_LEN_SHA256,
+ [QCRYPTO_HASH_ALG_SHA384] = QCRYPTO_HASH_DIGEST_LEN_SHA384,
+ [QCRYPTO_HASH_ALG_SHA512] = QCRYPTO_HASH_DIGEST_LEN_SHA512,
+ [QCRYPTO_HASH_ALG_RIPEMD160] = QCRYPTO_HASH_DIGEST_LEN_RIPEMD160,
};
size_t qcrypto_hash_digest_len(QCryptoHashAlgorithm alg)
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 54d87aa2a1..a113cc3b04 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -23,6 +23,14 @@
#include "qapi/qapi-types-crypto.h"
+#define QCRYPTO_HASH_DIGEST_LEN_MD5 16
+#define QCRYPTO_HASH_DIGEST_LEN_SHA1 20
+#define QCRYPTO_HASH_DIGEST_LEN_SHA224 28
+#define QCRYPTO_HASH_DIGEST_LEN_SHA256 32
+#define QCRYPTO_HASH_DIGEST_LEN_SHA384 48
+#define QCRYPTO_HASH_DIGEST_LEN_SHA512 64
+#define QCRYPTO_HASH_DIGEST_LEN_RIPEMD160 20
+
/* See also "QCryptoHashAlgorithm" defined in qapi/crypto.json */
/**
--
2.50.1

View File

@ -0,0 +1,222 @@
From a9c7bbb7a32ba2ea5cd76b87c41f1fbdd789fb3b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 084/115] docs: Add TDX documentation
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [84/115] 4f9930f4415e7195bf5bbbcc0d79cfc6aaa385e1 (bonzini/rhel-qemu-kvm)
Add docs/system/i386/tdx.rst for TDX support, and add tdx in
confidential-guest-support.rst
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-56-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit dc1424319311f86449c6825ceec2364ee645a363)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
docs/system/confidential-guest-support.rst | 1 +
docs/system/i386/tdx.rst | 161 +++++++++++++++++++++
docs/system/target-i386.rst | 1 +
3 files changed, 163 insertions(+)
create mode 100644 docs/system/i386/tdx.rst
diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst
index 0c490dbda2..66129fbab6 100644
--- a/docs/system/confidential-guest-support.rst
+++ b/docs/system/confidential-guest-support.rst
@@ -38,6 +38,7 @@ Supported mechanisms
Currently supported confidential guest mechanisms are:
* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
+* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`)
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)
diff --git a/docs/system/i386/tdx.rst b/docs/system/i386/tdx.rst
new file mode 100644
index 0000000000..8131750b64
--- /dev/null
+++ b/docs/system/i386/tdx.rst
@@ -0,0 +1,161 @@
+Intel Trusted Domain eXtension (TDX)
+====================================
+
+Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends
+Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
+with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
+in a CPU mode that is designed to protect the confidentiality of its memory
+contents and its CPU state from any other software, including the hosting
+Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
+
+Prerequisites
+-------------
+
+To run TD, the physical machine needs to have TDX module loaded and initialized
+while KVM hypervisor has TDX support and has TDX enabled. If those requirements
+are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``.
+
+Trust Domain Virtual Firmware (TDVF)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot
+TD Guest OS. TDVF needs to be copied to guest private memory and measured before
+the TD boots.
+
+KVM vcpu ioctl ``KVM_TDX_INIT_MEM_REGION`` can be used to populate the TDVF
+content into its private memory.
+
+Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash
+device and it actually works as RAM. "-bios" option is chosen to load TDVF.
+
+OVMF is the opensource firmware that implements the TDVF support. Thus the
+command line to specify and load TDVF is ``-bios OVMF.fd``
+
+Feature Configuration
+---------------------
+
+Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD are not
+under full control of VMM. VMM can only configure part of features of a TD on
+``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl.
+
+The configurable features have three types:
+
+- Attributes:
+ - PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD,
+ which determines related CPUID bit and CR4 bit;
+ - PERFMON (bit 63) controls whether PMU is exposed to TD.
+
+- XSAVE related features (XFAM):
+ XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It
+ determines the set of extended features available for use by the guest TD.
+
+- CPUID features:
+ Only some bits of some CPUID leaves are directly configurable by VMM.
+
+What features can be configured is reported via TDX capabilities.
+
+TDX capabilities
+~~~~~~~~~~~~~~~~
+
+The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES``
+to get the TDX capabilities from KVM. It returns a data structure of
+``struct kvm_tdx_capabilities``, which tells the supported configuration of
+attributes, XFAM and CPUIDs.
+
+TD attributes
+~~~~~~~~~~~~~
+
+QEMU supports configuring raw 64-bit TD attributes directly via "attributes"
+property of "tdx-guest" object. Note, it's users' responsibility to provide a
+valid value because some bits may not supported by current QEMU or KVM yet.
+
+QEMU also supports the configuration of individual attribute bits that are
+supported by it, via properties of "tdx-guest" object.
+E.g., "sept-ve-disable" (bit 28).
+
+MSR based features
+~~~~~~~~~~~~~~~~~~
+
+Current KVM doesn't support MSR based feature (e.g., MSR_IA32_ARCH_CAPABILITIES)
+configuration for TDX, and it's a future work to enable it in QEMU when KVM adds
+support of it.
+
+Feature check
+~~~~~~~~~~~~~
+
+QEMU checks if the final (CPU) features, determined by given cpu model and
+explicit feature adjustment of "+featureA/-featureB", can be supported or not.
+It can produce feature not supported warning like
+
+ "warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]"
+
+It can also produce warning like
+
+ "warning: TDX forcibly sets the feature: CPUID.80000007H:EDX.invtsc [bit 8]"
+
+if the fixed-1 feature is requested to be disabled explicitly. This is newly
+added to QEMU for TDX because TDX has fixed-1 features that are forcibly enabled
+by TDX module and VMM cannot disable them.
+
+Launching a TD (TDX VM)
+-----------------------
+
+To launch a TD, the necessary command line options are tdx-guest object and
+split kernel-irqchip, as below:
+
+.. parsed-literal::
+
+ |qemu_system_x86| \\
+ -accel kvm \\
+ -cpu host \\
+ -object tdx-guest,id=tdx0 \\
+ -machine ...,confidential-guest-support=tdx0 \\
+ -bios OVMF.fd \\
+
+Restrictions
+------------
+
+ - kernel-irqchip must be split;
+
+ This is set by default for TDX guest if kernel-irqchip is left on its default
+ 'auto' setting.
+
+ - No readonly support for private memory;
+
+ - No SMM support: SMM support requires manipulating the guest register states
+ which is not allowed;
+
+Debugging
+---------
+
+Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD
+debug mode. When in off-TD debug mode, TD's VCPU state and private memory are
+accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those
+SEAMCALLs and corresonponding QEMU change.
+
+It's targeted as future work.
+
+TD attestation
+--------------
+
+In TD guest, the attestation process is used to verify the TDX guest
+trustworthiness to other entities before provisioning secrets to the guest.
+
+TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the
+REPORT. Then the REPORT data needs to be converted into a remotely verifiable
+Quote by SGX Quoting Enclave (QE).
+
+It's a future work in QEMU to add support of TD attestation since it lacks
+support in current KVM.
+
+Live Migration
+--------------
+
+Future work.
+
+References
+----------
+
+- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
+
+- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__
diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst
index 1b8a1f248a..4d58cdbc4e 100644
--- a/docs/system/target-i386.rst
+++ b/docs/system/target-i386.rst
@@ -29,6 +29,7 @@ Architectural features
i386/kvm-pv
i386/sgx
i386/amd-memory-encryption
+ i386/tdx
OS requirements
~~~~~~~~~~~~~~~
--
2.50.1

View File

@ -0,0 +1,233 @@
From 43245dc5a297d6c4097a0191af4f818e416a3f45 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 051/115] headers: Add definitions from UEFI spec for volumes,
resources, etc...
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [51/115] c44f8b832aa268a5b2d3e98a8ce6bfbda7e1c684 (bonzini/rhel-qemu-kvm)
Add UEFI definitions for literals, enums, structs, GUIDs, etc... that
will be used by TDX to build the UEFI Hand-Off Block (HOB) that is passed
to the Trusted Domain Virtual Firmware (TDVF).
All values come from the UEFI specification [1], PI spec [2] and TDVF
design guide[3].
[1] UEFI Specification v2.1.0 https://uefi.org/sites/default/files/resources/UEFI_Spec_2_10_Aug29.pdf
[2] UEFI PI spec v1.8 https://uefi.org/sites/default/files/resources/UEFI_PI_Spec_1_8_March3.pdf
[3] https://software.intel.com/content/dam/develop/external/us/en/documents/tdx-virtual-firmware-design-guide-rev-1.pdf
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-23-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 88aa6576e4ab40b538f543852128cb17fce37f87)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/standard-headers/uefi/uefi.h | 187 +++++++++++++++++++++++++++
1 file changed, 187 insertions(+)
create mode 100644 include/standard-headers/uefi/uefi.h
diff --git a/include/standard-headers/uefi/uefi.h b/include/standard-headers/uefi/uefi.h
new file mode 100644
index 0000000000..5256349ec0
--- /dev/null
+++ b/include/standard-headers/uefi/uefi.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2025 Intel Corporation
+ *
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_UEFI_H
+#define HW_I386_UEFI_H
+
+/***************************************************************************/
+/*
+ * basic EFI definitions
+ * supplemented with UEFI Specification Version 2.8 (Errata A)
+ * released February 2020
+ */
+/* UEFI integer is little endian */
+
+typedef struct {
+ uint32_t Data1;
+ uint16_t Data2;
+ uint16_t Data3;
+ uint8_t Data4[8];
+} EFI_GUID;
+
+typedef enum {
+ EfiReservedMemoryType,
+ EfiLoaderCode,
+ EfiLoaderData,
+ EfiBootServicesCode,
+ EfiBootServicesData,
+ EfiRuntimeServicesCode,
+ EfiRuntimeServicesData,
+ EfiConventionalMemory,
+ EfiUnusableMemory,
+ EfiACPIReclaimMemory,
+ EfiACPIMemoryNVS,
+ EfiMemoryMappedIO,
+ EfiMemoryMappedIOPortSpace,
+ EfiPalCode,
+ EfiPersistentMemory,
+ EfiUnacceptedMemoryType,
+ EfiMaxMemoryType
+} EFI_MEMORY_TYPE;
+
+#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
+
+#define EFI_HOB_TYPE_HANDOFF 0x0001
+#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
+#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
+#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
+#define EFI_HOB_TYPE_FV 0x0005
+#define EFI_HOB_TYPE_CPU 0x0006
+#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
+#define EFI_HOB_TYPE_FV2 0x0009
+#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
+#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
+#define EFI_HOB_TYPE_FV3 0x000C
+#define EFI_HOB_TYPE_UNUSED 0xFFFE
+#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
+
+typedef struct {
+ uint16_t HobType;
+ uint16_t HobLength;
+ uint32_t Reserved;
+} EFI_HOB_GENERIC_HEADER;
+
+typedef uint64_t EFI_PHYSICAL_ADDRESS;
+typedef uint32_t EFI_BOOT_MODE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ uint32_t Version;
+ EFI_BOOT_MODE BootMode;
+ EFI_PHYSICAL_ADDRESS EfiMemoryTop;
+ EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
+ EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
+ EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
+ EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
+} EFI_HOB_HANDOFF_INFO_TABLE;
+
+#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
+#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
+#define EFI_RESOURCE_IO 0x00000002
+#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
+#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
+#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
+#define EFI_RESOURCE_IO_RESERVED 0x00000006
+#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
+#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
+
+#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
+#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
+#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
+#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
+#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
+#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
+#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
+#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
+#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
+#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
+#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
+#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
+#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
+#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
+#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
+#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
+#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
+#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
+#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
+#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
+#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
+
+typedef uint32_t EFI_RESOURCE_TYPE;
+typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_GUID Owner;
+ EFI_RESOURCE_TYPE ResourceType;
+ EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
+ EFI_PHYSICAL_ADDRESS PhysicalStart;
+ uint64_t ResourceLength;
+} EFI_HOB_RESOURCE_DESCRIPTOR;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_GUID Name;
+
+ /* guid specific data follows */
+} EFI_HOB_GUID_TYPE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+} EFI_HOB_FIRMWARE_VOLUME;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+ EFI_GUID FvName;
+ EFI_GUID FileName;
+} EFI_HOB_FIRMWARE_VOLUME2;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+ uint32_t AuthenticationStatus;
+ bool ExtractedFv;
+ EFI_GUID FvName;
+ EFI_GUID FileName;
+} EFI_HOB_FIRMWARE_VOLUME3;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ uint8_t SizeOfMemorySpace;
+ uint8_t SizeOfIoSpace;
+ uint8_t Reserved[6];
+} EFI_HOB_CPU;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+} EFI_HOB_MEMORY_POOL;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+} EFI_HOB_UEFI_CAPSULE;
+
+#define EFI_HOB_OWNER_ZERO \
+ ((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
+
+#endif
--
2.50.1

View File

@ -0,0 +1,214 @@
From dc14d1444d4ad525663848160cd7687ef291c85e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 031/115] i386: Introduce tdx-guest object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [31/115] f279a3b477fbbe84bb5d6c3a8eb588916b41128e (bonzini/rhel-qemu-kvm)
Introduce tdx-guest object which inherits X86_CONFIDENTIAL_GUEST,
and will be used to create TDX VMs (TDs) by
qemu -machine ...,confidential-guest-support=tdx0 \
-object tdx-guest,id=tdx0
It has one QAPI member 'attributes' defined, which allows user to set
TD's attributes directly.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-3-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 756e12e791771034ac105a5d2c9887bbbb6b7c73)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflict: class_init's second argument is not const
---
configs/devices/i386-softmmu/default.mak | 1 +
hw/i386/Kconfig | 5 +++
qapi/qom.json | 15 +++++++++
target/i386/kvm/meson.build | 2 ++
target/i386/kvm/tdx.c | 43 ++++++++++++++++++++++++
target/i386/kvm/tdx.h | 21 ++++++++++++
6 files changed, 87 insertions(+)
create mode 100644 target/i386/kvm/tdx.c
create mode 100644 target/i386/kvm/tdx.h
diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak
index 448e3e3b1b..34c21224eb 100644
--- a/configs/devices/i386-softmmu/default.mak
+++ b/configs/devices/i386-softmmu/default.mak
@@ -18,6 +18,7 @@
#CONFIG_QXL=n
#CONFIG_SEV=n
#CONFIG_SGA=n
+#CONFIG_TDX=n
#CONFIG_TEST_DEVICES=n
#CONFIG_TPM_CRB=n
#CONFIG_TPM_TIS_ISA=n
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index f4a33b6c08..edd61cd2aa 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,10 @@ config SGX
bool
depends on KVM
+config TDX
+ bool
+ depends on KVM
+
config PC
bool
imply APPLESMC
@@ -26,6 +30,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
diff --git a/qapi/qom.json b/qapi/qom.json
index 321ccd708a..530efeb7c5 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -1008,6 +1008,19 @@
'*host-data': 'str',
'*vcek-disabled': 'bool' } }
+##
+# @TdxGuestProperties:
+#
+# Properties for tdx-guest objects.
+#
+# @attributes: The 'attributes' of a TD guest that is passed to
+# KVM_TDX_INIT_VM
+#
+# Since: 10.1
+##
+{ 'struct': 'TdxGuestProperties',
+ 'data': { '*attributes': 'uint64' } }
+
##
# @ThreadContextProperties:
#
@@ -1092,6 +1105,7 @@
'sev-snp-guest',
'thread-context',
's390-pv-guest',
+ 'tdx-guest',
'throttle-group',
'tls-creds-anon',
'tls-creds-psk',
@@ -1163,6 +1177,7 @@
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'sev-snp-guest': 'SevSnpGuestProperties',
+ 'tdx-guest': 'TdxGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 3996cafaf2..466bccb9cb 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,6 +8,8 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'))
+
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
new file mode 100644
index 0000000000..ec84ae2947
--- /dev/null
+++ b/target/i386/kvm/tdx.c
@@ -0,0 +1,43 @@
+/*
+ * QEMU TDX support
+ *
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * Author:
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qom/object_interfaces.h"
+
+#include "tdx.h"
+
+/* tdx guest */
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
+ tdx_guest,
+ TDX_GUEST,
+ X86_CONFIDENTIAL_GUEST,
+ { TYPE_USER_CREATABLE },
+ { NULL })
+
+static void tdx_guest_init(Object *obj)
+{
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ cgs->require_guest_memfd = true;
+ tdx->attributes = 0;
+
+ object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
+ OBJ_PROP_FLAG_READWRITE);
+}
+
+static void tdx_guest_finalize(Object *obj)
+{
+}
+
+static void tdx_guest_class_init(ObjectClass *oc, void *data)
+{
+}
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
new file mode 100644
index 0000000000..f3b7253361
--- /dev/null
+++ b/target/i386/kvm/tdx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_H
+#define QEMU_I386_TDX_H
+
+#include "confidential-guest.h"
+
+#define TYPE_TDX_GUEST "tdx-guest"
+#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
+
+typedef struct TdxGuestClass {
+ X86ConfidentialGuestClass parent_class;
+} TdxGuestClass;
+
+typedef struct TdxGuest {
+ X86ConfidentialGuest parent_obj;
+
+ uint64_t attributes; /* TD attributes */
+} TdxGuest;
+
+#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,62 @@
From 9d654537f0f667a36eb45d80fda283b31ace3d39 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 017/115] i386: Remove unused parameter "uint32_t bit" in
feature_word_description()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [17/115] 37ee215225ee0757e51718df4548d4a43fe09e99 (bonzini/rhel-qemu-kvm)
Parameter "uint32_t bit" is not used in function feature_word_description(),
so remove it.
Signed-off-by: Lei Wang <lei4.wang@intel.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20241217123932.948789-2-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit bab32b8b4bf9da5d13386c8faa5a9389e63244b7)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a97d042a2e..32e89f1a5c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5897,7 +5897,7 @@ static const TypeInfo max_x86_cpu_type_info = {
.class_init = max_x86_cpu_class_init,
};
-static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
+static char *feature_word_description(FeatureWordInfo *f)
{
assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD);
@@ -5936,6 +5936,7 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
CPUX86State *env = &cpu->env;
FeatureWordInfo *f = &feature_word_info[w];
int i;
+ g_autofree char *feat_word_str = feature_word_description(f);
if (!cpu->force_features) {
env->features[w] &= ~mask;
@@ -5948,7 +5949,6 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
for (i = 0; i < 64; ++i) {
if ((1ULL << i) & mask) {
- g_autofree char *feat_word_str = feature_word_description(f, i);
warn_report("%s: %s%s%s [bit %d]",
verbose_prefix,
feat_word_str,
--
2.50.1

View File

@ -0,0 +1,63 @@
From f5b6984efa1bf825410011b957b4f46fcfe963db Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 070/115] i386/apic: Skip kvm_apic_put() for TDX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [70/115] d4e1631ebceb6441a608ff92c1964b64cf116094 (bonzini/rhel-qemu-kvm)
KVM neithers allow writing to MSR_IA32_APICBASE for TDs, nor allow for
KVM_SET_LAPIC[*].
Note, KVM_GET_LAPIC is also disallowed for TDX. It is called in the path
do_kvm_cpu_synchronize_state()
-> kvm_arch_get_registers()
-> kvm_get_apic()
and it's already disllowed for confidential guest through
guest_state_protected.
[*] https://lore.kernel.org/all/Z3w4Ku4Jq0CrtXne@google.com/
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-42-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 62a1a8b89d90cd3fbee0e6d38e6a4c0d833e978a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/kvm/apic.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index a72c28e8a7..9c12a9c856 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -17,6 +17,7 @@
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
--
2.50.1

View File

@ -0,0 +1,80 @@
From 0d8993cabc26807ef973630f38ec2b09557497fe Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 079/115] i386/cgs: Introduce
x86_confidential_guest_check_features()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [79/115] 1fce5742b7746e6ba589c486fb1a6aec8ab8391a (bonzini/rhel-qemu-kvm)
To do cgs specific feature checking. Note the feature checking in
x86_cpu_filter_features() is valid for non-cgs VMs. For cgs VMs like
TDX, what features can be supported has more restrictions.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-51-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit dc0b08b303ad34983b43936a4c978672e0f9a9d8)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/confidential-guest.h | 13 +++++++++++++
target/i386/kvm/kvm.c | 8 ++++++++
2 files changed, 21 insertions(+)
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 8a5cc7ecff..4e7eb43416 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -42,6 +42,7 @@ struct X86ConfidentialGuestClass {
void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu);
uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature,
uint32_t index, int reg, uint32_t value);
+ int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs);
};
/**
@@ -91,4 +92,16 @@ static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGu
}
}
+static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg,
+ CPUState *cs)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->check_features) {
+ return klass->check_features(cg, cs);
+ }
+
+ return 0;
+}
+
#endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 76352323e4..b6fddcd543 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2081,6 +2081,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
int r;
Error *local_err = NULL;
+ if (current_machine->cgs) {
+ r = x86_confidential_guest_check_features(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs);
+ if (r < 0) {
+ return r;
+ }
+ }
+
memset(&cpuid_data, 0, sizeof(cpuid_data));
cpuid_i = 0;
--
2.50.1

View File

@ -0,0 +1,116 @@
From fa35367ae78505390b5915c9bf96542ffed1787d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 072/115] i386/cgs: Rename *mask_cpuid_features() to
*adjust_cpuid_features()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [72/115] 0633336351c17f73620bc7d13cee5ba53b100e13 (bonzini/rhel-qemu-kvm)
Because for TDX case, there are also fixed-1 bits that enforced by TDX
module.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-44-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 695bfaee7153153708228946aa26c6d879599c04)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/confidential-guest.h | 20 ++++++++++----------
target/i386/kvm/kvm.c | 2 +-
target/i386/sev.c | 4 ++--
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 38169ed68e..8a5cc7ecff 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -40,8 +40,8 @@ struct X86ConfidentialGuestClass {
/* <public> */
int (*kvm_type)(X86ConfidentialGuest *cg);
void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu);
- uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
- int reg, uint32_t value);
+ uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature,
+ uint32_t index, int reg, uint32_t value);
};
/**
@@ -71,21 +71,21 @@ static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest
}
/**
- * x86_confidential_guest_mask_cpuid_features:
+ * x86_confidential_guest_adjust_cpuid_features:
*
- * Removes unsupported features from a confidential guest's CPUID values, returns
- * the value with the bits removed. The bits removed should be those that KVM
- * provides independent of host-supported CPUID features, but are not supported by
- * the confidential computing firmware.
+ * Adjust the supported features from a confidential guest's CPUID values,
+ * returns the adjusted value. There are bits being removed that are not
+ * supported by the confidential computing firmware or bits being added that
+ * are forcibly exposed to guest by the confidential computing firmware.
*/
-static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg,
+static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg,
uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
- if (klass->mask_cpuid_features) {
- return klass->mask_cpuid_features(cg, feature, index, reg, value);
+ if (klass->adjust_cpuid_features) {
+ return klass->adjust_cpuid_features(cg, feature, index, reg, value);
} else {
return value;
}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index f3fe553151..5349ff4db7 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -565,7 +565,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
}
if (current_machine->cgs) {
- ret = x86_confidential_guest_mask_cpuid_features(
+ ret = x86_confidential_guest_adjust_cpuid_features(
X86_CONFIDENTIAL_GUEST(current_machine->cgs),
function, index, reg, ret);
}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index a0d271f898..24fcd078fc 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -946,7 +946,7 @@ out:
}
static uint32_t
-sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
+sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
switch (feature) {
@@ -2404,7 +2404,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
klass->launch_finish = sev_snp_launch_finish;
klass->launch_update_data = sev_snp_launch_update_data;
klass->kvm_init = sev_snp_kvm_init;
- x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features;
+ x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features;
x86_klass->kvm_type = sev_snp_kvm_type;
object_class_property_add(oc, "policy", "uint64",
--
2.50.1

View File

@ -0,0 +1,49 @@
From 70ffde0038f36b6720b73136a4368b26f2bf6181 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 107/115] i386/cpu: Cleanup host_cpu_max_instance_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [107/115] 140ba66d2ff544b6ae498798e1f6ad3ade1791bc (bonzini/rhel-qemu-kvm)
The implementation of host_cpu_max_instance_init() was merged into
host_cpu_instance_init() by commit 29f1ba338baf ("target/i386: merge
host_cpu_instance_init() and host_cpu_max_instance_init()"), while the
declaration of it remains in host-cpu.h.
Clean it up.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250716063117.602050-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 5fe6b9a854a91df86fdb794cbeb67d0656756137)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/host-cpu.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h
index b97ec01c9b..5b2ad491a8 100644
--- a/target/i386/host-cpu.h
+++ b/target/i386/host-cpu.h
@@ -12,7 +12,6 @@
uint32_t host_cpu_phys_bits(void);
void host_cpu_instance_init(X86CPU *cpu);
-void host_cpu_max_instance_init(X86CPU *cpu);
bool host_cpu_realizefn(CPUState *cs, Error **errp);
void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping);
--
2.50.1

View File

@ -0,0 +1,78 @@
From 0a568fbac880efdda740808e6fbbd8be09cfb46a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 024/115] i386/cpu: Consolidate the helper to get Host's vendor
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [24/115] cd823d475dc0a0ec4135f0c9d5546db996579d30 (bonzini/rhel-qemu-kvm)
Extend host_cpu_vendor_fms() to help more cases to get Host's vendor
information.
Cc: Dongli Zhang <dongli.zhang@oracle.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250410075619.145792-1-zhao1.liu@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ae39acef49e29169f90cd3a799d6cd0b50bc65d2)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/host-cpu.c | 10 ++++++----
target/i386/kvm/vmsr_energy.c | 3 +--
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 03b9d1b169..4a77ecc1fc 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -109,9 +109,13 @@ void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping)
{
uint32_t eax, ebx, ecx, edx;
- host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
+ host_cpuid(0x0, 0, NULL, &ebx, &ecx, &edx);
x86_cpu_vendor_words2str(vendor, ebx, edx, ecx);
+ if (!family && !model && !stepping) {
+ return;
+ }
+
host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
if (family) {
*family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
@@ -129,11 +133,9 @@ void host_cpu_instance_init(X86CPU *cpu)
X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
if (xcc->model) {
- uint32_t ebx = 0, ecx = 0, edx = 0;
char vendor[CPUID_VENDOR_SZ + 1];
- host_cpuid(0, 0, NULL, &ebx, &ecx, &edx);
- x86_cpu_vendor_words2str(vendor, ebx, edx, ecx);
+ host_cpu_vendor_fms(vendor, NULL, NULL, NULL);
object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort);
}
}
diff --git a/target/i386/kvm/vmsr_energy.c b/target/i386/kvm/vmsr_energy.c
index 7e064c5aef..615f23b6cf 100644
--- a/target/i386/kvm/vmsr_energy.c
+++ b/target/i386/kvm/vmsr_energy.c
@@ -29,10 +29,9 @@ char *vmsr_compute_default_paths(void)
bool is_host_cpu_intel(void)
{
- int family, model, stepping;
char vendor[CPUID_VENDOR_SZ + 1];
- host_cpu_vendor_fms(vendor, &family, &model, &stepping);
+ host_cpu_vendor_fms(vendor, NULL, NULL, NULL);
return strcmp(vendor, CPUID_VENDOR_INTEL);
}
--
2.50.1

View File

@ -0,0 +1,55 @@
From da6c7cae87a945451617014a97c83b0e38879786 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 009/115] i386/cpu: Drop cores_per_pkg in cpu_x86_cpuid()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [9/115] 1e8cca4b6784adde542654cd45b4f921fbc91fcd (bonzini/rhel-qemu-kvm)
Local variable cores_per_pkg is only used to calculate threads_per_pkg.
No need for it. Drop it and open-code it instead.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-4-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 00ec7be67c3981b486293aa8e0aef9534f229c5e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 589e1863d4e871e09af4ff176df97c23e2a33b8b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1fe492f33d..b639769ef3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6950,7 +6950,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
uint32_t limit;
uint32_t signature[3];
X86CPUTopoInfo topo_info;
- uint32_t cores_per_pkg;
uint32_t threads_per_pkg;
topo_info.dies_per_pkg = env->nr_dies;
@@ -6958,9 +6957,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules;
topo_info.threads_per_core = cs->nr_threads;
- cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die *
- topo_info.dies_per_pkg;
- threads_per_pkg = cores_per_pkg * topo_info.threads_per_core;
+ threads_per_pkg = topo_info.threads_per_core * topo_info.cores_per_module *
+ topo_info.modules_per_die * topo_info.dies_per_pkg;
/* Calculate & apply limits for different index ranges */
if (index >= 0xC0000000) {
--
2.50.1

View File

@ -0,0 +1,79 @@
From 8e732c71def28ac963a8f8d530c7dc063abc6d97 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 006/115] i386/cpu: Drop the check of phys_bits in
host_cpu_realizefn()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [6/115] a53971f358752d7c850baee4f8f3c7912854f160 (bonzini/rhel-qemu-kvm)
The check of cpu->phys_bits to be in range between
[32, TARGET_PHYS_ADDR_SPACE_BITS] in host_cpu_realizefn()
is duplicated with check in x86_cpu_realizefn().
Since the ckeck in x86_cpu_realizefn() is called later and can cover all
the x86 cases. Remove the one in host_cpu_realizefn().
Opportunistically adjust cpu->phys_bits directly in
host_cpu_adjust_phys_bits(), which matches more with the function name.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20240929085747.2023198-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 855bdb6c8a60ae20043531dc965fcb1ed171d7d9)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/host-cpu.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 8b8bf5afec..03b9d1b169 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -42,7 +42,7 @@ static uint32_t host_cpu_phys_bits(void)
return host_phys_bits;
}
-static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu)
+static void host_cpu_adjust_phys_bits(X86CPU *cpu)
{
uint32_t host_phys_bits = host_cpu_phys_bits();
uint32_t phys_bits = cpu->phys_bits;
@@ -66,7 +66,7 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu)
}
}
- return phys_bits;
+ cpu->phys_bits = phys_bits;
}
bool host_cpu_realizefn(CPUState *cs, Error **errp)
@@ -75,17 +75,7 @@ bool host_cpu_realizefn(CPUState *cs, Error **errp)
CPUX86State *env = &cpu->env;
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
- uint32_t phys_bits = host_cpu_adjust_phys_bits(cpu);
-
- if (phys_bits &&
- (phys_bits > TARGET_PHYS_ADDR_SPACE_BITS ||
- phys_bits < 32)) {
- error_setg(errp, "phys-bits should be between 32 and %u "
- " (but is %u)",
- TARGET_PHYS_ADDR_SPACE_BITS, phys_bits);
- return false;
- }
- cpu->phys_bits = phys_bits;
+ host_cpu_adjust_phys_bits(cpu);
}
return true;
}
--
2.50.1

View File

@ -0,0 +1,68 @@
From 368e0e988c60d0391c1e47db7e3f1aff7742f697 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 008/115] i386/cpu: Drop the variable smp_cores and smp_threads
in x86_cpu_pre_plug()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [8/115] bc67c58c23402a05899559f8ecfa61218aa3ef2a (bonzini/rhel-qemu-kvm)
No need to define smp_cores and smp_threads, just using ms->smp.cores
and ms->smp.threads is straightforward. It's also consistent with other
checks of socket/die/module.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-3-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 81bd60625fc23cb8d4d0e682dcc4223d5e1ead84)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c263000e490ddb361e0ef8a45342dea034036682)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86-common.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 992ea1f25e..2806be98f3 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -248,8 +248,6 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
CPUX86State *env = &cpu->env;
MachineState *ms = MACHINE(hotplug_dev);
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
- unsigned int smp_cores = ms->smp.cores;
- unsigned int smp_threads = ms->smp.threads;
X86CPUTopoInfo topo_info;
if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
@@ -329,17 +327,17 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
if (cpu->core_id < 0) {
error_setg(errp, "CPU core-id is not set");
return;
- } else if (cpu->core_id > (smp_cores - 1)) {
+ } else if (cpu->core_id > (ms->smp.cores - 1)) {
error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u",
- cpu->core_id, smp_cores - 1);
+ cpu->core_id, ms->smp.cores - 1);
return;
}
if (cpu->thread_id < 0) {
error_setg(errp, "CPU thread-id is not set");
return;
- } else if (cpu->thread_id > (smp_threads - 1)) {
+ } else if (cpu->thread_id > (ms->smp.threads - 1)) {
error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u",
- cpu->thread_id, smp_threads - 1);
+ cpu->thread_id, ms->smp.threads - 1);
return;
}
--
2.50.1

View File

@ -0,0 +1,112 @@
From 54403f52f9be5f4d05f1cc866b397820340099ab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 007/115] i386/cpu: Extract a common fucntion to setup value of
MSR_CORE_THREAD_COUNT
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [7/115] d61186d383365a629a8d74a5618b4df6b2723a88 (bonzini/rhel-qemu-kvm)
There are duplicated code to setup the value of MSR_CORE_THREAD_COUNT.
Extract a common function for it.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-2-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d3bb5d0d4f5d4ad7dc6c02ea5fea51ca2f946593)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 66f4008d32a6cddd1ada5906487ecc9fb0b62fed)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu-sysemu.c | 11 +++++++++++
target/i386/cpu.h | 2 ++
target/i386/hvf/x86_emu.c | 3 +--
target/i386/kvm/kvm.c | 5 +----
target/i386/tcg/sysemu/misc_helper.c | 3 +--
5 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index 227ac021f6..4e9df0bc01 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -309,3 +309,14 @@ void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v,
errp);
qapi_free_GuestPanicInformation(panic_info);
}
+
+uint64_t cpu_x86_get_msr_core_thread_count(X86CPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ uint64_t val;
+
+ val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */
+ val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */
+
+ return val;
+}
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5924761551..8c9216d9d0 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2368,6 +2368,8 @@ static inline void cpu_x86_load_seg_cache_sipi(X86CPU *cpu,
cs->halted = 0;
}
+uint64_t cpu_x86_get_msr_core_thread_count(X86CPU *cpu);
+
int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
target_ulong *base, unsigned int *limit,
unsigned int *flags);
diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c
index 38c782b8e3..425f1afda8 100644
--- a/target/i386/hvf/x86_emu.c
+++ b/target/i386/hvf/x86_emu.c
@@ -745,8 +745,7 @@ void simulate_rdmsr(CPUX86State *env)
val = env->mtrr_deftype;
break;
case MSR_CORE_THREAD_COUNT:
- val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */
- val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */
+ val = cpu_x86_get_msr_core_thread_count(cpu);
break;
default:
/* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index b02aec915c..fe6b34bb10 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2586,10 +2586,7 @@ static bool kvm_rdmsr_core_thread_count(X86CPU *cpu,
uint32_t msr,
uint64_t *val)
{
- CPUState *cs = CPU(cpu);
-
- *val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */
- *val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */
+ *val = cpu_x86_get_msr_core_thread_count(cpu);
return true;
}
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 094aa56a20..ff7b201b44 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -468,8 +468,7 @@ void helper_rdmsr(CPUX86State *env)
val = x86_cpu->ucode_rev;
break;
case MSR_CORE_THREAD_COUNT: {
- CPUState *cs = CPU(x86_cpu);
- val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16);
+ val = cpu_x86_get_msr_core_thread_count(x86_cpu);
break;
}
case MSR_APIC_START ... MSR_APIC_END: {
--
2.50.1

View File

@ -0,0 +1,80 @@
From 97edfb8e45b34e3909f387162785f0aa8979aba6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 013/115] i386/cpu: Hoist check of CPUID_EXT3_TOPOEXT against
threads_per_core
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [13/115] e8e81cac13bb9363c167e92a8478efb97ceab79a (bonzini/rhel-qemu-kvm)
Now it changes to use env->topo_info.threads_per_core and doesn't depend
on qemu_init_vcpu() anymore. Put it together with other feature checks
before qemu_init_vcpu()
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-8-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 473d79b56a1645be90b890f9623b27acd0afba49)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8098c705a5d8f82d2a772194ebdbef87784b0461)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 554455169a..2295149bfa 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -8327,6 +8327,21 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
*/
cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE;
+ /*
+ * Most Intel and certain AMD CPUs support hyperthreading. Even though QEMU
+ * fixes this issue by adjusting CPUID_0000_0001_EBX and CPUID_8000_0008_ECX
+ * based on inputs (sockets,cores,threads), it is still better to give
+ * users a warning.
+ */
+ if (IS_AMD_CPU(env) &&
+ !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
+ env->topo_info.threads_per_core > 1) {
+ warn_report_once("This family of AMD CPU doesn't support "
+ "hyperthreading(%d). Please configure -smp "
+ "options properly or try enabling topoext "
+ "feature.", env->topo_info.threads_per_core);
+ }
+
/* For 64bit systems think about the number of physical bits to present.
* ideally this should be the same as the host; anything other than matching
* the host can cause incorrect guest behaviour.
@@ -8430,21 +8445,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
qemu_init_vcpu(cs);
- /*
- * Most Intel and certain AMD CPUs support hyperthreading. Even though QEMU
- * fixes this issue by adjusting CPUID_0000_0001_EBX and CPUID_8000_0008_ECX
- * based on inputs (sockets,cores,threads), it is still better to give
- * users a warning.
- */
- if (IS_AMD_CPU(env) &&
- !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
- env->topo_info.threads_per_core > 1) {
- warn_report_once("This family of AMD CPU doesn't support "
- "hyperthreading(%d). Please configure -smp "
- "options properly or try enabling topoext "
- "feature.", env->topo_info.threads_per_core);
- }
-
#ifndef CONFIG_USER_ONLY
x86_cpu_apic_realize(cpu, &local_err);
if (local_err != NULL) {
--
2.50.1

View File

@ -0,0 +1,105 @@
From c45bc21b4c191ceb2523bfeac4ff2eb042469d89 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 062/115] i386/cpu: Introduce enable_cpuid_0x1f to force
exposing CPUID 0x1f
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [62/115] 046ef898225f70d84fbcf20266aff8478c6fb428 (bonzini/rhel-qemu-kvm)
Currently, QEMU exposes CPUID 0x1f to guest only when necessary, i.e.,
when topology level that cannot be enumerated by leaf 0xB, e.g., die or
module level, are configured for the guest, e.g., -smp xx,dies=2.
However, TDX architecture forces to require CPUID 0x1f to configure CPU
topology.
Introduce a bool flag, enable_cpuid_0x1f, in CPU for the case that
requires CPUID leaf 0x1f to be exposed to guest.
Introduce a new function x86_has_cpuid_0x1f(), which is the wrapper of
cpu->enable_cpuid_0x1f and x86_has_extended_topo() to check if it needs
to enable cpuid leaf 0x1f for the guest.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-34-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ab8bd85adf75900edc2764d0ebe8b53867cc54aa)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 4 ++--
target/i386/cpu.h | 9 +++++++++
target/i386/kvm/kvm.c | 2 +-
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ee6f1c0627..ab34626b19 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7177,7 +7177,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x1F:
/* V2 Extended Topology Enumeration Leaf */
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+ if (!x86_has_cpuid_0x1f(cpu)) {
*eax = *ebx = *ecx = *edx = 0;
break;
}
@@ -8035,7 +8035,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
* cpu->vendor_cpuid_only has been unset for compatibility with older
* machine types.
*/
- if (x86_has_extended_topo(env->avail_cpu_topo) &&
+ if (x86_has_cpuid_0x1f(cpu) &&
(IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
}
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ee1a1b6622..83fa89bf0a 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2168,6 +2168,9 @@ struct ArchCPU {
/* Compatibility bits for old machine types: */
bool enable_cpuid_0xb;
+ /* Force to enable cpuid 0x1f */
+ bool enable_cpuid_0x1f;
+
/* Enable auto level-increase for all CPUID leaves */
bool full_cpuid_auto_level;
@@ -2429,6 +2432,12 @@ void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
bool cpu_has_x2apic_feature(CPUX86State *env);
+static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
+{
+ return cpu->enable_cpuid_0x1f ||
+ x86_has_extended_topo(cpu->env.avail_cpu_topo);
+}
+
/* helper.c */
void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
void cpu_sync_avx_hflag(CPUX86State *env);
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 4bda4f5525..f4809ee004 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1861,7 +1861,7 @@ uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
break;
}
case 0x1f:
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+ if (!x86_has_cpuid_0x1f(env_archcpu(env))) {
cpuid_i--;
break;
}
--
2.50.1

View File

@ -0,0 +1,59 @@
From faffdc7fae7e90e9b4bdbb36ab2e753ebbf26732 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 087/115] i386/cpu: Move adjustment of CPUID_EXT_PDCM before
feature_dependencies[] check
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [87/115] 8e3628cfafdd307c5a38cbd2ec52ce3f679c1796 (bonzini/rhel-qemu-kvm)
There is one entry relates to CPUID_EXT_PDCM in feature_dependencies[].
So it needs to get correct value of CPUID_EXT_PDCM before using
feature_dependencies[] to apply dependencies.
Besides, it also ensures CPUID_EXT_PDCM value is tracked in
env->features[FEAT_1_ECX].
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250304052450.465445-2-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e68ec2980901c8e7f948f3305770962806c53f0b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 433d0a0418..a9d6811032 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7026,9 +7026,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (threads_per_pkg > 1) {
*ebx |= threads_per_pkg << 16;
}
- if (!cpu->enable_pmu) {
- *ecx &= ~CPUID_EXT_PDCM;
- }
break;
case 2:
/* cache info: needed for Pentium Pro compatibility */
@@ -8012,6 +8009,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
+ if (!cpu->enable_pmu) {
+ env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM;
+ }
+
for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) {
FeatureDep *d = &feature_dependencies[i];
if (!(env->features[d->from.index] & d->from.mask)) {
--
2.50.1

View File

@ -0,0 +1,91 @@
From eb67f5f683c98892292aa40695d7ec59a17c122f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 105/115] i386/cpu: Move x86_ext_save_areas[] initialization to
.instance_init
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [105/115] 289d5143a71cd0dc24c6d8e32d510657d5c77091 (bonzini/rhel-qemu-kvm)
In x86_cpu_post_initfn(), the initialization of x86_ext_save_areas[]
marks the unsupported xsave areas based on Host support.
This step must be done before accel_cpu_instance_init(), otherwise,
KVM's assertion on host xsave support would fail:
qemu-system-x86_64: ../target/i386/kvm/kvm-cpu.c:149:
kvm_cpu_xsave_init: Assertion `esa->size == eax' failed.
(on AMD EPYC 7302 16-Core Processor)
Move x86_ext_save_areas[] initialization to .instance_init and place it
before accel_cpu_instance_init().
Fixes: commit 5f158abef44c ("target/i386: move accel_cpu_instance_init to .instance_init")
Reported-by: Paolo Abeni <pabeni@redhat.com>
Tested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250717023933.2502109-1-zhao1.liu@intel.com
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e52af92e9e6f8fc00f2ae6b63214b3d6213b3cec)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index d0161f922c..ee753351fc 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -8617,6 +8617,16 @@ static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc,
}
static void x86_cpu_post_initfn(Object *obj)
+{
+#ifndef CONFIG_USER_ONLY
+ if (current_machine && current_machine->cgs) {
+ x86_confidential_guest_cpu_instance_init(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj)));
+ }
+#endif
+}
+
+static void x86_cpu_init_xsave(void)
{
static bool first = true;
uint64_t supported_xcr0;
@@ -8637,13 +8647,6 @@ static void x86_cpu_post_initfn(Object *obj)
}
}
}
-
-#ifndef CONFIG_USER_ONLY
- if (current_machine && current_machine->cgs) {
- x86_confidential_guest_cpu_instance_init(
- X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj)));
- }
-#endif
}
static void x86_cpu_init_default_topo(X86CPU *cpu)
@@ -8713,6 +8716,11 @@ static void x86_cpu_initfn(Object *obj)
x86_cpu_load_model(cpu, xcc->model);
}
+ /*
+ * accel's cpu_instance_init may have the xsave check,
+ * so x86_ext_save_areas[] must be initialized before this.
+ */
+ x86_cpu_init_xsave();
accel_cpu_instance_init(CPU(obj));
}
--
2.50.1

View File

@ -0,0 +1,73 @@
From 07bba3fcfd6b8eb6e833e7d675be3ccc667423de Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 089/115] i386/cpu: Rename enable_cpuid_0x1f to
force_cpuid_0x1f
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [89/115] d1152aea8e699b1563750d286b41a47d9036429e (bonzini/rhel-qemu-kvm)
The name of "enable_cpuid_0x1f" isn't right to its behavior because the
leaf 0x1f can be enabled even when "enable_cpuid_0x1f" is false.
Rename it to "force_cpuid_0x1f" to better reflect its behavior.
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Link: https://lore.kernel.org/r/20250603050305.1704586-2-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 90d2bbd1f6edfa22a056070ee62ded55099cd56d)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 4 ++--
target/i386/kvm/tdx.c | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 2e73945b28..a08931f969 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2195,7 +2195,7 @@ struct ArchCPU {
bool enable_cpuid_0xb;
/* Force to enable cpuid 0x1f */
- bool enable_cpuid_0x1f;
+ bool force_cpuid_0x1f;
/* Enable auto level-increase for all CPUID leaves */
bool full_cpuid_auto_level;
@@ -2465,7 +2465,7 @@ void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
{
- return cpu->enable_cpuid_0x1f ||
+ return cpu->force_cpuid_0x1f ||
x86_has_extended_topo(cpu->env.avail_cpu_topo);
}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 3099e40baa..ca3641441c 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -752,7 +752,7 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
/* invtsc is fixed1 for TD guest */
object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
- x86cpu->enable_cpuid_0x1f = true;
+ x86cpu->force_cpuid_0x1f = true;
}
static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
--
2.50.1

View File

@ -0,0 +1,69 @@
From 90ccca2272ea05adc4ecedbbac70692f72831533 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 016/115] i386/cpu: Set and track CPUID_EXT3_CMP_LEG in
env->features[FEAT_8000_0001_ECX]
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [16/115] 8d6efb657ea34f7adccb5f8648cf0d10ba126299 (bonzini/rhel-qemu-kvm)
The correct usage is tracking and maintaining features in env->features[]
instead of manually set it in cpu_x86_cpuid().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-11-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 99a637a86f55c8486b06c698656befdf012eec4d)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit eb7304fc81c136a475a951720b76afa1f128ae95)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index e20977411d..a97d042a2e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7404,17 +7404,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*ecx = env->features[FEAT_8000_0001_ECX];
*edx = env->features[FEAT_8000_0001_EDX];
- /* The Linux kernel checks for the CMPLegacy bit and
- * discards multiple thread information if it is set.
- * So don't set it here for Intel to make Linux guests happy.
- */
- if (threads_per_pkg > 1) {
- if (env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1 ||
- env->cpuid_vendor2 != CPUID_VENDOR_INTEL_2 ||
- env->cpuid_vendor3 != CPUID_VENDOR_INTEL_3) {
- *ecx |= 1 << 1; /* CmpLegacy bit */
- }
- }
if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 &&
!(env->hflags & HF_LMA_MASK)) {
*edx &= ~CPUID_EXT2_SYSCALL;
@@ -7976,6 +7965,15 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
if (x86_threads_per_pkg(&env->topo_info) > 1) {
env->features[FEAT_1_EDX] |= CPUID_HT;
+
+ /*
+ * The Linux kernel checks for the CMPLegacy bit and
+ * discards multiple thread information if it is set.
+ * So don't set it here for Intel to make Linux guests happy.
+ */
+ if (!IS_INTEL_CPU(env)) {
+ env->features[FEAT_8000_0001_ECX] |= CPUID_EXT3_CMP_LEG;
+ }
}
for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) {
--
2.50.1

View File

@ -0,0 +1,59 @@
From 673bbd9c79946356d2863f0944a576d100750c64 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 015/115] i386/cpu: Set up CPUID_HT in
x86_cpu_expand_features() instead of cpu_x86_cpuid()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [15/115] c763ec424a0cc64f46fd27aa2f03fd1b2933c23b (bonzini/rhel-qemu-kvm)
Currently CPUID_HT is evaluated in cpu_x86_cpuid() each time. It's not a
correct usage of how feature bit is maintained and evaluated. The
expected practice is that features are tracked in env->features[] and
cpu_x86_cpuid() should be the consumer of env->features[].
Track CPUID_HT in env->features[FEAT_1_EDX] instead and evaluate it in
cpu's realizefn().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-10-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c6bd2dd634208ca717b6dc010064fe34d1359080)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4e3da2efde9f16460e90246bec15b29739930bb1)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2295149bfa..e20977411d 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6989,7 +6989,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = env->features[FEAT_1_EDX];
if (threads_per_pkg > 1) {
*ebx |= threads_per_pkg << 16;
- *edx |= CPUID_HT;
}
if (!cpu->enable_pmu) {
*ecx &= ~CPUID_EXT_PDCM;
@@ -7975,6 +7974,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
+ if (x86_threads_per_pkg(&env->topo_info) > 1) {
+ env->features[FEAT_1_EDX] |= CPUID_HT;
+ }
+
for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) {
FeatureDep *d = &feature_dependencies[i];
if (!(env->features[d->from.index] & d->from.mask)) {
--
2.50.1

View File

@ -0,0 +1,301 @@
From 03d6e64ed980335602368d3b470e33cb21af307d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 012/115] i386/cpu: Track a X86CPUTopoInfo directly in
CPUX86State
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [12/115] 4d23b4e386ec4179739869c1ace2bb2b9f728bde (bonzini/rhel-qemu-kvm)
The name of nr_modules/nr_dies are ambiguous and they mislead people.
The purpose of them is to record and form the topology information. So
just maintain a X86CPUTopoInfo member in CPUX86State instead. Then
nr_modules and nr_dies can be dropped.
As the benefit, x86 can switch to use information in
CPUX86State::topo_info and get rid of the nr_cores and nr_threads in
CPUState. This helps remove the dependency on qemu_init_vcpu(), so that
x86 can get and use topology info earlier in x86_cpu_realizefn(); drop
the comment that highlighted the depedency.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-7-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 84b71a131c1bc84c36fafb63271080ecf9f2ff7a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f598befcd2cddbf01f7606102400cec4acd35d48)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86-common.c | 12 ++++------
target/i386/cpu-sysemu.c | 6 ++---
target/i386/cpu.c | 51 +++++++++++++++++-----------------------
target/i386/cpu.h | 6 +----
4 files changed, 30 insertions(+), 45 deletions(-)
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 2806be98f3..562215990d 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -248,7 +248,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
CPUX86State *env = &cpu->env;
MachineState *ms = MACHINE(hotplug_dev);
X86MachineState *x86ms = X86_MACHINE(hotplug_dev);
- X86CPUTopoInfo topo_info;
+ X86CPUTopoInfo *topo_info = &env->topo_info;
if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) {
error_setg(errp, "Invalid CPU type, expected cpu type: '%s'",
@@ -267,15 +267,13 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
}
}
- init_topo_info(&topo_info, x86ms);
+ init_topo_info(topo_info, x86ms);
if (ms->smp.modules > 1) {
- env->nr_modules = ms->smp.modules;
set_bit(CPU_TOPO_LEVEL_MODULE, env->avail_cpu_topo);
}
if (ms->smp.dies > 1) {
- env->nr_dies = ms->smp.dies;
set_bit(CPU_TOPO_LEVEL_DIE, env->avail_cpu_topo);
}
@@ -346,12 +344,12 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
topo_ids.module_id = cpu->module_id;
topo_ids.core_id = cpu->core_id;
topo_ids.smt_id = cpu->thread_id;
- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids);
+ cpu->apic_id = x86_apicid_from_topo_ids(topo_info, &topo_ids);
}
cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx);
if (!cpu_slot) {
- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
+ x86_topo_ids_from_apicid(cpu->apic_id, topo_info, &topo_ids);
error_setg(errp,
"Invalid CPU [socket: %u, die: %u, module: %u, core: %u, thread: %u]"
@@ -374,7 +372,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
/* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn()
* once -smp refactoring is complete and there will be CPU private
* CPUState::nr_cores and CPUState::nr_threads fields instead of globals */
- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids);
+ x86_topo_ids_from_apicid(cpu->apic_id, topo_info, &topo_ids);
if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) {
error_setg(errp, "property socket-id: %u doesn't match set apic-id:"
" 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id,
diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c
index 4e9df0bc01..31b37c6325 100644
--- a/target/i386/cpu-sysemu.c
+++ b/target/i386/cpu-sysemu.c
@@ -312,11 +312,11 @@ void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v,
uint64_t cpu_x86_get_msr_core_thread_count(X86CPU *cpu)
{
- CPUState *cs = CPU(cpu);
+ CPUX86State *env = &cpu->env;
uint64_t val;
- val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */
- val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */
+ val = x86_threads_per_pkg(&env->topo_info); /* thread count, bits 15..0 */
+ val |= x86_cores_per_pkg(&env->topo_info) << 16; /* core count, bits 31..16 */
return val;
}
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1c79eb9a06..554455169a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6947,15 +6947,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
CPUState *cs = env_cpu(env);
uint32_t limit;
uint32_t signature[3];
- X86CPUTopoInfo topo_info;
+ X86CPUTopoInfo *topo_info = &env->topo_info;
uint32_t threads_per_pkg;
- topo_info.dies_per_pkg = env->nr_dies;
- topo_info.modules_per_die = env->nr_modules;
- topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules;
- topo_info.threads_per_core = cs->nr_threads;
-
- threads_per_pkg = x86_threads_per_pkg(&topo_info);
+ threads_per_pkg = x86_threads_per_pkg(topo_info);
/* Calculate & apply limits for different index ranges */
if (index >= 0xC0000000) {
@@ -7032,12 +7027,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
*eax &= ~0xFC000000;
- *eax |= max_core_ids_in_package(&topo_info) << 26;
+ *eax |= max_core_ids_in_package(topo_info) << 26;
if (host_vcpus_per_cache > threads_per_pkg) {
*eax &= ~0x3FFC000;
/* Share the cache at package level. */
- *eax |= max_thread_ids_for_cache(&topo_info,
+ *eax |= max_thread_ids_for_cache(topo_info,
CPU_TOPO_LEVEL_PACKAGE) << 14;
}
}
@@ -7049,7 +7044,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
switch (count) {
case 0: /* L1 dcache info */
encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache,
- &topo_info,
+ topo_info,
eax, ebx, ecx, edx);
if (!cpu->l1_cache_per_core) {
*eax &= ~MAKE_64BIT_MASK(14, 12);
@@ -7057,7 +7052,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 1: /* L1 icache info */
encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache,
- &topo_info,
+ topo_info,
eax, ebx, ecx, edx);
if (!cpu->l1_cache_per_core) {
*eax &= ~MAKE_64BIT_MASK(14, 12);
@@ -7065,13 +7060,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 2: /* L2 cache info */
encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache,
- &topo_info,
+ topo_info,
eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
if (cpu->enable_l3_cache) {
encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache,
- &topo_info,
+ topo_info,
eax, ebx, ecx, edx);
break;
}
@@ -7154,12 +7149,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
switch (count) {
case 0:
- *eax = apicid_core_offset(&topo_info);
- *ebx = topo_info.threads_per_core;
+ *eax = apicid_core_offset(topo_info);
+ *ebx = topo_info->threads_per_core;
*ecx |= CPUID_B_ECX_TOPO_LEVEL_SMT << 8;
break;
case 1:
- *eax = apicid_pkg_offset(&topo_info);
+ *eax = apicid_pkg_offset(topo_info);
*ebx = threads_per_pkg;
*ecx |= CPUID_B_ECX_TOPO_LEVEL_CORE << 8;
break;
@@ -7185,7 +7180,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
}
- encode_topo_cpuid1f(env, count, &topo_info, eax, ebx, ecx, edx);
+ encode_topo_cpuid1f(env, count, topo_info, eax, ebx, ecx, edx);
break;
case 0xD: {
/* Processor Extended State */
@@ -7488,7 +7483,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
* thread ID within a package".
* Bits 7:0 is "The number of threads in the package is NC+1"
*/
- *ecx = (apicid_pkg_offset(&topo_info) << 12) |
+ *ecx = (apicid_pkg_offset(topo_info) << 12) |
(threads_per_pkg - 1);
} else {
*ecx = 0;
@@ -7517,19 +7512,19 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
switch (count) {
case 0: /* L1 dcache info */
encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache,
- &topo_info, eax, ebx, ecx, edx);
+ topo_info, eax, ebx, ecx, edx);
break;
case 1: /* L1 icache info */
encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache,
- &topo_info, eax, ebx, ecx, edx);
+ topo_info, eax, ebx, ecx, edx);
break;
case 2: /* L2 cache info */
encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache,
- &topo_info, eax, ebx, ecx, edx);
+ topo_info, eax, ebx, ecx, edx);
break;
case 3: /* L3 cache info */
encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache,
- &topo_info, eax, ebx, ecx, edx);
+ topo_info, eax, ebx, ecx, edx);
break;
default: /* end of info */
*eax = *ebx = *ecx = *edx = 0;
@@ -7541,7 +7536,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x8000001E:
if (cpu->core_id <= 255) {
- encode_topo_cpuid8000001e(cpu, &topo_info, eax, ebx, ecx, edx);
+ encode_topo_cpuid8000001e(cpu, topo_info, eax, ebx, ecx, edx);
} else {
*eax = 0;
*ebx = 0;
@@ -8440,17 +8435,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
* fixes this issue by adjusting CPUID_0000_0001_EBX and CPUID_8000_0008_ECX
* based on inputs (sockets,cores,threads), it is still better to give
* users a warning.
- *
- * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise
- * cs->nr_threads hasn't be populated yet and the checking is incorrect.
*/
if (IS_AMD_CPU(env) &&
!(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) &&
- cs->nr_threads > 1) {
+ env->topo_info.threads_per_core > 1) {
warn_report_once("This family of AMD CPU doesn't support "
"hyperthreading(%d). Please configure -smp "
"options properly or try enabling topoext "
- "feature.", cs->nr_threads);
+ "feature.", env->topo_info.threads_per_core);
}
#ifndef CONFIG_USER_ONLY
@@ -8611,8 +8603,7 @@ static void x86_cpu_init_default_topo(X86CPU *cpu)
{
CPUX86State *env = &cpu->env;
- env->nr_modules = 1;
- env->nr_dies = 1;
+ env->topo_info = (X86CPUTopoInfo) {1, 1, 1, 1};
/* SMT, core and package levels are set by default. */
set_bit(CPU_TOPO_LEVEL_SMT, env->avail_cpu_topo);
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8c9216d9d0..cc42a2c520 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2024,11 +2024,7 @@ typedef struct CPUArchState {
TPRAccess tpr_access_type;
- /* Number of dies within this CPU package. */
- unsigned nr_dies;
-
- /* Number of modules within one die. */
- unsigned nr_modules;
+ X86CPUTopoInfo topo_info;
/* Bitmap of available CPU topology levels for this CPU. */
DECLARE_BITMAP(avail_cpu_topo, CPU_TOPO_LEVEL_MAX);
--
2.50.1

View File

@ -0,0 +1,87 @@
From 831b04c3f8f66705a01b4fd455dbe53fa3193a4e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 060/115] i386/cpu: introduce
x86_confidential_guest_cpu_instance_init()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [60/115] be57d5108231bb12dc6c9ffd8c9c639b87b1f15c (bonzini/rhel-qemu-kvm)
To allow execute confidential guest specific cpu init operations.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-32-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8583c53e2b619b1b9569d3f2d3f3cb2904a573ad)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflict: include/system/ is still include/sysemu/
---
target/i386/confidential-guest.h | 11 +++++++++++
target/i386/cpu.c | 8 ++++++++
2 files changed, 19 insertions(+)
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 7342d2843a..38169ed68e 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -39,6 +39,7 @@ struct X86ConfidentialGuestClass {
/* <public> */
int (*kvm_type)(X86ConfidentialGuest *cg);
+ void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu);
uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
int reg, uint32_t value);
};
@@ -59,6 +60,16 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg)
}
}
+static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg,
+ CPUState *cpu)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->cpu_instance_init) {
+ klass->cpu_instance_init(cg, cpu);
+ }
+}
+
/**
* x86_confidential_guest_mask_cpuid_features:
*
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4eef3d1dbd..ee6f1c0627 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -36,6 +36,7 @@
#include "hw/qdev-properties.h"
#include "hw/i386/topology.h"
#ifndef CONFIG_USER_ONLY
+#include "confidential-guest.h"
#include "sysemu/reset.h"
#include "qapi/qapi-commands-machine-target.h"
#include "exec/address-spaces.h"
@@ -8600,6 +8601,13 @@ static void x86_cpu_post_initfn(Object *obj)
}
accel_cpu_instance_init(CPU(obj));
+
+#ifndef CONFIG_USER_ONLY
+ if (current_machine && current_machine->cgs) {
+ x86_confidential_guest_cpu_instance_init(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj)));
+ }
+#endif
}
static void x86_cpu_init_default_topo(X86CPU *cpu)
--
2.50.1

View File

@ -0,0 +1,55 @@
From d77454aef08828ab25881dbd9acb25f3d6f59d10 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 086/115] i386/tdvf: Fix build on 32-bit host
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [86/115] 99b8d992079a248091911010238eb20588acca6a (bonzini/rhel-qemu-kvm)
Use PRI formats where required.
Cc: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/r/20250602173101.1052983-3-clg@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6f1035fc65406c4e72e1dbd76e64924415edd616)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/tdvf.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
index 88453cf3a5..6fcc794880 100644
--- a/hw/i386/tdvf.c
+++ b/hw/i386/tdvf.c
@@ -101,16 +101,16 @@ static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
/* sanity check */
if (entry->size < entry->data_len) {
- error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
entry->data_len, entry->size);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
- error_report("MemoryAddress 0x%lx not page aligned", entry->address);
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
return -1;
}
if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
- error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
return -1;
}
--
2.50.1

View File

@ -0,0 +1,315 @@
From e31406cad5a27e2a807dfacb92d9d44efd1615fe Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 046/115] i386/tdvf: Introduce function to parse TDVF metadata
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [46/115] 5eed49eea4f80f57a310e69a47ee220bd41f6188 (bonzini/rhel-qemu-kvm)
TDX VM needs to boot with its specialized firmware, Trusted Domain
Virtual Firmware (TDVF). QEMU needs to parse TDVF and map it in TD
guest memory prior to running the TDX VM.
A TDVF Metadata in TDVF image describes the structure of firmware.
QEMU refers to it to setup memory for TDVF. Introduce function
tdvf_parse_metadata() to parse the metadata from TDVF image and store
the info of each TDVF section.
TDX metadata is located by a TDX metadata offset block, which is a
GUID-ed structure. The data portion of the GUID structure contains
only an 4-byte field that is the offset of TDX metadata to the end
of firmware file.
Select X86_FW_OVMF when TDX is enable to leverage existing functions
to parse and search OVMF's GUID-ed structures.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-18-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b65a6011d16c4f7cb2eb227ab1bc735850475288)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/
---
hw/i386/Kconfig | 1 +
hw/i386/meson.build | 1 +
hw/i386/tdvf.c | 188 +++++++++++++++++++++++++++++++++++++++++
include/hw/i386/tdvf.h | 38 +++++++++
4 files changed, 228 insertions(+)
create mode 100644 hw/i386/tdvf.c
create mode 100644 include/hw/i386/tdvf.h
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index edd61cd2aa..31e50e2ebf 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -12,6 +12,7 @@ config SGX
config TDX
bool
+ select X86_FW_OVMF
depends on KVM
config PC
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 03aad10df7..d6d8023664 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -31,6 +31,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000000..824a387d42
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "sysemu/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * the raw structs read from TDVF keeps the name convention in
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx",
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%lx not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%lx not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* The sections that must be copied from firmware image to TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* The sections that no need to be copied from firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ fw->entries = 0;
+ g_free(fw->entries);
+ return -EINVAL;
+}
diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h
new file mode 100644
index 0000000000..7ebcac42a3
--- /dev/null
+++ b/include/hw/i386/tdvf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_TDVF_H
+#define HW_I386_TDVF_H
+
+#include "qemu/osdep.h"
+
+#define TDVF_SECTION_TYPE_BFV 0
+#define TDVF_SECTION_TYPE_CFV 1
+#define TDVF_SECTION_TYPE_TD_HOB 2
+#define TDVF_SECTION_TYPE_TEMP_MEM 3
+
+#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
+#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
+
+typedef struct TdxFirmwareEntry {
+ uint32_t data_offset;
+ uint32_t data_len;
+ uint64_t address;
+ uint64_t size;
+ uint32_t type;
+ uint32_t attributes;
+} TdxFirmwareEntry;
+
+typedef struct TdxFirmware {
+ uint32_t nr_entries;
+ TdxFirmwareEntry *entries;
+} TdxFirmware;
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
+
+#endif /* HW_I386_TDVF_H */
--
2.50.1

View File

@ -0,0 +1,106 @@
From 62fd2fea6ebca35e3bd12685ce5b10635375968b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:25:28 +0200
Subject: [PATCH 053/115] i386/tdx: Add TDVF memory via KVM_TDX_INIT_MEM_REGION
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [53/115] 44c8cbffafa5b307c5e28c6ad76abb496287cf47 (bonzini/rhel-qemu-kvm)
TDVF firmware (CODE and VARS) needs to be copied to TD's private
memory via KVM_TDX_INIT_MEM_REGION, as well as TD HOB and TEMP memory.
If the TDVF section has TDVF_SECTION_ATTRIBUTES_MR_EXTEND set in the
flag, calling KVM_TDX_EXTEND_MEMORY to extend the measurement.
After populating the TDVF memory, the original image located in shared
ramblock can be discarded.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-25-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ebc2d2b497c59414ac3c91de32bc546d27940e74)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/,exec/
---
target/i386/kvm/tdx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index db5d58b600..8f0826ac11 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -17,6 +17,7 @@
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
#include "sysemu/sysemu.h"
+#include "exec/ramblock.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
@@ -262,6 +263,9 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
TdxFirmware *tdvf = &tdx_guest->tdvf;
TdxFirmwareEntry *entry;
+ RAMBlock *ram_block;
+ Error *local_err = NULL;
+ int r;
tdx_init_ram_entries();
@@ -297,6 +301,44 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
sizeof(TdxRamEntry), &tdx_ram_entry_compare);
tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ struct kvm_tdx_init_mem_region region;
+ uint32_t flags;
+
+ region = (struct kvm_tdx_init_mem_region) {
+ .source_addr = (uint64_t)entry->mem_ptr,
+ .gpa = entry->address,
+ .nr_pages = entry->size >> 12,
+ };
+
+ flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
+ KVM_TDX_MEASURE_MEMORY_REGION : 0;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
+ &region, &local_err);
+ } while (r == -EAGAIN || r == -EINTR);
+ if (r < 0) {
+ error_report_err(local_err);
+ exit(1);
+ }
+
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
+ entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ entry->mem_ptr = NULL;
+ }
+ }
+
+ /*
+ * TDVF image has been copied into private region above via
+ * KVM_MEMORY_MAPPING. It becomes useless.
+ */
+ ram_block = tdx_guest->tdvf_mr->ram_block;
+ ram_block_discard_range(ram_block, 0, ram_block->max_length);
}
static Notifier tdx_machine_done_notify = {
--
2.50.1

View File

@ -0,0 +1,249 @@
From 70f7099dfd895a7ffeee3f66e188c1853d3885d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 074/115] i386/tdx: Add TDX fixed1 bits to supported CPUIDs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [74/115] 81fb39c826363f892e46386883c69c0e862d7850 (bonzini/rhel-qemu-kvm)
TDX architecture forcibly sets some CPUID bits for TD guest that VMM
cannot disable it. They are fixed1 bits.
Fixed1 bits are not covered by tdx_caps.cpuid (which only contains the
directly configurable bits), while fixed1 bits are supported for TD guest
obviously.
Add fixed1 bits to tdx_supported_cpuid. Besides, set all the fixed1
bits to the initial set of KVM's support since KVM might not report them
as supported.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-46-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0ba06e46d09b84a2cb97a268da5576aaca3a24ca)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 2 +
target/i386/kvm/kvm_i386.h | 7 ++
target/i386/kvm/tdx.c | 134 +++++++++++++++++++++++++++++++++++++
target/i386/sev.c | 8 ---
4 files changed, 143 insertions(+), 8 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 601e828577..529f24df00 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -924,6 +924,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_EDX_FSRM (1U << 4)
/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */
#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8)
+ /* "md_clear" VERW clears CPU buffers */
+#define CPUID_7_0_EDX_MD_CLEAR (1U << 10)
/* SERIALIZE instruction */
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 797610496a..f1d55d5b75 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -44,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+
+typedef struct KvmCpuidInfo {
+ struct kvm_cpuid2 cpuid;
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} KvmCpuidInfo;
+
bool kvm_is_vm_type_supported(int type);
bool kvm_has_adjust_clock_stable(void);
bool kvm_has_exception_payload(void);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 4949d01f22..6fa30c3ec4 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -367,6 +367,133 @@ static Notifier tdx_machine_done_notify = {
.notify = tdx_finalize_vm,
};
+/*
+ * Some CPUID bits change from fixed1 to configurable bits when TDX module
+ * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY.
+ *
+ * To make QEMU work with all the versions of TDX module, keep the fixed1 bits
+ * here if they are ever fixed1 bits in any of the version though not fixed1 in
+ * the latest version. Otherwise, with the older version of TDX module, QEMU may
+ * treat the fixed1 bit as unsupported.
+ *
+ * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even
+ * though they changed to configurable bits. Because tdx_fixed1_bits is used to
+ * setup the supported bits.
+ */
+KvmCpuidInfo tdx_fixed1_bits = {
+ .cpuid.nent = 8,
+ .entries[0] = {
+ .function = 0x1,
+ .index = 0,
+ .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
+ CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
+ CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+ CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
+ CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
+ .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+ CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
+ CPUID_SSE | CPUID_SSE2,
+ },
+ .entries[1] = {
+ .function = 0x6,
+ .index = 0,
+ .eax = CPUID_6_EAX_ARAT,
+ },
+ .entries[2] = {
+ .function = 0x7,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
+ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
+ .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
+ CPUID_7_0_ECX_MOVDIR64B,
+ .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
+ CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
+ CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ },
+ .entries[3] = {
+ .function = 0x7,
+ .index = 2,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
+ CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
+ },
+ .entries[4] = {
+ .function = 0xD,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
+ },
+ .entries[5] = {
+ .function = 0xD,
+ .index = 1,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC|
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ },
+ .entries[6] = {
+ .function = 0x80000001,
+ .index = 0,
+ .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+ /*
+ * Strictly speaking, SYSCALL is not fixed1 bit since it depends on
+ * the CPU to be in 64-bit mode. But here fixed1 is used to serve the
+ * purpose of supported bits for TDX. In this sense, SYACALL is always
+ * supported.
+ */
+ .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+ },
+ .entries[7] = {
+ .function = 0x80000007,
+ .index = 0,
+ .edx = CPUID_APM_INVTSC,
+ },
+};
+
+static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
+ uint32_t index)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = cpuid_find_entry(tdx_supported_cpuid, function, index);
+ if (!e) {
+ if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
+ error_report("tdx_supported_cpuid requries more space than %d entries",
+ KVM_MAX_CPUID_ENTRIES);
+ exit(1);
+ }
+ e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
+ e->function = function;
+ e->index = index;
+ }
+
+ return e;
+}
+
+static void tdx_add_supported_cpuid_by_fixed1_bits(void)
+{
+ struct kvm_cpuid_entry2 *e, *e1;
+ int i;
+
+ for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
+ e = &tdx_fixed1_bits.entries[i];
+
+ e1 = find_in_supported_entry(e->function, e->index);
+ e1->eax |= e->eax;
+ e1->ebx |= e->ebx;
+ e1->ecx |= e->ecx;
+ e1->edx |= e->edx;
+ }
+}
+
static void tdx_setup_supported_cpuid(void)
{
if (tdx_supported_cpuid) {
@@ -379,6 +506,8 @@ static void tdx_setup_supported_cpuid(void)
memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
+
+ tdx_add_supported_cpuid_by_fixed1_bits();
}
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
@@ -463,6 +592,11 @@ static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
{
struct kvm_cpuid_entry2 *e;
+ e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+ if (e) {
+ value |= cpuid_entry_get_reg(e, reg);
+ }
+
if (is_feature_word_cpuid(feature, index, reg)) {
e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
if (e) {
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 24fcd078fc..edbad9bb92 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -211,14 +211,6 @@ static const char *const sev_fw_errlist[] = {
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
-/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
-#define KVM_MAX_CPUID_ENTRIES 100
-
-typedef struct KvmCpuidInfo {
- struct kvm_cpuid2 cpuid;
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
-} KvmCpuidInfo;
-
#define SNP_CPUID_FUNCTION_MAXCOUNT 64
#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
--
2.50.1

View File

@ -0,0 +1,60 @@
From 2c833c9f0b720bcf4267f9025344586b4eaa4906 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 077/115] i386/tdx: Add XFD to supported bit of TDX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [77/115] 12485d15d9f665ad75d51f92380997df371180e4 (bonzini/rhel-qemu-kvm)
Just mark XFD as always supported for TDX. This simple solution relies
on the fact KVM will report XFD as 0 when it's not supported by the
hardware.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-49-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9f5771c57dbe92d46361afd992a5851c846d0322)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 1 +
target/i386/kvm/tdx.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 3a7a409809..19645eb6f8 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1100,6 +1100,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_XSAVE_XSAVEC (1U << 1)
#define CPUID_XSAVE_XGETBV1 (1U << 2)
#define CPUID_XSAVE_XSAVES (1U << 3)
+#define CPUID_XSAVE_XFD (1U << 4)
#define CPUID_6_EAX_ARAT (1U << 2)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index feb9cd7466..f15ed51a32 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -621,6 +621,12 @@ static void tdx_add_supported_cpuid_by_xfam(void)
e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
e = find_in_supported_entry(0xd, 1);
+ /*
+ * Mark XFD always support for TDX, it will be cleared finally in
+ * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware
+ * because in this case the original data has it as 0.
+ */
+ e->eax |= CPUID_XSAVE_XFD;
e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
}
--
2.50.1

View File

@ -0,0 +1,113 @@
From b62e9f0a875520807daa28d0b808e2ba9d96ca79 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 038/115] i386/tdx: Add property sept-ve-disable for tdx-guest
object
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [38/115] 3a21966b474dde36a34aa215e45262ba83a809ac (bonzini/rhel-qemu-kvm)
Bit 28 of TD attribute, named SEPT_VE_DISABLE. When set to 1, it disables
EPT violation conversion to #VE on guest TD access of PENDING pages.
Some guest OS (e.g., Linux TD guest) may require this bit as 1.
Otherwise refuse to boot.
Add sept-ve-disable property for tdx-guest object, for user to configure
this bit.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-10-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6016e2972d94c90307b6caf55a8e3aee5424c09b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
qapi/qom.json | 8 +++++++-
target/i386/kvm/tdx.c | 23 +++++++++++++++++++++++
2 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/qapi/qom.json b/qapi/qom.json
index 530efeb7c5..fefb54f90b 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -1016,10 +1016,16 @@
# @attributes: The 'attributes' of a TD guest that is passed to
# KVM_TDX_INIT_VM
#
+# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling
+# of EPT violation conversion to #VE on guest TD access of PENDING
+# pages. Some guest OS (e.g., Linux TD guest) may require this to
+# be set, otherwise they refuse to boot.
+#
# Since: 10.1
##
{ 'struct': 'TdxGuestProperties',
- 'data': { '*attributes': 'uint64' } }
+ 'data': { '*attributes': 'uint64',
+ '*sept-ve-disable': 'bool' } }
##
# @ThreadContextProperties:
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 8f02c76249..370bd86f2c 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -18,6 +18,8 @@
#include "kvm_i386.h"
#include "tdx.h"
+#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
+
static TdxGuest *tdx_guest;
static struct kvm_tdx_capabilities *tdx_caps;
@@ -252,6 +254,24 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return 0;
}
+static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
+}
+
+static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ if (value) {
+ tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ } else {
+ tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ }
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -272,6 +292,9 @@ static void tdx_guest_init(Object *obj)
object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
OBJ_PROP_FLAG_READWRITE);
+ object_property_add_bool(obj, "sept-ve-disable",
+ tdx_guest_get_sept_ve_disable,
+ tdx_guest_set_sept_ve_disable);
}
static void tdx_guest_finalize(Object *obj)
--
2.50.1

View File

@ -0,0 +1,144 @@
From d228f2ad7fb96be3ec1fa3256ee0404cf7e2b094 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 075/115] i386/tdx: Add supported CPUID bits related to TD
Attributes
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [75/115] 095f42329711d2cb7f147856ebf2775a522fd8e3 (bonzini/rhel-qemu-kvm)
For TDX, some CPUID feature bit is configured via TD attributes. They
are not covered by tdx_caps.cpuid (which only contians the directly
configurable CPUID bits), but they are actually supported when the
related attributre bit is supported.
Note, LASS and KeyLocker are not supported by KVM for TDX, nor does
QEMU support it (see TDX_SUPPORTED_TD_ATTRS). They are defined in
tdx_attrs_maps[] for the completeness of the existing TD Attribute
bits that are related with CPUID features.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-47-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 31df29c532a9ef473c6efd497950a620099bf1da)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.h | 4 +++
target/i386/kvm/tdx.c | 60 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 529f24df00..e02cb75619 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -903,6 +903,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_ECX_LA57 (1U << 16)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
+/* KeyLocker */
+#define CPUID_7_0_ECX_KeyLocker (1U << 23)
/* Bus Lock Debug Exception */
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
/* Cache Line Demote Instruction */
@@ -963,6 +965,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
+/* Linear address space separation */
+#define CPUID_7_1_EAX_LASS (1U << 6)
/* CMPCCXADD Instructions */
#define CPUID_7_1_EAX_CMPCCXADD (1U << 7)
/* Fast Zero REP MOVS */
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 6fa30c3ec4..60dd239c05 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -458,6 +458,34 @@ KvmCpuidInfo tdx_fixed1_bits = {
},
};
+typedef struct TdxAttrsMap {
+ uint32_t attr_index;
+ uint32_t cpuid_leaf;
+ uint32_t cpuid_subleaf;
+ int cpuid_reg;
+ uint32_t feat_mask;
+} TdxAttrsMap;
+
+static TdxAttrsMap tdx_attrs_maps[] = {
+ {.attr_index = 27,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 1,
+ .cpuid_reg = R_EAX,
+ .feat_mask = CPUID_7_1_EAX_LASS,},
+
+ {.attr_index = 30,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_PKS,},
+
+ {.attr_index = 31,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_KeyLocker,},
+};
+
static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
uint32_t index)
{
@@ -494,6 +522,37 @@ static void tdx_add_supported_cpuid_by_fixed1_bits(void)
}
}
+static void tdx_add_supported_cpuid_by_attrs(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ TdxAttrsMap *map;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
+ map = &tdx_attrs_maps[i];
+ if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
+ continue;
+ }
+
+ e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
+
+ switch(map->cpuid_reg) {
+ case R_EAX:
+ e->eax |= map->feat_mask;
+ break;
+ case R_EBX:
+ e->ebx |= map->feat_mask;
+ break;
+ case R_ECX:
+ e->ecx |= map->feat_mask;
+ break;
+ case R_EDX:
+ e->edx |= map->feat_mask;
+ break;
+ }
+ }
+}
+
static void tdx_setup_supported_cpuid(void)
{
if (tdx_supported_cpuid) {
@@ -508,6 +567,7 @@ static void tdx_setup_supported_cpuid(void)
tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
tdx_add_supported_cpuid_by_fixed1_bits();
+ tdx_add_supported_cpuid_by_attrs();
}
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
--
2.50.1

View File

@ -0,0 +1,220 @@
From 714abb122a2cc4819b05a3893dfd2c61a9204c5e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 076/115] i386/tdx: Add supported CPUID bits relates to XFAM
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [76/115] 459d99074c90bfd8048585dec42749cb18493ee9 (bonzini/rhel-qemu-kvm)
Some CPUID bits are controlled by XFAM. They are not covered by
tdx_caps.cpuid (which only contians the directly configurable bits), but
they are actually supported when the related XFAM bit is supported.
Add these XFAM controlled bits to TDX supported CPUID bits based on the
supported_xfam.
Besides, incorporate the supported_xfam into the supported CPUID leaf of
0xD.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-48-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8c94c84cb9e0140b48acc9c9d404525ca7ef7457)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 12 -------
target/i386/cpu.h | 16 ++++++++++
target/i386/kvm/tdx.c | 73 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 89 insertions(+), 12 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2da456da64..cd6d9e8c1c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1660,15 +1660,6 @@ bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg)
return false;
}
-typedef struct FeatureMask {
- FeatureWord index;
- uint64_t mask;
-} FeatureMask;
-
-typedef struct FeatureDep {
- FeatureMask from, to;
-} FeatureDep;
-
static FeatureDep feature_dependencies[] = {
{
.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES },
@@ -1837,9 +1828,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER
-/* CPUID feature bits available in XSS */
-#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
-
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_FP_BIT] = {
/* x87 FP state component is always enabled if XSAVE is supported */
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index e02cb75619..3a7a409809 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -589,6 +589,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_BIT 5
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
+#define XSTATE_PT_BIT 8
#define XSTATE_PKRU_BIT 9
#define XSTATE_ARCH_LBR_BIT 15
#define XSTATE_XTILE_CFG_BIT 17
@@ -602,6 +603,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT)
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
+#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT)
#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
@@ -624,6 +626,11 @@ typedef enum X86Seg {
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
+/* CPUID feature bits available in XSS */
+#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
+
+#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK)
+
/* CPUID feature words */
typedef enum FeatureWord {
FEAT_1_EDX, /* CPUID[1].EDX */
@@ -671,6 +678,15 @@ typedef enum FeatureWord {
FEATURE_WORDS,
} FeatureWord;
+typedef struct FeatureMask {
+ FeatureWord index;
+ uint64_t mask;
+} FeatureMask;
+
+typedef struct FeatureDep {
+ FeatureMask from, to;
+} FeatureDep;
+
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 60dd239c05..feb9cd7466 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -23,6 +23,8 @@
#include <linux/kvm_para.h>
+#include "cpu.h"
+#include "cpu-internal.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
@@ -486,6 +488,32 @@ static TdxAttrsMap tdx_attrs_maps[] = {
.feat_mask = CPUID_7_0_ECX_KeyLocker,},
};
+typedef struct TdxXFAMDep {
+ int xfam_bit;
+ FeatureMask feat_mask;
+} TdxXFAMDep;
+
+/*
+ * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are
+ * defiend here.
+ *
+ * For those whose virtualization type are "XFAM & Configured & Native", they
+ * are reported as configurable bits. And they are not supported if not in the
+ * configureable bits list from KVM even if the corresponding XFAM bit is
+ * supported.
+ */
+TdxXFAMDep tdx_xfam_deps[] = {
+ { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }},
+ { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
+ { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
+ { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
+};
+
static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
uint32_t index)
{
@@ -553,6 +581,50 @@ static void tdx_add_supported_cpuid_by_attrs(void)
}
}
+static void tdx_add_supported_cpuid_by_xfam(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ const TdxXFAMDep *xfam_dep;
+ const FeatureWordInfo *f;
+ for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
+ xfam_dep = &tdx_xfam_deps[i];
+ if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
+ continue;
+ }
+
+ f = &feature_word_info[xfam_dep->feat_mask.index];
+ if (f->type != CPUID_FEATURE_WORD) {
+ continue;
+ }
+
+ e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
+ switch(f->cpuid.reg) {
+ case R_EAX:
+ e->eax |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EBX:
+ e->ebx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_ECX:
+ e->ecx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EDX:
+ e->edx |= xfam_dep->feat_mask.mask;
+ break;
+ }
+ }
+
+ e = find_in_supported_entry(0xd, 0);
+ e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
+
+ e = find_in_supported_entry(0xd, 1);
+ e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
+}
+
static void tdx_setup_supported_cpuid(void)
{
if (tdx_supported_cpuid) {
@@ -568,6 +640,7 @@ static void tdx_setup_supported_cpuid(void)
tdx_add_supported_cpuid_by_fixed1_bits();
tdx_add_supported_cpuid_by_attrs();
+ tdx_add_supported_cpuid_by_xfam();
}
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
--
2.50.1

View File

@ -0,0 +1,66 @@
From 8d316b8468ec2e87ffc2e75c422698a2acdbcd16 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 054/115] i386/tdx: Call KVM_TDX_INIT_VCPU to initialize TDX
vcpu
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [54/115] 01b560fa024020f7be4e649cabd47f9f6bca920a (bonzini/rhel-qemu-kvm)
TDX vcpu needs to be initialized by SEAMCALL(TDH.VP.INIT) and KVM
provides vcpu level IOCTL KVM_TDX_INIT_VCPU for it.
KVM_TDX_INIT_VCPU needs the address of the HOB as input. Invoke it for
each vcpu after HOB list is created.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-26-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41f7fd22073561a23229c0479d9d708dee9d3a1e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 8f0826ac11..7980daf8c4 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -259,6 +259,18 @@ static void tdx_init_ram_entries(void)
tdx_guest->nr_ram_entries = j;
}
+static void tdx_post_init_vcpus(void)
+{
+ TdxFirmwareEntry *hob;
+ CPUState *cpu;
+
+ hob = tdx_get_hob_entry(tdx_guest);
+ CPU_FOREACH(cpu) {
+ tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address,
+ &error_fatal);
+ }
+}
+
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
TdxFirmware *tdvf = &tdx_guest->tdvf;
@@ -302,6 +314,8 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
+ tdx_post_init_vcpus();
+
for_each_tdx_fw_entry(tdvf, entry) {
struct kvm_tdx_init_mem_region region;
uint32_t flags;
--
2.50.1

View File

@ -0,0 +1,75 @@
From 7dd643f8005449c3e34643f2cb85fcbab8011482 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 091/115] i386/tdx: Clarify the error message of
mrconfigid/mrowner/mrownerconfig
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [91/115] 5f8559f8d9f842f79916f49f4806dd4cf2f8c686 (bonzini/rhel-qemu-kvm)
The error message is misleading - we successfully decoded the data,
the decoded data was simply with the wrong length.
Change the error message to show it is an length check failure with both
the received and expected values.
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Link: https://lore.kernel.org/r/20250603050305.1704586-4-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41cd354d350d3c64915be9c5decbf20abd84e486)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index ca3641441c..ed3a55991a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1032,7 +1032,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return -1;
}
if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
- error_setg(errp, "TDX: failed to decode mrconfigid");
+ error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
return -1;
}
memcpy(init_vm->mrconfigid, data, data_len);
@@ -1045,7 +1047,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return -1;
}
if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
- error_setg(errp, "TDX: failed to decode mrowner");
+ error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
return -1;
}
memcpy(init_vm->mrowner, data, data_len);
@@ -1058,7 +1062,9 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return -1;
}
if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
- error_setg(errp, "TDX: failed to decode mrownerconfig");
+ error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, "
+ "expected %d bytes", data_len,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384);
return -1;
}
memcpy(init_vm->mrownerconfig, data, data_len);
--
2.50.1

View File

@ -0,0 +1,80 @@
From 25f3b21b6b1654d1ffde72e231a4635b5929a6a8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 078/115] i386/tdx: Define supported KVM features for TDX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [78/115] d0ab8e6f6795d750c9fd07bf19bec65078424075 (bonzini/rhel-qemu-kvm)
For TDX, only limited KVM PV features are supported.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-50-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4d6e288a350a977b0fb0613db952087928ccd93e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index f15ed51a32..32e03caf43 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -32,6 +32,8 @@
#include "kvm_i386.h"
#include "tdx.h"
+#include "standard-headers/asm-x86/kvm_para.h"
+
#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000)
@@ -44,6 +46,14 @@
TDX_TD_ATTRIBUTES_PKS | \
TDX_TD_ATTRIBUTES_PERFMON)
+#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1U << KVM_FEATURE_PV_UNHALT) | \
+ (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
+ (1U << KVM_FEATURE_PV_SEND_IPI) | \
+ (1U << KVM_FEATURE_POLL_CONTROL) | \
+ (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
+ (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
+
static TdxGuest *tdx_guest;
static struct kvm_tdx_capabilities *tdx_caps;
@@ -631,6 +641,14 @@ static void tdx_add_supported_cpuid_by_xfam(void)
e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
}
+static void tdx_add_supported_kvm_features(void)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = find_in_supported_entry(0x40000001, 0);
+ e->eax = TDX_SUPPORTED_KVM_FEATURES;
+}
+
static void tdx_setup_supported_cpuid(void)
{
if (tdx_supported_cpuid) {
@@ -647,6 +665,8 @@ static void tdx_setup_supported_cpuid(void)
tdx_add_supported_cpuid_by_fixed1_bits();
tdx_add_supported_cpuid_by_attrs();
tdx_add_supported_cpuid_by_xfam();
+
+ tdx_add_supported_kvm_features();
}
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
--
2.50.1

View File

@ -0,0 +1,56 @@
From 52482dfcf0f97cd5db7a210497bdf45390cb7600 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 066/115] i386/tdx: Disable PIC for TDX VMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [66/115] 8f3badca2c7cb18d3d10bb1208396f63b7aeb47b (bonzini/rhel-qemu-kvm)
Legacy PIC (8259) cannot be supported for TDX VMs since TDX module
doesn't allow directly interrupt injection. Using posted interrupts
for the PIC is not a viable option as the guest BIOS/kernel will not
do EOI for PIC IRQs, i.e. will leave the vIRR bit set.
Hence disable PIC for TDX VMs and error out if user wants PIC.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-38-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e7ef60892c80a9ce5b8504ceb13a81f4e0d4b3f7)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 9bd6843988..4f17e17308 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -381,6 +381,13 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return -EINVAL;
}
+ if (x86ms->pic == ON_OFF_AUTO_AUTO) {
+ x86ms->pic = ON_OFF_AUTO_OFF;
+ } else if (x86ms->pic == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support PIC");
+ return -EINVAL;
+ }
+
if (!tdx_caps) {
r = get_tdx_capabilities(errp);
if (r) {
--
2.50.1

View File

@ -0,0 +1,61 @@
From e8e1554e0b626131745170c94a780b0d875aad63 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 065/115] i386/tdx: Disable SMM for TDX VMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [65/115] 69f950f26ffbea4f8ad6ff7f0be7ee3a4ce13c2b (bonzini/rhel-qemu-kvm)
TDX doesn't support SMM and VMM cannot emulate SMM for TDX VMs because
VMM cannot manipulate TDX VM's memory.
Disable SMM for TDX VMs and error out if user requests to enable SMM.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-37-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 810d4e83d07ca0d072205453a42c324a51d5a5fa)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 7bc36b620e..9bd6843988 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -367,11 +367,20 @@ static Notifier tdx_machine_done_notify = {
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ X86MachineState *x86ms = X86_MACHINE(ms);
TdxGuest *tdx = TDX_GUEST(cgs);
int r = 0;
kvm_mark_guest_state_protected();
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
+ x86ms->smm = ON_OFF_AUTO_OFF;
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support SMM");
+ return -EINVAL;
+ }
+
if (!tdx_caps) {
r = get_tdx_capabilities(errp);
if (r) {
--
2.50.1

View File

@ -0,0 +1,78 @@
From 43d75b6ffdd152f55c366befb9eabbc8d0a7f6e7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 048/115] i386/tdx: Don't initialize pc.rom for TDX VMs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [48/115] 857ab7fccb138b45db15621688e412aef24b5821 (bonzini/rhel-qemu-kvm)
For TDX, the address below 1MB are entirely general RAM. No need to
initialize pc.rom memory region for TDs.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-20-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 49b1f0f812372129736c1df0421c8f67d86d362b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 5237538640..057cd1fb86 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -43,6 +43,7 @@
#include "sysemu/xen.h"
#include "sysemu/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qapi/qmp/qlist.h"
#include "qemu/error-report.h"
@@ -1131,21 +1132,23 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
--
2.50.1

View File

@ -0,0 +1,59 @@
From ec1ff403bb13fbb487a19a16c3da299038fecf2c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 104/115] i386/tdx: Don't mask off CPUID_EXT_PDCM
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [104/115] 46c1e5c86031d5d9ab9082e2d0affb3ddf7d1fb2 (bonzini/rhel-qemu-kvm)
It gets below warning when booting TDX VMs:
warning: TDX forcibly sets the feature: CPUID[eax=01h].ECX.pdcm [bit 15]
Because CPUID_EXT_PDCM is fixed1 for TDX, and MSR_IA32_PERF_CAPABILITIES is
supported for TDX guest unconditioanlly.
Don't mask off CPUID_EXT_PDCM for TDX.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250625035710.2770679-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7ff24fb657d35c014f735f69aef03810fde607ab)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflict: no call to mark_unavailable_features
---
target/i386/cpu.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 2160754869..d0161f922c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -27,6 +27,7 @@
#include "sysemu/hvf.h"
#include "hvf/hvf-i386.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "sev.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -8011,7 +8012,8 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
}
}
- if (!cpu->enable_pmu) {
+ /* PDCM is fixed1 bit for TDX */
+ if (!cpu->enable_pmu && !is_tdx_vm()) {
env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM;
}
--
2.50.1

View File

@ -0,0 +1,46 @@
From 6339bd26122c876a663e841810de28c064e09128 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 068/115] i386/tdx: Don't synchronize guest tsc for TDs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [68/115] 56f6dc49e20933d8539b2413a902a2fbad2751e0 (bonzini/rhel-qemu-kvm)
TSC of TDs is not accessible and KVM doesn't allow access of
MSR_IA32_TSC for TDs. To avoid the assert() in kvm_get_tsc, make
kvm_synchronize_all_tsc() noop for TDs,
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Connor Kuehl <ckuehl@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-40-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0ed55865b49b703af93e160d48935812a7114e07)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index f4809ee004..0c47eef03c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -319,7 +319,7 @@ void kvm_synchronize_all_tsc(void)
{
CPUState *cpu;
- if (kvm_enabled()) {
+ if (kvm_enabled() && !is_tdx_vm()) {
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
--
2.50.1

View File

@ -0,0 +1,60 @@
From fd2d6e5623acae8b935c2ee5cb03d1a1b1631bb9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 081/115] i386/tdx: Don't treat SYSCALL as unavailable
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [81/115] f742fd685ae68e7828947fa1013a5bdb0b49750d (bonzini/rhel-qemu-kvm)
On Intel CPU, the value of CPUID_EXT2_SYSCALL depends on the mode of
the vcpu. It's 0 outside 64-bit mode and 1 in 64-bit mode.
The initial state of TDX vcpu is 32-bit protected mode. At the time of
calling KVM_TDX_GET_CPUID, vcpu hasn't started running so the value read
is 0.
In reality, 64-bit mode should always be supported. So mark
CPUID_EXT2_SYSCALL always supported to avoid false warning.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-53-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit deb9db6fb789cfe80527b75983e86137589227a4)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 01fff9a27a..3e23010094 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -845,6 +845,19 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
continue;
}
+ /* Fixup for special cases */
+ switch (w) {
+ case FEAT_8000_0001_EDX:
+ /*
+ * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit
+ * mode and before vcpu running it's not in 64-bit mode.
+ */
+ actual |= CPUID_EXT2_SYSCALL;
+ break;
+ default:
+ break;
+ }
+
requested = env->features[w];
unavailable = requested & ~actual;
mark_unavailable_features(cpu, w, unavailable, unav_prefix);
--
2.50.1

View File

@ -0,0 +1,55 @@
From b7e4b3d2a67cf3af83f9226a8f3b7b159d15fba1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 056/115] i386/tdx: Enable user exit on KVM_HC_MAP_GPA_RANGE
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [56/115] 88e3d0cfdca0c78873aed608518d75ed1703c5fb (bonzini/rhel-qemu-kvm)
KVM translates TDG.VP.VMCALL<MapGPA> to KVM_HC_MAP_GPA_RANGE, and QEMU
needs to enable user exit on KVM_HC_MAP_GPA_RANGE in order to handle the
memory conversion requested by TD guest.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-28-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1ff5048d74e661943260c33e864c4118acb37ab4)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 19ed1038a7..62c83394d0 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -19,6 +19,8 @@
#include "sysemu/sysemu.h"
#include "exec/ramblock.h"
+#include <linux/kvm_para.h>
+
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
@@ -376,6 +378,11 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
}
+ /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
+ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
+ return -EOPNOTSUPP;
+ }
+
qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
tdx_guest = tdx;
--
2.50.1

View File

@ -0,0 +1,59 @@
From bbfdcd93ce27ea64b6f6854dfdb8635f107de76c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 088/115] i386/tdx: Error and exit when named cpu model is
requested
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [88/115] 7ffb866075f6c8951eee9fbbd6ff3a0362519c4a (bonzini/rhel-qemu-kvm)
Currently, it gets below error when requesting any named cpu model with
"-cpu" to boot a TDX VM:
qemu-system-x86_64: KVM_TDX_INIT_VM failed: Invalid argument
It misleads people to think it's the bug of KVM or QEMU. It is just that
current QEMU doesn't support named cpu model for TDX.
To support named cpu models for TDX guest, there are opens to be
finalized and needs a mount of additional work.
For now, explicitly check the case when named cpu model is requested.
Error report a hint and exit.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250612133801.2238342-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 750560f8a832361cf5cc4cd7bc4f56e1e76206f6)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index cca2b11622..3099e40baa 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -739,8 +739,14 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
+ X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
X86CPU *x86cpu = X86_CPU(cpu);
+ if (xcc->model) {
+ error_report("Named cpu model is not supported for TDX yet!");
+ exit(1);
+ }
+
object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
/* invtsc is fixed1 for TD guest */
--
2.50.1

View File

@ -0,0 +1,233 @@
From 58e6218a7d4b00316cf4ccd6a394190169a4cc61 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 080/115] i386/tdx: Fetch and validate CPUID of TD guest
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [80/115] c2ba91b28a4d0191fb186aea069733b81ab56e0f (bonzini/rhel-qemu-kvm)
Use KVM_TDX_GET_CPUID to get the CPUIDs that are managed and enfored
by TDX module for TD guest. Check QEMU's configuration against the
fetched data.
Print wanring message when 1. a feature is not supported but requested
by QEMU or 2. QEMU doesn't want to expose a feature while it is enforced
enabled.
- If cpu->enforced_cpuid is not set, prints the warning message of both
1) and 2) and tweak QEMU's configuration.
- If cpu->enforced_cpuid is set, quit if any case of 1) or 2).
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-52-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e3d1a4a6d1d61cf5fbd0e4b389cfb3976093739f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 33 +++++++++++++-
target/i386/cpu.h | 7 +++
target/i386/kvm/tdx.c | 101 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 139 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index cd6d9e8c1c..433d0a0418 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5937,8 +5937,8 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu)
return false;
}
-static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
- const char *verbose_prefix)
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
{
CPUX86State *env = &cpu->env;
FeatureWordInfo *f = &feature_word_info[w];
@@ -5965,6 +5965,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
}
}
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
+{
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *f = &feature_word_info[w];
+ int i;
+
+ if (!cpu->force_features) {
+ env->features[w] |= mask;
+ }
+
+ cpu->forced_on_features[w] |= mask;
+
+ if (!verbose_prefix) {
+ return;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if ((1ULL << i) & mask) {
+ g_autofree char *feat_word_str = feature_word_description(f);
+ warn_report("%s: %s%s%s [bit %d]",
+ verbose_prefix,
+ feat_word_str,
+ f->feat_names[i] ? "." : "",
+ f->feat_names[i] ? f->feat_names[i] : "", i);
+ }
+ }
+}
+
static void x86_cpuid_version_get_family(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 19645eb6f8..2e73945b28 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2144,6 +2144,9 @@ struct ArchCPU {
/* Features that were filtered out because of missing host capabilities */
FeatureWordArray filtered_features;
+ /* Features that are forced enabled by underlying hypervisor, e.g., TDX */
+ FeatureWordArray forced_on_features;
+
/* Enable PMU CPUID bits. This can't be enabled by default yet because
* it doesn't have ABI stability guarantees, as it passes all PMU CPUID
* bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel
@@ -2455,6 +2458,10 @@ void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
bool cpu_has_x2apic_feature(CPUX86State *env);
bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg);
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
{
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 32e03caf43..01fff9a27a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -766,6 +766,106 @@ static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
return value;
}
+static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
+{
+ struct kvm_cpuid2 *fetch_cpuid;
+ int size = KVM_MAX_CPUID_ENTRIES;
+ Error *local_err = NULL;
+ int r;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+
+ fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
+ sizeof(struct kvm_cpuid_entry2) * size);
+ fetch_cpuid->nent = size;
+ r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
+ if (r == -E2BIG) {
+ g_free(fetch_cpuid);
+ size = fetch_cpuid->nent;
+ }
+ } while (r == -E2BIG);
+
+ if (r < 0) {
+ error_report_err(local_err);
+ *ret = r;
+ return NULL;
+ }
+
+ return fetch_cpuid;
+}
+
+static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
+{
+ uint64_t actual, requested, unavailable, forced_on;
+ g_autofree struct kvm_cpuid2 *fetch_cpuid;
+ const char *forced_on_prefix = NULL;
+ const char *unav_prefix = NULL;
+ struct kvm_cpuid_entry2 *entry;
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *wi;
+ FeatureWord w;
+ bool mismatch = false;
+ int r;
+
+ fetch_cpuid = tdx_fetch_cpuid(cs, &r);
+ if (!fetch_cpuid) {
+ return r;
+ }
+
+ if (cpu->check_cpuid || cpu->enforce_cpuid) {
+ unav_prefix = "TDX doesn't support requested feature";
+ forced_on_prefix = "TDX forcibly sets the feature";
+ }
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ actual = 0;
+
+ switch (wi->type) {
+ case CPUID_FEATURE_WORD:
+ entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
+ if (!entry) {
+ /*
+ * If KVM doesn't report it means it's totally configurable
+ * by QEMU
+ */
+ continue;
+ }
+
+ actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
+ break;
+ case MSR_FEATURE_WORD:
+ /*
+ * TODO:
+ * validate MSR features when KVM has interface report them.
+ */
+ continue;
+ }
+
+ requested = env->features[w];
+ unavailable = requested & ~actual;
+ mark_unavailable_features(cpu, w, unavailable, unav_prefix);
+ if (unavailable) {
+ mismatch = true;
+ }
+
+ forced_on = actual & ~requested;
+ mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
+ if (forced_on) {
+ mismatch = true;
+ }
+ }
+
+ if (cpu->enforce_cpuid && mismatch) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
@@ -1161,4 +1261,5 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
x86_klass->kvm_type = tdx_kvm_type;
x86_klass->cpu_instance_init = tdx_cpu_instance_init;
x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
+ x86_klass->check_features = tdx_check_features;
}
--
2.50.1

View File

@ -0,0 +1,44 @@
From 8468a1a6a23c02c4a4b09501d7702d7e44095df9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 055/115] i386/tdx: Finalize TDX VM
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [55/115] 391331e4b8c4582cc4db040188c44ab5ff23987e (bonzini/rhel-qemu-kvm)
Invoke KVM_TDX_FINALIZE_VM to finalize the TD's measurement and make
the TD vCPUs runnable once machine initialization is complete.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-27-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ae60ff4e9f9e5790f79abf866ec67270c28ca477)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 7980daf8c4..19ed1038a7 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -353,6 +353,9 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
*/
ram_block = tdx_guest->tdvf_mr->ram_block;
ram_block_discard_range(ram_block, 0, ram_block->max_length);
+
+ tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
+ CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
}
static Notifier tdx_machine_done_notify = {
--
2.50.1

View File

@ -0,0 +1,121 @@
From 537c96692b8d9830e4712a74f17d294cfd43a2bb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 085/115] i386/tdx: Fix build on 32-bit host
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [85/115] e62d030c943c217f64f1488b4ef3f52e8d77cab9 (bonzini/rhel-qemu-kvm)
Use PRI formats where required and fix pointer cast.
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/r/20250602173101.1052983-2-clg@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e7f926eb7f5b81c709313974b476ed181c9c76d5)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index b9c3ba3725..cca2b11622 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -284,7 +284,7 @@ static void tdx_post_init_vcpus(void)
hob = tdx_get_hob_entry(tdx_guest);
CPU_FOREACH(cpu) {
- tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address,
+ tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
&error_fatal);
}
}
@@ -339,7 +339,7 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
uint32_t flags;
region = (struct kvm_tdx_init_mem_region) {
- .source_addr = (uint64_t)entry->mem_ptr,
+ .source_addr = (uintptr_t)entry->mem_ptr,
.gpa = entry->address,
.nr_pages = entry->size >> 12,
};
@@ -893,16 +893,16 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
- error_setg(errp, "Invalid attributes 0x%lx for TDX VM "
- "(KVM supported: 0x%llx)", tdx->attributes,
- tdx_caps->supported_attrs);
+ error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
+ "(KVM supported: 0x%"PRIx64")", tdx->attributes,
+ (uint64_t)tdx_caps->supported_attrs);
return -1;
}
if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
error_setg(errp, "Some QEMU unsupported TD attribute bits being "
- "requested: 0x%lx (QEMU supported: 0x%llx)",
- tdx->attributes, TDX_SUPPORTED_TD_ATTRS);
+ "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")",
+ tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
return -1;
}
@@ -931,8 +931,8 @@ static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
env->features[FEAT_XSAVE_XSS_HI];
if (xfam & ~tdx_caps->supported_xfam) {
- error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
- xfam, tdx_caps->supported_xfam);
+ error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))",
+ xfam, (uint64_t)tdx_caps->supported_xfam);
return -1;
}
@@ -999,14 +999,14 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
- error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency "
+ error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency "
"between [%d, %d] kHz", env->tsc_khz,
TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
return -EINVAL;
}
if (env->tsc_khz % (25 * 1000)) {
- error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz",
+ error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz",
env->tsc_khz);
return -EINVAL;
}
@@ -1014,7 +1014,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
/* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
if (r < 0) {
- error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz",
+ error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
env->tsc_khz);
return r;
}
@@ -1139,7 +1139,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
uint64_t gpa = -1ull;
if (error_code & 0xffff) {
- error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx",
+ error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
error_code);
return -1;
}
--
2.50.1

View File

@ -0,0 +1,78 @@
From 6c84c60f50287a00c91dd390b8d71f008c704048 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 102/115] i386/tdx: Fix the report of gpa in QAPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [102/115] ccaf1112d985ac652bd118bce4486853163cbc16 (bonzini/rhel-qemu-kvm)
Gpa is defined in QAPI but never reported to monitor because has_gpa is
never set to ture.
Fix it by setting has_gpa to ture when TDX_REPORT_FATAL_ERROR_GPA_VALID
is set in error_code.
Fixes: 6e250463b08b ("i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic facility")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Link: https://lore.kernel.org/r/20250710035538.303136-1-zhenzhong.duan@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b28f6d5c16f19f8c56926c10929db29f913895ad)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index e56db74f58..20fcd9a4c5 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1321,7 +1321,8 @@ void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run)
}
static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
- char *message, uint64_t gpa)
+ char *message, bool has_gpa,
+ uint64_t gpa)
{
GuestPanicInformation *panic_info;
@@ -1330,6 +1331,7 @@ static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
panic_info->u.tdx.error_code = (uint32_t) error_code;
panic_info->u.tdx.message = message;
panic_info->u.tdx.gpa = gpa;
+ panic_info->u.tdx.has_gpa = has_gpa;
qemu_system_guest_panicked(panic_info);
}
@@ -1349,6 +1351,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
char *message = NULL;
uint64_t *tmp;
uint64_t gpa = -1ull;
+ bool has_gpa = false;
if (error_code & 0xffff) {
error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
@@ -1381,9 +1384,10 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
gpa = run->system_event.data[R_R13];
+ has_gpa = true;
}
- tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
+ tdx_panicked_on_fatal_error(cpu, error_code, message, has_gpa, gpa);
return -1;
}
--
2.50.1

View File

@ -0,0 +1,50 @@
From 3d855c5ffd0b235642c334ea3e9680451e2e50a6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 090/115] i386/tdx: Fix the typo of the comment of struct
TdxGuest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [90/115] 86befce58e11c4eba084141ec177648b901dd165 (bonzini/rhel-qemu-kvm)
Change sha348 to sha384.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Link: https://lore.kernel.org/r/20250603050305.1704586-3-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a38da9f4876bb17d7ed9c6e24964b12b61877d38)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 04b5afe199..8dd66e9014 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -40,9 +40,9 @@ typedef struct TdxGuest {
bool initialized;
uint64_t attributes; /* TD attributes */
uint64_t xfam;
- char *mrconfigid; /* base64 encoded sha348 digest */
- char *mrowner; /* base64 encoded sha348 digest */
- char *mrownerconfig; /* base64 encoded sha348 digest */
+ char *mrconfigid; /* base64 encoded sha384 digest */
+ char *mrowner; /* base64 encoded sha384 digest */
+ char *mrownerconfig; /* base64 encoded sha384 digest */
MemoryRegion *tdvf_mr;
TdxFirmware tdvf;
--
2.50.1

View File

@ -0,0 +1,45 @@
From 06700e387cf55d20af5fa85245b189408f35a851 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 063/115] i386/tdx: Force exposing CPUID 0x1f
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [63/115] 45b63d9803eb03a2f9014c9a3eb084c2edf5f11d (bonzini/rhel-qemu-kvm)
TDX uses CPUID 0x1f to configure TD guest's CPU topology. So set
enable_cpuid_0x1f for TDs.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-35-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9002494f80b751a7655045c5f46bf90bc1d3bbd0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index afd7e62422..410f8a9997 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -400,7 +400,11 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
+ X86CPU *x86cpu = X86_CPU(cpu);
+
object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+
+ x86cpu->enable_cpuid_0x1f = true;
}
static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
--
2.50.1

View File

@ -0,0 +1,193 @@
From 5c6bd0700ee50d40a791c1a00c475f9042e23343 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 034/115] i386/tdx: Get tdx_capabilities via
KVM_TDX_CAPABILITIES
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [34/115] bd4711c0cf17e73ea2f21417b6b031c9118a35c6 (bonzini/rhel-qemu-kvm)
KVM provides TDX capabilities via sub command KVM_TDX_CAPABILITIES of
IOCTL(KVM_MEMORY_ENCRYPT_OP). Get the capabilities when initializing
TDX context. It will be used to validate user's setting later.
Since there is no interface reporting how many cpuid configs contains in
KVM_TDX_CAPABILITIES, QEMU chooses to try starting with a known number
and abort when it exceeds KVM_MAX_CPUID_ENTRIES.
Besides, introduce the interfaces to invoke TDX "ioctls" at VCPU scope
in preparation.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-6-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8eddedc3701d2190db976a05155a8263c8ec175b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 2 -
target/i386/kvm/kvm_i386.h | 2 +
target/i386/kvm/tdx.c | 107 ++++++++++++++++++++++++++++++++++++-
3 files changed, 108 insertions(+), 3 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 12da34b9c5..bf4493dd84 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1771,8 +1771,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
static Error *invtsc_mig_blocker;
-#define KVM_MAX_CPUID_ENTRIES 100
-
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 7edb154a16..499691e8d0 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -13,6 +13,8 @@
#include "sysemu/kvm.h"
+#define KVM_MAX_CPUID_ENTRIES 100
+
/* always false if !CONFIG_KVM */
#define kvm_pit_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 4ff9486081..c67be5e618 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -10,17 +10,122 @@
*/
#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "hw/i386/x86.h"
#include "kvm_i386.h"
#include "tdx.h"
+static struct kvm_tdx_capabilities *tdx_caps;
+
+enum tdx_ioctl_level {
+ TDX_VM_IOCTL,
+ TDX_VCPU_IOCTL,
+};
+
+static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
+ int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ struct kvm_tdx_cmd tdx_cmd = {};
+ int r;
+
+ const char *tdx_ioctl_name[] = {
+ [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
+ [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
+ [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
+ [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
+ [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
+ [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
+ };
+
+ tdx_cmd.id = cmd_id;
+ tdx_cmd.flags = flags;
+ tdx_cmd.data = (__u64)(unsigned long)data;
+
+ switch (level) {
+ case TDX_VM_IOCTL:
+ r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ case TDX_VCPU_IOCTL:
+ r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ default:
+ error_setg(errp, "Invalid tdx_ioctl_level %d", level);
+ return -EINVAL;
+ }
+
+ if (r < 0) {
+ error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
+ tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
+ }
+ return r;
+}
+
+static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
+}
+
+static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
+ void *data, Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
+}
+
+static int get_tdx_capabilities(Error **errp)
+{
+ struct kvm_tdx_capabilities *caps;
+ /* 1st generation of TDX reports 6 cpuid configs */
+ int nr_cpuid_configs = 6;
+ size_t size;
+ int r;
+
+ do {
+ Error *local_err = NULL;
+ size = sizeof(struct kvm_tdx_capabilities) +
+ nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
+ caps = g_malloc0(size);
+ caps->cpuid.nent = nr_cpuid_configs;
+
+ r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
+ if (r == -E2BIG) {
+ g_free(caps);
+ nr_cpuid_configs *= 2;
+ if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
+ error_report("KVM TDX seems broken that number of CPUID entries"
+ " in kvm_tdx_capabilities exceeds limit: %d",
+ KVM_MAX_CPUID_ENTRIES);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ error_free(local_err);
+ } else if (r < 0) {
+ g_free(caps);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ } while (r == -E2BIG);
+
+ tdx_caps = caps;
+
+ return 0;
+}
+
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
+ int r = 0;
+
kvm_mark_guest_state_protected();
- return 0;
+ if (!tdx_caps) {
+ r = get_tdx_capabilities(errp);
+ }
+
+ return r;
}
static int tdx_kvm_type(X86ConfidentialGuest *cg)
--
2.50.1

View File

@ -0,0 +1,146 @@
From 7c045d8fcd636f8e2e52f303fc668bff20bc6e5d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 057/115] i386/tdx: Handle KVM_SYSTEM_EVENT_TDX_FATAL
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [57/115] 2b4612a6e7c55849578ab1dc803849190c80692d (bonzini/rhel-qemu-kvm)
TD guest can use TDG.VP.VMCALL<REPORT_FATAL_ERROR> to request
termination. KVM translates such request into KVM_EXIT_SYSTEM_EVENT with
type of KVM_SYSTEM_EVENT_TDX_FATAL.
Add hanlder for such exit. Parse and print the error message, and
terminate the TD guest in the handler.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-29-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 98dbfd6849f117de02ac6f513f2a1f95563e60ae)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 10 +++++++++
target/i386/kvm/tdx-stub.c | 5 +++++
target/i386/kvm/tdx.c | 46 ++++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.h | 2 ++
4 files changed, 63 insertions(+)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index fa61221190..4bda4f5525 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6019,6 +6019,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case KVM_EXIT_HYPERCALL:
ret = kvm_handle_hypercall(run);
break;
+ case KVM_EXIT_SYSTEM_EVENT:
+ switch (run->system_event.type) {
+ case KVM_SYSTEM_EVENT_TDX_FATAL:
+ ret = tdx_handle_report_fatal_error(cpu, run);
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 7748b6d0a4..720a4ff046 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -13,3 +13,8 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
{
return -EINVAL;
}
+
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 62c83394d0..679613ab55 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -615,6 +615,52 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}
+/*
+ * Only 8 registers can contain valid ASCII byte stream to form the fatal
+ * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
+ */
+#define TDX_FATAL_MESSAGE_MAX 64
+
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ uint64_t error_code = run->system_event.data[R_R12];
+ uint64_t reg_mask = run->system_event.data[R_ECX];
+ char *message = NULL;
+ uint64_t *tmp;
+
+ if (error_code & 0xffff) {
+ error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx",
+ error_code);
+ return -1;
+ }
+
+ if (reg_mask) {
+ message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
+ tmp = (uint64_t *)message;
+
+#define COPY_REG(REG) \
+ do { \
+ if (reg_mask & BIT_ULL(REG)) { \
+ *(tmp++) = run->system_event.data[REG]; \
+ } \
+ } while (0)
+
+ COPY_REG(R_R14);
+ COPY_REG(R_R15);
+ COPY_REG(R_EBX);
+ COPY_REG(R_EDI);
+ COPY_REG(R_ESI);
+ COPY_REG(R_R8);
+ COPY_REG(R_R9);
+ COPY_REG(R_EDX);
+ *((char *)tmp) = '\0';
+ }
+#undef COPY_REG
+
+ error_report("TD guest reports fatal error. %s", message ? : "");
+ return -1;
+}
+
static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
{
TdxGuest *tdx = TDX_GUEST(obj);
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 36a7400e74..04b5afe199 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -8,6 +8,7 @@
#endif
#include "confidential-guest.h"
+#include "cpu.h"
#include "hw/i386/tdvf.h"
#define TYPE_TDX_GUEST "tdx-guest"
@@ -59,5 +60,6 @@ bool is_tdx_vm(void);
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
int tdx_parse_tdvf(void *flash_ptr, int size);
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,179 @@
From 8f50b918a55f8a30e782eef028a625a209a19fde Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 073/115] i386/tdx: Implement adjust_cpuid_features() for TDX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [73/115] 115b0cd2bbc276b2cf915e9ad48fd89c4ee7ce8e (bonzini/rhel-qemu-kvm)
Maintain a TDX specific supported CPUID set, and use it to mask the
common supported CPUID value of KVM. It can avoid newly added supported
features (reported via KVM_GET_SUPPORTED_CPUID) for common VMs being
falsely reported as supported for TDX.
As the first step, initialize the TDX supported CPUID set with all the
configurable CPUID bits. It's not complete because there are other CPUID
bits are supported for TDX but not reported as directly configurable.
E.g. the XFAM related bits, attribute related bits and fixed-1 bits.
They will be handled in the future.
Also, what matters are the CPUID bits related to QEMU's feature word.
Only mask the CPUID leafs which are feature word leaf.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-45-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 75ec6189f5c65cab210dd9f16cf4eef368038d45)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 16 ++++++++++++++++
target/i386/cpu.h | 1 +
target/i386/kvm/kvm.c | 2 +-
target/i386/kvm/kvm_i386.h | 1 +
target/i386/kvm/tdx.c | 34 ++++++++++++++++++++++++++++++++++
5 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index ab34626b19..2da456da64 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1644,6 +1644,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
};
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg)
+{
+ FeatureWordInfo *wi;
+ FeatureWord w;
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature &&
+ (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) &&
+ wi->cpuid.reg == reg) {
+ return true;
+ }
+ }
+ return false;
+}
+
typedef struct FeatureMask {
FeatureWord index;
uint64_t mask;
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 83fa89bf0a..601e828577 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2431,6 +2431,7 @@ void cpu_set_apic_feature(CPUX86State *env);
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
bool cpu_has_x2apic_feature(CPUX86State *env);
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg);
static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
{
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5349ff4db7..76352323e4 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -385,7 +385,7 @@ static bool host_tsx_broken(void)
/* Returns the value for a specific register on the cpuid entry
*/
-static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
{
uint32_t ret = 0;
switch (reg) {
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index f2fbf3f99f..797610496a 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -62,6 +62,7 @@ void kvm_update_msi_routes_all(void *private, bool global,
struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
uint32_t function,
uint32_t index);
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg);
uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
uint32_t cpuid_i);
#endif /* CONFIG_KVM */
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 58fb68cab0..4949d01f22 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -45,6 +45,7 @@
static TdxGuest *tdx_guest;
static struct kvm_tdx_capabilities *tdx_caps;
+static struct kvm_cpuid2 *tdx_supported_cpuid;
/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
bool is_tdx_vm(void)
@@ -366,6 +367,20 @@ static Notifier tdx_machine_done_notify = {
.notify = tdx_finalize_vm,
};
+static void tdx_setup_supported_cpuid(void)
+{
+ if (tdx_supported_cpuid) {
+ return;
+ }
+
+ tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
+ KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
+
+ memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
+ tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
+ tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
+}
+
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
@@ -403,6 +418,8 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
}
+ tdx_setup_supported_cpuid();
+
/* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
return -EOPNOTSUPP;
@@ -440,6 +457,22 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
x86cpu->enable_cpuid_0x1f = true;
}
+static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
+ uint32_t feature, uint32_t index,
+ int reg, uint32_t value)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ if (is_feature_word_cpuid(feature, index, reg)) {
+ e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
+ if (e) {
+ value &= cpuid_entry_get_reg(e, reg);
+ }
+ }
+
+ return value;
+}
+
static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
@@ -834,4 +867,5 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
klass->kvm_init = tdx_kvm_init;
x86_klass->kvm_type = tdx_kvm_type;
x86_klass->cpu_instance_init = tdx_cpu_instance_init;
+ x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
}
--
2.50.1

View File

@ -0,0 +1,92 @@
From 08b7b3ce573d25b6130c28620c51ac57532bd454 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 033/115] i386/tdx: Implement tdx_kvm_init() to initialize TDX
VM context
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [33/115] 1c018f800145aced56f1268aa0f70cbbe1ec4658 (bonzini/rhel-qemu-kvm)
Implement TDX specific ConfidentialGuestSupportClass::kvm_init()
callback, tdx_kvm_init().
Mark guest state is proctected for TDX VM. More TDX specific
initialization will be added later.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-5-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 631a2ac5a4beab740b342367550562cd659b4c4a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 11 +----------
target/i386/kvm/tdx.c | 10 ++++++++++
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index d7c2dc3c71..12da34b9c5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3021,16 +3021,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Error *local_err = NULL;
/*
- * Initialize SEV context, if required
- *
- * If no memory encryption is requested (ms->cgs == NULL) this is
- * a no-op.
- *
- * It's also a no-op if a non-SEV confidential guest support
- * mechanism is selected. SEV is the only mechanism available to
- * select on x86 at present, so this doesn't arise, but if new
- * mechanisms are supported in future (e.g. TDX), they'll need
- * their own initialization either here or elsewhere.
+ * Initialize confidential guest (SEV/TDX) context, if required
*/
if (ms->cgs) {
ret = confidential_guest_kvm_init(ms->cgs, &local_err);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index d785c1f6d1..4ff9486081 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -12,9 +12,17 @@
#include "qemu/osdep.h"
#include "qom/object_interfaces.h"
+#include "hw/i386/x86.h"
#include "kvm_i386.h"
#include "tdx.h"
+static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ kvm_mark_guest_state_protected();
+
+ return 0;
+}
+
static int tdx_kvm_type(X86ConfidentialGuest *cg)
{
/* Do the object check */
@@ -49,7 +57,9 @@ static void tdx_guest_finalize(Object *obj)
static void tdx_guest_class_init(ObjectClass *oc, void *data)
{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+ klass->kvm_init = tdx_kvm_init;
x86_klass->kvm_type = tdx_kvm_type;
}
--
2.50.1

View File

@ -0,0 +1,76 @@
From c741697e7ae55ca5742e9518b8b5071b66b1eff0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 032/115] i386/tdx: Implement tdx_kvm_type() for TDX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [32/115] db2604869e78aaf67a57ed2bfc34b15d7e9a830f (bonzini/rhel-qemu-kvm)
TDX VM requires VM type to be KVM_X86_TDX_VM. Implement tdx_kvm_type()
as X86ConfidentialGuestClass->kvm_type.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-4-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b455880e5515a9fc2b923bfc6c60bb54519b51d3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 1 +
target/i386/kvm/tdx.c | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index fe6b34bb10..d7c2dc3c71 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -183,6 +183,7 @@ static const char *vm_type_name[] = {
[KVM_X86_SEV_VM] = "SEV",
[KVM_X86_SEV_ES_VM] = "SEV-ES",
[KVM_X86_SNP_VM] = "SEV-SNP",
+ [KVM_X86_TDX_VM] = "TDX",
};
bool kvm_is_vm_type_supported(int type)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index ec84ae2947..d785c1f6d1 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -12,8 +12,17 @@
#include "qemu/osdep.h"
#include "qom/object_interfaces.h"
+#include "kvm_i386.h"
#include "tdx.h"
+static int tdx_kvm_type(X86ConfidentialGuest *cg)
+{
+ /* Do the object check */
+ TDX_GUEST(cg);
+
+ return KVM_X86_TDX_VM;
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -40,4 +49,7 @@ static void tdx_guest_finalize(Object *obj)
static void tdx_guest_class_init(ObjectClass *oc, void *data)
{
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+
+ x86_klass->kvm_type = tdx_kvm_type;
}
--
2.50.1

View File

@ -0,0 +1,101 @@
From a71b4e5d1ce8bcb7cf076500a1fe8871a674b03c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 044/115] i386/tdx: Implement user specified tsc frequency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [44/115] f12a5f1a60aa8bdd69d79e31b92dcf21380b7659 (bonzini/rhel-qemu-kvm)
Reuse "-cpu,tsc-frequency=" to get user wanted tsc frequency and call VM
scope VM_SET_TSC_KHZ to set the tsc frequency of TD before KVM_TDX_INIT_VM.
Besides, sanity check the tsc frequency to be in the legal range and
legal granularity (required by TDX module).
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-16-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0e73b843616e52882940ab89e1b0e86e22be2162)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 9 +++++++++
target/i386/kvm/tdx.c | 25 +++++++++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 3b71c182ab..fa61221190 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -861,6 +861,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
int r, cur_freq;
bool set_ioctl = false;
+ /*
+ * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope
+ * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ
+ * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu()
+ */
+ if (is_tdx_vm()) {
+ return 0;
+ }
+
if (!env->tsc_khz) {
return 0;
}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index c96e8eb7b8..56ad5f599d 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -20,6 +20,9 @@
#include "kvm_i386.h"
#include "tdx.h"
+#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
+#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000)
+
#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30)
@@ -267,6 +270,28 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return r;
}
+ if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
+ env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
+ error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency "
+ "between [%d, %d] kHz", env->tsc_khz,
+ TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
+ return -EINVAL;
+ }
+
+ if (env->tsc_khz % (25 * 1000)) {
+ error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz",
+ env->tsc_khz);
+ return -EINVAL;
+ }
+
+ /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */
+ r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz",
+ env->tsc_khz);
+ return r;
+ }
+
if (tdx_guest->mrconfigid) {
g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
strlen(tdx_guest->mrconfigid), &data_len, errp);
--
2.50.1

View File

@ -0,0 +1,283 @@
From 7f2aa231529a03552d642eccae4c4cc209a3ccdb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 037/115] i386/tdx: Initialize TDX before creating TD vcpus
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [37/115] 1626ea547590c6f25d7d51b66a23820a701398af (bonzini/rhel-qemu-kvm)
Invoke KVM_TDX_INIT_VM in kvm_arch_pre_create_vcpu() that
KVM_TDX_INIT_VM configures global TD configurations, e.g. the canonical
CPUID config, and must be executed prior to creating vCPUs.
Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.
Note, this doesn't address the fact that QEMU may change the CPUID
configuration when creating vCPUs, i.e. punts on refactoring QEMU to
provide a stable CPUID config prior to kvm_arch_init().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-9-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f15898b0f50609d66465326221aa54b6699da674)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 16 +++---
target/i386/kvm/kvm_i386.h | 5 ++
target/i386/kvm/meson.build | 2 +-
target/i386/kvm/tdx-stub.c | 10 ++++
target/i386/kvm/tdx.c | 105 ++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.h | 6 +++
6 files changed, 137 insertions(+), 7 deletions(-)
create mode 100644 target/i386/kvm/tdx-stub.c
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 1fddec6b9c..3b71c182ab 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -38,6 +38,7 @@
#include "kvm_i386.h"
#include "../confidential-guest.h"
#include "sev.h"
+#include "tdx.h"
#include "xen-emu.h"
#include "hyperv.h"
#include "hyperv-proto.h"
@@ -406,9 +407,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
/* Find matching entry for function/index on kvm_cpuid2 struct
*/
-static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
- uint32_t function,
- uint32_t index)
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index)
{
int i;
for (i = 0; i < cpuid->nent; ++i) {
@@ -1813,9 +1814,8 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
-static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
- struct kvm_cpuid_entry2 *entries,
- uint32_t cpuid_i)
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
@@ -2041,6 +2041,10 @@ full:
int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
{
+ if (is_tdx_vm()) {
+ return tdx_pre_create_vcpu(cpu, errp);
+ }
+
return 0;
}
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 499691e8d0..f2fbf3f99f 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -59,6 +59,11 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
void kvm_update_msi_routes_all(void *private, bool global,
uint32_t index, uint32_t mask);
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index);
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i);
#endif /* CONFIG_KVM */
void kvm_pc_setup_irq_routing(bool pci_enabled);
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 466bccb9cb..3f44cdedb7 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,7 +8,7 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
-i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
new file mode 100644
index 0000000000..2344433594
--- /dev/null
+++ b/target/i386/kvm/tdx-stub.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+
+#include "tdx.h"
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 16f67e18ae..8f02c76249 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -149,6 +149,109 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_TDX_VM;
}
+static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+ uint64_t xfam;
+
+ xfam = env->features[FEAT_XSAVE_XCR0_LO] |
+ env->features[FEAT_XSAVE_XCR0_HI] |
+ env->features[FEAT_XSAVE_XSS_LO] |
+ env->features[FEAT_XSAVE_XSS_HI];
+
+ if (xfam & ~tdx_caps->supported_xfam) {
+ error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
+ xfam, tdx_caps->supported_xfam);
+ return -1;
+ }
+
+ tdx_guest->xfam = xfam;
+ return 0;
+}
+
+static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
+{
+ int i, dest_cnt = 0;
+ struct kvm_cpuid_entry2 *src, *dest, *conf;
+
+ for (i = 0; i < cpuids->nent; i++) {
+ src = cpuids->entries + i;
+ conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
+ if (!conf) {
+ continue;
+ }
+ dest = cpuids->entries + dest_cnt;
+
+ dest->function = src->function;
+ dest->index = src->index;
+ dest->flags = src->flags;
+ dest->eax = src->eax & conf->eax;
+ dest->ebx = src->ebx & conf->ebx;
+ dest->ecx = src->ecx & conf->ecx;
+ dest->edx = src->edx & conf->edx;
+
+ dest_cnt++;
+ }
+ cpuids->nent = dest_cnt++;
+}
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
+ Error *local_err = NULL;
+ int retry = 10000;
+ int r = 0;
+
+ QEMU_LOCK_GUARD(&tdx_guest->lock);
+ if (tdx_guest->initialized) {
+ return r;
+ }
+
+ init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+ r = setup_td_xfam(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
+ tdx_filter_cpuid(&init_vm->cpuid);
+
+ init_vm->attributes = tdx_guest->attributes;
+ init_vm->xfam = tdx_guest->xfam;
+
+ /*
+ * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE)
+ * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or
+ * RDSEED) is busy.
+ *
+ * Retry for the case.
+ */
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
+ } while (r == -EAGAIN && --retry);
+
+ if (r < 0) {
+ if (!retry) {
+ error_append_hint(&local_err, "Hardware RNG (Random Number "
+ "Generator) is busy occupied by someone (via RDRAND/RDSEED) "
+ "maliciously, which leads to KVM_TDX_INIT_VM keeping failure "
+ "due to lack of entropy.\n");
+ }
+ error_propagate(errp, local_err);
+ return r;
+ }
+
+ tdx_guest->initialized = true;
+
+ return 0;
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -162,6 +265,8 @@ static void tdx_guest_init(Object *obj)
ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
TdxGuest *tdx = TDX_GUEST(obj);
+ qemu_mutex_init(&tdx->lock);
+
cgs->require_guest_memfd = true;
tdx->attributes = 0;
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index de8ae91961..4e2b5c61ff 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -19,7 +19,11 @@ typedef struct TdxGuestClass {
typedef struct TdxGuest {
X86ConfidentialGuest parent_obj;
+ QemuMutex lock;
+
+ bool initialized;
uint64_t attributes; /* TD attributes */
+ uint64_t xfam;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -28,4 +32,6 @@ bool is_tdx_vm(void);
#define is_tdx_vm() 0
#endif /* CONFIG_TDX */
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
+
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,104 @@
From af03f2b600e6d02d86b21cc25bbeeaa35d104cfc Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 035/115] i386/tdx: Introduce is_tdx_vm() helper and cache
tdx_guest object
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [35/115] 8e84ea1bc7eaf07ccfd915601b471cd75d01bf90 (bonzini/rhel-qemu-kvm)
It will need special handling for TDX VMs all around the QEMU.
Introduce is_tdx_vm() helper to query if it's a TDX VM.
Cache tdx_guest object thus no need to cast from ms->cgs every time.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-7-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1619d0e45be0d1e48a46d80963b4e77dc1b000a2)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 15 ++++++++++++++-
target/i386/kvm/tdx.h | 10 ++++++++++
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index c67be5e618..16f67e18ae 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -18,8 +18,16 @@
#include "kvm_i386.h"
#include "tdx.h"
+static TdxGuest *tdx_guest;
+
static struct kvm_tdx_capabilities *tdx_caps;
+/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
+bool is_tdx_vm(void)
+{
+ return !!tdx_guest;
+}
+
enum tdx_ioctl_level {
TDX_VM_IOCTL,
TDX_VCPU_IOCTL,
@@ -117,15 +125,20 @@ static int get_tdx_capabilities(Error **errp)
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
+ TdxGuest *tdx = TDX_GUEST(cgs);
int r = 0;
kvm_mark_guest_state_protected();
if (!tdx_caps) {
r = get_tdx_capabilities(errp);
+ if (r) {
+ return r;
+ }
}
- return r;
+ tdx_guest = tdx;
+ return 0;
}
static int tdx_kvm_type(X86ConfidentialGuest *cg)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index f3b7253361..de8ae91961 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -3,6 +3,10 @@
#ifndef QEMU_I386_TDX_H
#define QEMU_I386_TDX_H
+#ifndef CONFIG_USER_ONLY
+#include CONFIG_DEVICES /* CONFIG_TDX */
+#endif
+
#include "confidential-guest.h"
#define TYPE_TDX_GUEST "tdx-guest"
@@ -18,4 +22,10 @@ typedef struct TdxGuest {
uint64_t attributes; /* TD attributes */
} TdxGuest;
+#ifdef CONFIG_TDX
+bool is_tdx_vm(void);
+#else
+#define is_tdx_vm() 0
+#endif /* CONFIG_TDX */
+
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,42 @@
From 0fe5ae12f6e427821edddf3bb1618aa0576f73cb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 082/115] i386/tdx: Make invtsc default on
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [82/115] 24e58b9f91fca93c4e17de19592f4e232abee39a (bonzini/rhel-qemu-kvm)
Because it's fixed1 bit that enforced by TDX module.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-54-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ea4867b911fc2f6d4c8bd50ec62f0dc0fa190fab)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 3e23010094..3ec31d4872 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -742,6 +742,9 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+ /* invtsc is fixed1 for TD guest */
+ object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
+
x86cpu->enable_cpuid_0x1f = true;
}
--
2.50.1

View File

@ -0,0 +1,48 @@
From c0f849d9cc9b60aff17fd9a1349788856c0e98b9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 039/115] i386/tdx: Make sept_ve_disable set by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [39/115] ccfc985bc607ff9d1c91e65d810003347d410088 (bonzini/rhel-qemu-kvm)
For TDX KVM use case, Linux guest is the most major one. It requires
sept_ve_disable set. Make it default for the main use case. For other use
case, it can be enabled/disabled via qemu command line.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-11-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 714af52276e74a1829674d180ef26ecb6261834c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 370bd86f2c..2ed40b7614 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -288,7 +288,7 @@ static void tdx_guest_init(Object *obj)
qemu_mutex_init(&tdx->lock);
cgs->require_guest_memfd = true;
- tdx->attributes = 0;
+ tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
OBJ_PROP_FLAG_READWRITE);
--
2.50.1

View File

@ -0,0 +1,92 @@
From 44207fd96718352fce68c68515fecf3ea4cee2e9 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 069/115] i386/tdx: Only configure MSR_IA32_UCODE_REV in
kvm_init_msrs() for TDs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [69/115] eeff1e580ba5e6da1e2eaddeb51784999b994951 (bonzini/rhel-qemu-kvm)
For TDs, only MSR_IA32_UCODE_REV in kvm_init_msrs() can be configured
by VMM, while the features enumerated/controlled by other MSRs except
MSR_IA32_UCODE_REV in kvm_init_msrs() are not under control of VMM.
Only configure MSR_IA32_UCODE_REV for TDs.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-41-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f9aaad3362a5886d78e7d4d50d563ac16c6acdde)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 40 +++++++++++++++++++++-------------------
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 0c47eef03c..f3fe553151 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3761,32 +3761,34 @@ static void kvm_init_msrs(X86CPU *cpu)
CPUX86State *env = &cpu->env;
kvm_msr_buf_reset(cpu);
- if (has_msr_arch_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
- env->features[FEAT_ARCH_CAPABILITIES]);
- }
- if (has_msr_core_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
- env->features[FEAT_CORE_CAPABILITY]);
- }
+ if (!is_tdx_vm()) {
+ if (has_msr_arch_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
+ env->features[FEAT_ARCH_CAPABILITIES]);
+ }
+
+ if (has_msr_core_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
+ env->features[FEAT_CORE_CAPABILITY]);
+ }
+
+ if (has_msr_perf_capabs && cpu->enable_pmu) {
+ kvm_msr_entry_add_perf(cpu, env->features);
+ }
- if (has_msr_perf_capabs && cpu->enable_pmu) {
- kvm_msr_entry_add_perf(cpu, env->features);
+ /*
+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
+ * all kernels with MSR features should have them.
+ */
+ if (kvm_feature_msrs && cpu_has_vmx(env)) {
+ kvm_msr_entry_add_vmx(cpu, env->features);
+ }
}
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
-
- /*
- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
- * all kernels with MSR features should have them.
- */
- if (kvm_feature_msrs && cpu_has_vmx(env)) {
- kvm_msr_entry_add_vmx(cpu, env->features);
- }
-
assert(kvm_buf_set_msrs(cpu) == 0);
}
--
2.50.1

View File

@ -0,0 +1,116 @@
From 2a27aea7e254e3828051e68a4c369068eacfb09e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 047/115] i386/tdx: Parse TDVF metadata for TDX VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [47/115] bf2f78d0bbeb0b720e00b1593d02aeeaf46eb13e (bonzini/rhel-qemu-kvm)
After TDVF is loaded to bios MemoryRegion, it needs parse TDVF metadata.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-19-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit cb5d65a854e58abeb705a2ce14cc3eb28973c606)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_sysfw.c | 7 +++++++
target/i386/kvm/tdx-stub.c | 5 +++++
target/i386/kvm/tdx.c | 5 +++++
target/i386/kvm/tdx.h | 3 +++
4 files changed, 20 insertions(+)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index e6271e1020..7436a4f33e 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "sysemu/kvm.h"
#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 2344433594..7748b6d0a4 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -8,3 +8,8 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
{
return -EINVAL;
}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 2522f2030d..71be3bd28d 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -382,6 +382,11 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
return 0;
}
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
+}
+
static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
{
TdxGuest *tdx = TDX_GUEST(obj);
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index b73461b8d8..28a03c2a7b 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -8,6 +8,7 @@
#endif
#include "confidential-guest.h"
+#include "hw/i386/tdvf.h"
#define TYPE_TDX_GUEST "tdx-guest"
#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
@@ -32,6 +33,7 @@ typedef struct TdxGuest {
char *mrownerconfig; /* base64 encoded sha348 digest */
MemoryRegion *tdvf_mr;
+ TdxFirmware tdvf;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -42,5 +44,6 @@ bool is_tdx_vm(void);
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
+int tdx_parse_tdvf(void *flash_ptr, int size);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,70 @@
From 7ead76d9444ad38538033a070836121cb4a7e20f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 099/115] i386/tdx: Remove enumeration of GetQuote in
tdx_handle_get_tdvmcall_info()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [99/115] 0d402fadbfd0059369166a44d98f51fa46fd7ca3 (bonzini/rhel-qemu-kvm)
GHCI is finalized with the <GetQuote> being one of the base VMCALLs, and
not enuemrated via <GetTdVmCallInfo>.
Adjust tdx_handle_get_tdvmcall_info() to match with GHCI.
Opportunistically fix the wrong indentation and explicitly set the
ret to TDG_VP_VMCALL_SUCCESS (in case KVM leaves unexpected value).
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250703024021.3559286-2-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b57999bb258349fabe497c8ff70277b5e5b281e2)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 6 ++++--
target/i386/kvm/tdx.h | 2 --
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 201da78b06..2ca661cbc4 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1259,13 +1259,15 @@ out_free:
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
if (run->tdx.get_tdvmcall_info.leaf != 1) {
- return;
+ return;
}
- run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE;
+ run->tdx.get_tdvmcall_info.r11 = 0;
run->tdx.get_tdvmcall_info.r12 = 0;
run->tdx.get_tdvmcall_info.r13 = 0;
run->tdx.get_tdvmcall_info.r14 = 0;
+
+ run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS;
}
static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 35a09c19c5..d439078a87 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -32,8 +32,6 @@ typedef struct TdxGuestClass {
#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
-#define TDG_VP_VMCALL_SUBFUNC_GET_QUOTE 0x0000000000000001ULL
-
enum TdxRamType {
TDX_RAM_UNACCEPTED,
TDX_RAM_ADDED,
--
2.50.1

View File

@ -0,0 +1,50 @@
From a31b5ed8480b9f7e4d49a9e3ab1077fb1d4eb269 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 103/115] i386/tdx: Remove task->watch only when it's valid
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [103/115] a9a4c03cf0f589eab2dc07572c2cb724cca5686c (bonzini/rhel-qemu-kvm)
In some case (e.g., failed to connect to QGS socket),
tdx_generate_quote_cleanup() is called with task->watch invalid. It
triggers assertion of
qemu-system-x86_64: GLib: g_source_remove: assertion 'tag > 0' failed
Fix it by checking task->watch.
Fixes: 40da501d8989 ("i386/tdx: handle TDG.VP.VMCALL<GetQuote>")
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250625035505.2770580-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 50fd57418c3f08f13eb964dcb49f065246f2ecbf)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx-quote-generator.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c
index f59715f617..dee8334b27 100644
--- a/target/i386/kvm/tdx-quote-generator.c
+++ b/target/i386/kvm/tdx-quote-generator.c
@@ -75,7 +75,9 @@ static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task)
{
timer_del(&task->timer);
- g_source_remove(task->watch);
+ if (task->watch) {
+ g_source_remove(task->watch);
+ }
qio_channel_close(QIO_CHANNEL(task->sioc), NULL);
object_unref(OBJECT(task->sioc));
--
2.50.1

View File

@ -0,0 +1,50 @@
From b01012b874646e7f5ce3d5da8f9c5539ca97ea16 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 108/115] i386/tdx: Remove the redundant
qemu_mutex_init(&tdx->lock)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [108/115] 51fa7fa495c526d62f144a0eb2ff4c87df397c8c (bonzini/rhel-qemu-kvm)
Commit 40da501d8989 ("i386/tdx: handle TDG.VP.VMCALL<GetQuote>") added
redundant qemu_mutex_init(&tdx->lock) in tdx_guest_init by mistake.
Fix it by removing the redundant one.
Fixes: 40da501d8989 ("i386/tdx: handle TDG.VP.VMCALL<GetQuote>")
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Link: https://lore.kernel.org/r/20250717103707.688929-1-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f64832033d1262983bfe759669b4f65080f760dc)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 08eed19960..2ff5211794 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1527,8 +1527,6 @@ static void tdx_guest_init(Object *obj)
tdx_guest_set_qgs,
NULL, NULL);
- qemu_mutex_init(&tdx->lock);
-
tdx->event_notify_vector = -1;
tdx->event_notify_apicid = -1;
}
--
2.50.1

View File

@ -0,0 +1,78 @@
From c97fb38ea2145d0f827cc693e03628d9582433c3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 043/115] i386/tdx: Set APIC bus rate to match with what TDX
module enforces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [43/115] 2a25d2d762dba32fd6d0d3f4669c254cf0cb6a65 (bonzini/rhel-qemu-kvm)
TDX advertises core crystal clock with cpuid[0x15] as 25MHz for TD
guests and it's unchangeable from VMM. As a result, TDX guest reads
the APIC timer at the same frequency, 25MHz.
While KVM's default emulated frequency for APIC bus is 1GHz, set the
APIC bus rate to match with TDX explicitly to ensure KVM provide correct
emulated APIC timer for TD guest.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-15-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d529a2ac5ef4620173439942f78ec668f9165fc1)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 13 +++++++++++++
target/i386/kvm/tdx.h | 3 +++
2 files changed, 16 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 39fd964c6b..c96e8eb7b8 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -254,6 +254,19 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+ if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
+ error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
+ return -EOPNOTSUPP;
+ }
+
+ r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ 0, TDX_APIC_BUS_CYCLES_NS);
+ if (r < 0) {
+ error_setg_errno(errp, -r,
+ "Unable to set core crystal clock frequency to 25MHz");
+ return r;
+ }
+
if (tdx_guest->mrconfigid) {
g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
strlen(tdx_guest->mrconfigid), &data_len, errp);
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index e472b11fb0..d39e733d9f 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -16,6 +16,9 @@ typedef struct TdxGuestClass {
X86ConfidentialGuestClass parent_class;
} TdxGuestClass;
+/* TDX requires bus frequency 25MHz */
+#define TDX_APIC_BUS_CYCLES_NS 40
+
typedef struct TdxGuest {
X86ConfidentialGuest parent_obj;
--
2.50.1

View File

@ -0,0 +1,64 @@
From 13e58b76efc0c20373a21d319d43361af80d82f6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 067/115] i386/tdx: Set and check kernel_irqchip mode for TDX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [67/115] 6d1c37af7cbaf0e84d1f1e2eb493f2397afaf452 (bonzini/rhel-qemu-kvm)
KVM mandates kernel_irqchip to be split mode.
Set it to split mode automatically when users don't provide an explicit
value, otherwise check it to be the split mode.
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-39-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit bb45580d842530d78b58179eaf80b6331b15324e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/
---
target/i386/kvm/tdx.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 4f17e17308..58fb68cab0 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -16,6 +16,7 @@
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
+#include "sysemu/kvm_int.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "exec/ramblock.h"
@@ -388,6 +389,13 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return -EINVAL;
}
+ if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
+ kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
+ } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM requires kernel_irqchip to be split");
+ return -EINVAL;
+ }
+
if (!tdx_caps) {
r = get_tdx_capabilities(errp);
if (r) {
--
2.50.1

View File

@ -0,0 +1,54 @@
From 23e6031f307d262cae8a76306acb5341706098d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 064/115] i386/tdx: Set kvm_readonly_mem_enabled to false for
TDX VM
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [64/115] ee2c60314038f35359ea6bd00422b39c8573da88 (bonzini/rhel-qemu-kvm)
TDX only supports readonly for shared memory but not for private memory.
In the view of QEMU, it has no idea whether a memslot is used as shared
memory of private. Thus just mark kvm_readonly_mem_enabled to false to
TDX VM for simplicity.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-36-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit da6728658bf63d6a3989f1587a33566b3e54bed8)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 410f8a9997..7bc36b620e 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -384,6 +384,15 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return -EOPNOTSUPP;
}
+ /*
+ * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
+ * memory for shared memory but not for private memory. Besides, whether a
+ * memslot is private or shared is not determined by QEMU.
+ *
+ * Thus, just mark readonly memory not supported for simplicity.
+ */
+ kvm_readonly_mem_allowed = false;
+
qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
tdx_guest = tdx;
--
2.50.1

View File

@ -0,0 +1,60 @@
From 53a6bd5c6e3e4f0cf3fcf1b0a326c13e9defdc50 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 100/115] i386/tdx: Set value of <GetTdVmCallInfo> based on
capabilities of both KVM and QEMU
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [100/115] ebdd2061181d3ce95404931904e816171ae4f5ac (bonzini/rhel-qemu-kvm)
KVM reports the supported TDVMCALL sub leafs in TDX capabilities.
one for kernel-supported
TDVMCALLs (userspace can set those blindly) and one for user-supported
TDVMCALLs (userspace can set those if it knows how to handle them)
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250703024021.3559286-4-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 55be385b10658a2372f944fa41aaba016e1e8433)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 2ca661cbc4..a24e15571a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1256,14 +1256,21 @@ out_free:
g_free(task);
}
+#define SUPPORTED_TDVMCALLINFO_1_R11 (0)
+#define SUPPORTED_TDVMCALLINFO_1_R12 (0)
+
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
if (run->tdx.get_tdvmcall_info.leaf != 1) {
return;
}
- run->tdx.get_tdvmcall_info.r11 = 0;
- run->tdx.get_tdvmcall_info.r12 = 0;
+ run->tdx.get_tdvmcall_info.r11 = (tdx_caps->user_tdvmcallinfo_1_r11 &
+ SUPPORTED_TDVMCALLINFO_1_R11) |
+ tdx_caps->kernel_tdvmcallinfo_1_r11;
+ run->tdx.get_tdvmcall_info.r12 = (tdx_caps->user_tdvmcallinfo_1_r12 &
+ SUPPORTED_TDVMCALLINFO_1_R12) |
+ tdx_caps->kernel_tdvmcallinfo_1_r12;
run->tdx.get_tdvmcall_info.r13 = 0;
run->tdx.get_tdvmcall_info.r14 = 0;
--
2.50.1

View File

@ -0,0 +1,264 @@
From 2e545a2564bcece4b3f1e2baa18e93d81a94b20d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 052/115] i386/tdx: Setup the TD HOB list
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [52/115] 789ad8477ccd49c1977e874d30963ebf9ac7f8b3 (bonzini/rhel-qemu-kvm)
The TD HOB list is used to pass the information from VMM to TDVF. The TD
HOB must include PHIT HOB and Resource Descriptor HOB. More details can
be found in TDVF specification and PI specification.
Build the TD HOB in TDX's machine_init_done callback.
Co-developed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-24-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a731425980a4d3f8bb96fc41893b6437672875ee)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/meson.build | 2 +-
hw/i386/tdvf-hob.c | 130 ++++++++++++++++++++++++++++++++++++++++++
hw/i386/tdvf-hob.h | 26 +++++++++
target/i386/kvm/tdx.c | 16 ++++++
4 files changed, 173 insertions(+), 1 deletion(-)
create mode 100644 hw/i386/tdvf-hob.c
create mode 100644 hw/i386/tdvf-hob.h
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index d6d8023664..cbac982039 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -31,7 +31,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
-i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000000..782b3d1578
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000000..4fc6a3740a
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index d1c7821347..db5d58b600 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -21,6 +21,7 @@
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
+#include "hw/i386/tdvf-hob.h"
#include "kvm_i386.h"
#include "tdx.h"
@@ -147,6 +148,19 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
tdx_guest->tdvf_mr = tdvf_mr;
}
+static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
+{
+ TdxFirmwareEntry *entry;
+
+ for_each_tdx_fw_entry(&tdx->tdvf, entry) {
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
+ return entry;
+ }
+ }
+ error_report("TDVF metadata doesn't specify TD_HOB location.");
+ exit(1);
+}
+
static void tdx_add_ram_entry(uint64_t address, uint64_t length,
enum TdxRamType type)
{
@@ -281,6 +295,8 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
sizeof(TdxRamEntry), &tdx_ram_entry_compare);
+
+ tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
}
static Notifier tdx_machine_done_notify = {
--
2.50.1

View File

@ -0,0 +1,226 @@
From 9c3c02aab32a45468605b6872c358968615681ef Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 042/115] i386/tdx: Support user configurable
mrconfigid/mrowner/mrownerconfig
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [42/115] 7f7acc5c5153f3486fcad084c036f67dbc483c66 (bonzini/rhel-qemu-kvm)
Three sha384 hash values, mrconfigid, mrowner and mrownerconfig, of a TD
can be provided for TDX attestation. Detailed meaning of them can be
found: https://lore.kernel.org/qemu-devel/31d6dbc1-f453-4cef-ab08-4813f4e0ff92@intel.com/
Allow user to specify those values via property mrconfigid, mrowner and
mrownerconfig. They are all in base64 format.
example
-object tdx-guest, \
mrconfigid=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
mrowner=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v,\
mrownerconfig=ASNFZ4mrze8BI0VniavN7wEjRWeJq83vASNFZ4mrze8BI0VniavN7wEjRWeJq83v
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-14-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d05a0858cf876f79b57a622716fbad07f5b2ea08)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
qapi/qom.json | 16 +++++++-
target/i386/kvm/tdx.c | 95 +++++++++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.h | 3 ++
3 files changed, 113 insertions(+), 1 deletion(-)
diff --git a/qapi/qom.json b/qapi/qom.json
index fefb54f90b..970ffeee9e 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -1021,11 +1021,25 @@
# pages. Some guest OS (e.g., Linux TD guest) may require this to
# be set, otherwise they refuse to boot.
#
+# @mrconfigid: ID for non-owner-defined configuration of the guest TD,
+# e.g., run-time or OS configuration (base64 encoded SHA384 digest).
+# Defaults to all zeros.
+#
+# @mrowner: ID for the guest TDs owner (base64 encoded SHA384 digest).
+# Defaults to all zeros.
+#
+# @mrownerconfig: ID for owner-defined configuration of the guest TD,
+# e.g., specific to the workload rather than the run-time or OS
+# (base64 encoded SHA384 digest). Defaults to all zeros.
+#
# Since: 10.1
##
{ 'struct': 'TdxGuestProperties',
'data': { '*attributes': 'uint64',
- '*sept-ve-disable': 'bool' } }
+ '*sept-ve-disable': 'bool',
+ '*mrconfigid': 'str',
+ '*mrowner': 'str',
+ '*mrownerconfig': 'str' } }
##
# @ThreadContextProperties:
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 3de3b5fa6a..39fd964c6b 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -11,8 +11,10 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qemu/base64.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
+#include "crypto/hash.h"
#include "hw/i386/x86.h"
#include "kvm_i386.h"
@@ -240,6 +242,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
CPUX86State *env = &x86cpu->env;
g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
Error *local_err = NULL;
+ size_t data_len;
int retry = 10000;
int r = 0;
@@ -251,6 +254,45 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+ if (tdx_guest->mrconfigid) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
+ strlen(tdx_guest->mrconfigid), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrconfigid");
+ return -1;
+ }
+ memcpy(init_vm->mrconfigid, data, data_len);
+ }
+
+ if (tdx_guest->mrowner) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
+ strlen(tdx_guest->mrowner), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrowner");
+ return -1;
+ }
+ memcpy(init_vm->mrowner, data, data_len);
+ }
+
+ if (tdx_guest->mrownerconfig) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
+ strlen(tdx_guest->mrownerconfig), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrownerconfig");
+ return -1;
+ }
+ memcpy(init_vm->mrownerconfig, data, data_len);
+ }
+
r = setup_td_guest_attributes(x86cpu, errp);
if (r) {
return r;
@@ -314,6 +356,51 @@ static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
}
}
+static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrconfigid);
+}
+
+static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrconfigid);
+ tdx->mrconfigid = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrowner);
+}
+
+static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrowner);
+ tdx->mrowner = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrownerconfig);
+}
+
+static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrownerconfig);
+ tdx->mrownerconfig = g_strdup(value);
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -337,6 +424,14 @@ static void tdx_guest_init(Object *obj)
object_property_add_bool(obj, "sept-ve-disable",
tdx_guest_get_sept_ve_disable,
tdx_guest_set_sept_ve_disable);
+ object_property_add_str(obj, "mrconfigid",
+ tdx_guest_get_mrconfigid,
+ tdx_guest_set_mrconfigid);
+ object_property_add_str(obj, "mrowner",
+ tdx_guest_get_mrowner, tdx_guest_set_mrowner);
+ object_property_add_str(obj, "mrownerconfig",
+ tdx_guest_get_mrownerconfig,
+ tdx_guest_set_mrownerconfig);
}
static void tdx_guest_finalize(Object *obj)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 4e2b5c61ff..e472b11fb0 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -24,6 +24,9 @@ typedef struct TdxGuest {
bool initialized;
uint64_t attributes; /* TD attributes */
uint64_t xfam;
+ char *mrconfigid; /* base64 encoded sha348 digest */
+ char *mrowner; /* base64 encoded sha348 digest */
+ char *mrownerconfig; /* base64 encoded sha348 digest */
} TdxGuest;
#ifdef CONFIG_TDX
--
2.50.1

View File

@ -0,0 +1,223 @@
From c4a9204c7d8521c6fdc6b02b48db74ac109bcf09 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 050/115] i386/tdx: Track RAM entries for TDX VM
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [50/115] fc14b8ad21b0be7179f9af62b6b32068122cae61 (bonzini/rhel-qemu-kvm)
The RAM of TDX VM can be classified into two types:
- TDX_RAM_UNACCEPTED: default type of TDX memory, which needs to be
accepted by TDX guest before it can be used and will be all-zeros
after being accepted.
- TDX_RAM_ADDED: the RAM that is ADD'ed to TD guest before running, and
can be used directly. E.g., TD HOB and TEMP MEM that needed by TDVF.
Maintain TdxRamEntries[] which grabs the initial RAM info from e820 table
and mark each RAM range as default type TDX_RAM_UNACCEPTED.
Then turn the range of TD HOB and TEMP MEM to TDX_RAM_ADDED since these
ranges will be ADD'ed before TD runs and no need to be accepted runtime.
The TdxRamEntries[] are later used to setup the memory TD resource HOB
that passes memory info from QEMU to TDVF.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-22-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f18672e4cf91feed4b91ef85a264a500935a2865)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 109 ++++++++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.h | 14 ++++++
2 files changed, 123 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 95687e3a91..d1c7821347 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -18,6 +18,7 @@
#include "crypto/hash.h"
#include "sysemu/sysemu.h"
+#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
#include "kvm_i386.h"
@@ -146,11 +147,110 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
tdx_guest->tdvf_mr = tdvf_mr;
}
+static void tdx_add_ram_entry(uint64_t address, uint64_t length,
+ enum TdxRamType type)
+{
+ uint32_t nr_entries = tdx_guest->nr_ram_entries;
+ tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
+ nr_entries + 1);
+
+ tdx_guest->ram_entries[nr_entries].address = address;
+ tdx_guest->ram_entries[nr_entries].length = length;
+ tdx_guest->ram_entries[nr_entries].type = type;
+ tdx_guest->nr_ram_entries++;
+}
+
+static int tdx_accept_ram_range(uint64_t address, uint64_t length)
+{
+ uint64_t head_start, tail_start, head_length, tail_length;
+ uint64_t tmp_address, tmp_length;
+ TdxRamEntry *e;
+ int i = 0;
+
+ do {
+ if (i == tdx_guest->nr_ram_entries) {
+ return -1;
+ }
+
+ e = &tdx_guest->ram_entries[i++];
+ } while (address + length <= e->address || address >= e->address + e->length);
+
+ /*
+ * The to-be-accepted ram range must be fully contained by one
+ * RAM entry.
+ */
+ if (e->address > address ||
+ e->address + e->length < address + length) {
+ return -1;
+ }
+
+ if (e->type == TDX_RAM_ADDED) {
+ return 0;
+ }
+
+ tmp_address = e->address;
+ tmp_length = e->length;
+
+ e->address = address;
+ e->length = length;
+ e->type = TDX_RAM_ADDED;
+
+ head_length = address - tmp_address;
+ if (head_length > 0) {
+ head_start = tmp_address;
+ tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
+ }
+
+ tail_start = address + length;
+ if (tail_start < tmp_address + tmp_length) {
+ tail_length = tmp_address + tmp_length - tail_start;
+ tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
+ }
+
+ return 0;
+}
+
+static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
+{
+ const TdxRamEntry *lhs = lhs_;
+ const TdxRamEntry *rhs = rhs_;
+
+ if (lhs->address == rhs->address) {
+ return 0;
+ }
+ if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
+ return 1;
+ }
+ return -1;
+}
+
+static void tdx_init_ram_entries(void)
+{
+ unsigned i, j, nr_e820_entries;
+
+ nr_e820_entries = e820_get_table(NULL);
+ tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
+
+ for (i = 0, j = 0; i < nr_e820_entries; i++) {
+ uint64_t addr, len;
+
+ if (e820_get_entry(i, E820_RAM, &addr, &len)) {
+ tdx_guest->ram_entries[j].address = addr;
+ tdx_guest->ram_entries[j].length = len;
+ tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
+ j++;
+ }
+ }
+ tdx_guest->nr_ram_entries = j;
+}
+
static void tdx_finalize_vm(Notifier *notifier, void *unused)
{
TdxFirmware *tdvf = &tdx_guest->tdvf;
TdxFirmwareEntry *entry;
+ tdx_init_ram_entries();
+
for_each_tdx_fw_entry(tdvf, entry) {
switch (entry->type) {
case TDVF_SECTION_TYPE_BFV:
@@ -166,12 +266,21 @@ static void tdx_finalize_vm(Notifier *notifier, void *unused)
entry->type);
exit(1);
}
+ if (tdx_accept_ram_range(entry->address, entry->size)) {
+ error_report("Failed to accept memory for TDVF section %d",
+ entry->type);
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ exit(1);
+ }
break;
default:
error_report("Unsupported TDVF section %d", entry->type);
exit(1);
}
}
+
+ qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
+ sizeof(TdxRamEntry), &tdx_ram_entry_compare);
}
static Notifier tdx_machine_done_notify = {
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 28a03c2a7b..36a7400e74 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -20,6 +20,17 @@ typedef struct TdxGuestClass {
/* TDX requires bus frequency 25MHz */
#define TDX_APIC_BUS_CYCLES_NS 40
+enum TdxRamType {
+ TDX_RAM_UNACCEPTED,
+ TDX_RAM_ADDED,
+};
+
+typedef struct TdxRamEntry {
+ uint64_t address;
+ uint64_t length;
+ enum TdxRamType type;
+} TdxRamEntry;
+
typedef struct TdxGuest {
X86ConfidentialGuest parent_obj;
@@ -34,6 +45,9 @@ typedef struct TdxGuest {
MemoryRegion *tdvf_mr;
TdxFirmware tdvf;
+
+ uint32_t nr_ram_entries;
+ TdxRamEntry *ram_entries;
} TdxGuest;
#ifdef CONFIG_TDX
--
2.50.1

View File

@ -0,0 +1,151 @@
From 95f727555b05af436c6e23a0dde42155a217a6e8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 049/115] i386/tdx: Track mem_ptr for each firmware entry of
TDVF
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [49/115] 8cee9537e1387c718ec037bbc1a8d0a6b17bbf49 (bonzini/rhel-qemu-kvm)
For each TDVF sections, QEMU needs to copy the content to guest
private memory via KVM API (KVM_TDX_INIT_MEM_REGION).
Introduce a field @mem_ptr for TdxFirmwareEntry to track the memory
pointer of each TDVF sections. So that QEMU can add/copy them to guest
private memory later.
TDVF sections can be classified into two groups:
- Firmware itself, e.g., TDVF BFV and CFV, that located separately from
guest RAM. Its memory pointer is the bios pointer.
- Sections located at guest RAM, e.g., TEMP_MEM and TD_HOB.
mmap a new memory range for them.
Register a machine_init_done callback to do the stuff.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-21-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4420ba0ebbf014acc68f78669e0767e288313ed6)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/
---
hw/i386/tdvf.c | 1 +
include/hw/i386/tdvf.h | 7 +++++++
target/i386/kvm/tdx.c | 37 +++++++++++++++++++++++++++++++++++++
3 files changed, 45 insertions(+)
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
index 824a387d42..88453cf3a5 100644
--- a/hw/i386/tdvf.c
+++ b/hw/i386/tdvf.c
@@ -179,6 +179,7 @@ int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
}
}
+ fw->mem_ptr = flash_ptr;
return 0;
err:
diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h
index 7ebcac42a3..e75c8d1acc 100644
--- a/include/hw/i386/tdvf.h
+++ b/include/hw/i386/tdvf.h
@@ -26,13 +26,20 @@ typedef struct TdxFirmwareEntry {
uint64_t size;
uint32_t type;
uint32_t attributes;
+
+ void *mem_ptr;
} TdxFirmwareEntry;
typedef struct TdxFirmware {
+ void *mem_ptr;
+
uint32_t nr_entries;
TdxFirmwareEntry *entries;
} TdxFirmware;
+#define for_each_tdx_fw_entry(fw, e) \
+ for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
+
int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
#endif /* HW_I386_TDVF_H */
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 71be3bd28d..95687e3a91 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -12,10 +12,13 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qemu/base64.h"
+#include "qemu/mmap-alloc.h"
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
+#include "sysemu/sysemu.h"
+#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
#include "kvm_i386.h"
#include "tdx.h"
@@ -143,6 +146,38 @@ void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
tdx_guest->tdvf_mr = tdvf_mr;
}
+static void tdx_finalize_vm(Notifier *notifier, void *unused)
+{
+ TdxFirmware *tdvf = &tdx_guest->tdvf;
+ TdxFirmwareEntry *entry;
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
+ qemu_real_host_page_size(), 0, 0);
+ if (entry->mem_ptr == MAP_FAILED) {
+ error_report("Failed to mmap memory for TDVF section %d",
+ entry->type);
+ exit(1);
+ }
+ break;
+ default:
+ error_report("Unsupported TDVF section %d", entry->type);
+ exit(1);
+ }
+ }
+}
+
+static Notifier tdx_machine_done_notify = {
+ .notify = tdx_finalize_vm,
+};
+
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
TdxGuest *tdx = TDX_GUEST(cgs);
@@ -157,6 +192,8 @@ static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
}
+ qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
+
tdx_guest = tdx;
return 0;
}
--
2.50.1

View File

@ -0,0 +1,106 @@
From e0384fc5822eb8fcea9a5e59b89b9430dedadba3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 041/115] i386/tdx: Validate TD attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [41/115] c95c6476092eb717dd042c64d656c2b1aa70a409 (bonzini/rhel-qemu-kvm)
Validate TD attributes with tdx_caps that only supported bits are
allowed by KVM.
Besides, sanity check the attribute bits that have not been supported by
QEMU yet. e.g., debug bit, it will be allowed in the future when debug
TD support lands in QEMU.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Link: https://lore.kernel.org/r/20250508150002.689633-13-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 53b6f406b4f1a215fb3ec60e56ddba2e019a45ef)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 1ab063f790..3de3b5fa6a 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -18,10 +18,15 @@
#include "kvm_i386.h"
#include "tdx.h"
+#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0)
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30)
#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63)
+#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
+ TDX_TD_ATTRIBUTES_PKS | \
+ TDX_TD_ATTRIBUTES_PERFMON)
+
static TdxGuest *tdx_guest;
static struct kvm_tdx_capabilities *tdx_caps;
@@ -153,13 +158,34 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_TDX_VM;
}
-static void setup_td_guest_attributes(X86CPU *x86cpu)
+static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
+{
+ if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
+ error_setg(errp, "Invalid attributes 0x%lx for TDX VM "
+ "(KVM supported: 0x%llx)", tdx->attributes,
+ tdx_caps->supported_attrs);
+ return -1;
+ }
+
+ if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
+ error_setg(errp, "Some QEMU unsupported TD attribute bits being "
+ "requested: 0x%lx (QEMU supported: 0x%llx)",
+ tdx->attributes, TDX_SUPPORTED_TD_ATTRS);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
{
CPUX86State *env = &x86cpu->env;
tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
TDX_TD_ATTRIBUTES_PKS : 0;
tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
+
+ return tdx_validate_attributes(tdx_guest, errp);
}
static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
@@ -225,7 +251,10 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
- setup_td_guest_attributes(x86cpu);
+ r = setup_td_guest_attributes(x86cpu, errp);
+ if (r) {
+ return r;
+ }
r = setup_td_xfam(x86cpu, errp);
if (r) {
--
2.50.1

View File

@ -0,0 +1,88 @@
From b7e8674a1d3d577a3e88e95d2dab6aac626eca41 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:48 +0200
Subject: [PATCH 083/115] i386/tdx: Validate phys_bits against host value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [83/115] 42a17d80e8c176a2dc4e2d3da0ea74f7fdb85577 (bonzini/rhel-qemu-kvm)
For TDX guest, the phys_bits is not configurable and can only be
host/native value.
Validate phys_bits inside tdx_check_features().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-55-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 907ee7b67e50a7eea2768c66e3ad67c9aa4ffd3c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/host-cpu.c | 2 +-
target/i386/host-cpu.h | 1 +
target/i386/kvm/tdx.c | 8 ++++++++
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 4a77ecc1fc..4ab536ab80 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -15,7 +15,7 @@
#include "sysemu/sysemu.h"
/* Note: Only safe for use on x86(-64) hosts */
-static uint32_t host_cpu_phys_bits(void)
+uint32_t host_cpu_phys_bits(void)
{
uint32_t eax;
uint32_t host_phys_bits;
diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h
index 6a9bc918ba..b97ec01c9b 100644
--- a/target/i386/host-cpu.h
+++ b/target/i386/host-cpu.h
@@ -10,6 +10,7 @@
#ifndef HOST_CPU_H
#define HOST_CPU_H
+uint32_t host_cpu_phys_bits(void);
void host_cpu_instance_init(X86CPU *cpu);
void host_cpu_max_instance_init(X86CPU *cpu);
bool host_cpu_realizefn(CPUState *cs, Error **errp);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 3ec31d4872..b9c3ba3725 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -25,6 +25,7 @@
#include "cpu.h"
#include "cpu-internal.h"
+#include "host-cpu.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
@@ -879,6 +880,13 @@ static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
return -EINVAL;
}
+ if (cpu->phys_bits != host_cpu_phys_bits()) {
+ error_report("TDX requires guest CPU physical bits (%u) "
+ "to match host CPU physical bits (%u)",
+ cpu->phys_bits, host_cpu_phys_bits());
+ return -EINVAL;
+ }
+
return 0;
}
--
2.50.1

View File

@ -0,0 +1,78 @@
From e417afeb76f30fca6c16e57971521c5c9c25681c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 040/115] i386/tdx: Wire CPU features up with attributes of TD
guest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [40/115] ce59bfc2281e630283095aec8975d93ef455d599 (bonzini/rhel-qemu-kvm)
For QEMU VMs,
- PKS is configured via CPUID_7_0_ECX_PKS, e.g., -cpu xxx,+pks and
- PMU is configured by x86cpu->enable_pmu, e.g., -cpu xxx,pmu=on
While the bit 30 (PKS) and bit 63 (PERFMON) of TD's attributes are also
used to configure the PKS and PERFMON/PMU of TD, reuse the existing
configuration interfaces of 'cpu' for TD's attributes.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-12-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit bb3be394cf80d68251e5b89e823dddc679b6e644)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 2ed40b7614..1ab063f790 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -19,6 +19,8 @@
#include "tdx.h"
#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
+#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30)
+#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63)
static TdxGuest *tdx_guest;
@@ -151,6 +153,15 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_TDX_VM;
}
+static void setup_td_guest_attributes(X86CPU *x86cpu)
+{
+ CPUX86State *env = &x86cpu->env;
+
+ tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
+ TDX_TD_ATTRIBUTES_PKS : 0;
+ tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
+}
+
static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
{
CPUX86State *env = &x86cpu->env;
@@ -214,6 +225,8 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+ setup_td_guest_attributes(x86cpu);
+
r = setup_td_xfam(x86cpu, errp);
if (r) {
return r;
--
2.50.1

View File

@ -0,0 +1,240 @@
From 7f2a5ac9ad2e2d54b09e1bcd1a1d59cbde1443eb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 058/115] i386/tdx: Wire TDX_REPORT_FATAL_ERROR with GuestPanic
facility
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [58/115] 4cdc786e5ad029e8ed5713b4a27bb038a4b6f935 (bonzini/rhel-qemu-kvm)
Integrate TDX's TDX_REPORT_FATAL_ERROR into QEMU GuestPanic facility
Originated-from: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-30-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6e250463b08b4028123f201343ee72099ef81e68)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/
---
qapi/run-state.json | 31 +++++++++++++++++++--
system/runstate.c | 65 +++++++++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.c | 25 ++++++++++++++++-
3 files changed, 118 insertions(+), 3 deletions(-)
diff --git a/qapi/run-state.json b/qapi/run-state.json
index ce95cfa46b..ee11adc508 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -501,10 +501,12 @@
#
# @s390: s390 guest panic information type (Since: 2.12)
#
+# @tdx: tdx guest panic information type (Since: 10.1)
+#
# Since: 2.9
##
{ 'enum': 'GuestPanicInformationType',
- 'data': [ 'hyper-v', 's390' ] }
+ 'data': [ 'hyper-v', 's390', 'tdx' ] }
##
# @GuestPanicInformation:
@@ -519,7 +521,8 @@
'base': {'type': 'GuestPanicInformationType'},
'discriminator': 'type',
'data': {'hyper-v': 'GuestPanicInformationHyperV',
- 's390': 'GuestPanicInformationS390'}}
+ 's390': 'GuestPanicInformationS390',
+ 'tdx' : 'GuestPanicInformationTdx'}}
##
# @GuestPanicInformationHyperV:
@@ -598,6 +601,30 @@
'psw-addr': 'uint64',
'reason': 'S390CrashReason'}}
+##
+# @GuestPanicInformationTdx:
+#
+# TDX Guest panic information specific to TDX, as specified in the
+# "Guest-Hypervisor Communication Interface (GHCI) Specification",
+# section TDG.VP.VMCALL<ReportFatalError>.
+#
+# @error-code: TD-specific error code
+#
+# @message: Human-readable error message provided by the guest. Not
+# to be trusted.
+#
+# @gpa: guest-physical address of a page that contains more verbose
+# error information, as zero-terminated string. Present when the
+# "GPA valid" bit (bit 63) is set in @error-code.
+#
+#
+# Since: 10.1
+##
+{'struct': 'GuestPanicInformationTdx',
+ 'data': {'error-code': 'uint32',
+ 'message': 'str',
+ '*gpa': 'uint64'}}
+
##
# @MEMORY_FAILURE:
#
diff --git a/system/runstate.c b/system/runstate.c
index c2c9afa905..31970c522e 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -565,6 +565,58 @@ static void qemu_system_wakeup(void)
}
}
+static char *tdx_parse_panic_message(char *message)
+{
+ bool printable = false;
+ char *buf = NULL;
+ int len = 0, i;
+
+ /*
+ * Although message is defined as a json string, we shouldn't
+ * unconditionally treat it as is because the guest generated it and
+ * it's not necessarily trustable.
+ */
+ if (message) {
+ /* The caller guarantees the NULL-terminated string. */
+ len = strlen(message);
+
+ printable = len > 0;
+ for (i = 0; i < len; i++) {
+ if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
+ printable = false;
+ break;
+ }
+ }
+ }
+
+ if (len == 0) {
+ buf = g_malloc(1);
+ buf[0] = '\0';
+ } else {
+ if (!printable) {
+ /* 3 = length of "%02x " */
+ buf = g_malloc(len * 3);
+ for (i = 0; i < len; i++) {
+ if (message[i] == '\0') {
+ break;
+ } else {
+ sprintf(buf + 3 * i, "%02x ", message[i]);
+ }
+ }
+ if (i > 0) {
+ /* replace the last ' '(space) to NULL */
+ buf[i * 3 - 1] = '\0';
+ } else {
+ buf[0] = '\0';
+ }
+ } else {
+ buf = g_strdup(message);
+ }
+ }
+
+ return buf;
+}
+
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -606,7 +658,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
+ } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
+ char *message = tdx_parse_panic_message(info->u.tdx.message);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "\nTDX guest reports fatal error."
+ " error code: 0x%" PRIx32 " error message:\"%s\"\n",
+ info->u.tdx.error_code, message);
+ g_free(message);
+ if (info->u.tdx.gpa != -1ull) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
+ "can be found at gpa page: 0x%" PRIx64 "\n",
+ info->u.tdx.gpa);
+ }
}
+
qapi_free_GuestPanicInformation(info);
}
}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 679613ab55..7611f51aae 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -16,6 +16,7 @@
#include "qapi/error.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
+#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "exec/ramblock.h"
@@ -615,18 +616,35 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}
+static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
+ char *message, uint64_t gpa)
+{
+ GuestPanicInformation *panic_info;
+
+ panic_info = g_new0(GuestPanicInformation, 1);
+ panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
+ panic_info->u.tdx.error_code = (uint32_t) error_code;
+ panic_info->u.tdx.message = message;
+ panic_info->u.tdx.gpa = gpa;
+
+ qemu_system_guest_panicked(panic_info);
+}
+
/*
* Only 8 registers can contain valid ASCII byte stream to form the fatal
* message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX
*/
#define TDX_FATAL_MESSAGE_MAX 64
+#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63)
+
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
{
uint64_t error_code = run->system_event.data[R_R12];
uint64_t reg_mask = run->system_event.data[R_ECX];
char *message = NULL;
uint64_t *tmp;
+ uint64_t gpa = -1ull;
if (error_code & 0xffff) {
error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx",
@@ -657,7 +675,12 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
}
#undef COPY_REG
- error_report("TD guest reports fatal error. %s", message ? : "");
+ if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
+ gpa = run->system_event.data[R_R13];
+ }
+
+ tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
+
return -1;
}
--
2.50.1

View File

@ -0,0 +1,798 @@
From e100b3595bacf3d18af8a6b8234e8c1abdd1f87c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:26:18 +0200
Subject: [PATCH 093/115] i386/tdx: handle TDG.VP.VMCALL<GetQuote>
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [93/115] 4e656b0d7f8a022d25573b9583ed7864815cb88b (bonzini/rhel-qemu-kvm)
Add property "quote-generation-socket" to tdx-guest, which is a property
of type SocketAddress to specify Quote Generation Service(QGS).
On request of GetQuote, it connects to the QGS socket, read request
data from shared guest memory, send the request data to the QGS,
and store the response into shared guest memory, at last notify
TD guest by interrupt.
command line example:
qemu-system-x86_64 \
-object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type":"unix", "path":"/var/run/tdx-qgs/qgs.socket"}}' \
-machine confidential-guest-support=tdx0
Note, above example uses the unix socket. It can be other types, like vsock,
which depends on the implementation of QGS.
To avoid no response from QGS server, setup a timer for the transaction.
If timeout, make it an error and interrupt guest. Define the threshold of
time to 30s at present, maybe change to other value if not appropriate.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Tested-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 40da501d8989913935660dc24953ece02c9e98b8)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/ -> sysemu/,exec/
---
qapi/qom.json | 8 +-
target/i386/kvm/kvm.c | 3 +
target/i386/kvm/meson.build | 2 +-
target/i386/kvm/tdx-quote-generator.c | 300 ++++++++++++++++++++++++++
target/i386/kvm/tdx-quote-generator.h | 82 +++++++
target/i386/kvm/tdx-stub.c | 4 +
target/i386/kvm/tdx.c | 176 ++++++++++++++-
target/i386/kvm/tdx.h | 10 +
8 files changed, 582 insertions(+), 3 deletions(-)
create mode 100644 target/i386/kvm/tdx-quote-generator.c
create mode 100644 target/i386/kvm/tdx-quote-generator.h
diff --git a/qapi/qom.json b/qapi/qom.json
index 970ffeee9e..72c1605ad7 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -1032,6 +1032,11 @@
# e.g., specific to the workload rather than the run-time or OS
# (base64 encoded SHA384 digest). Defaults to all zeros.
#
+# @quote-generation-socket: socket address for Quote Generation
+# Service (QGS). QGS is a daemon running on the host. Without
+# it, the guest will not be able to get a TD quote for
+# attestation.
+#
# Since: 10.1
##
{ 'struct': 'TdxGuestProperties',
@@ -1039,7 +1044,8 @@
'*sept-ve-disable': 'bool',
'*mrconfigid': 'str',
'*mrowner': 'str',
- '*mrownerconfig': 'str' } }
+ '*mrownerconfig': 'str',
+ '*quote-generation-socket': 'SocketAddress' } }
##
# @ThreadContextProperties:
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 26328a1d3b..fbf11b2122 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6045,6 +6045,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
* does not handle the TDVMCALL.
*/
switch (run->tdx.nr) {
+ case TDVMCALL_GET_QUOTE:
+ tdx_handle_get_quote(cpu, run);
+ break;
case TDVMCALL_GET_TD_VM_CALL_INFO:
tdx_handle_get_tdvmcall_info(cpu, run);
break;
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 3f44cdedb7..2675bf8902 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,7 +8,7 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
-i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c'))
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c
new file mode 100644
index 0000000000..f59715f617
--- /dev/null
+++ b/target/i386/kvm/tdx-quote-generator.c
@@ -0,0 +1,300 @@
+/*
+ * QEMU TDX Quote Generation Support
+ *
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * Author:
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
+
+#include "tdx-quote-generator.h"
+
+#define QGS_MSG_LIB_MAJOR_VER 1
+#define QGS_MSG_LIB_MINOR_VER 1
+
+typedef enum _qgs_msg_type_t {
+ GET_QUOTE_REQ = 0,
+ GET_QUOTE_RESP = 1,
+ GET_COLLATERAL_REQ = 2,
+ GET_COLLATERAL_RESP = 3,
+ GET_PLATFORM_INFO_REQ = 4,
+ GET_PLATFORM_INFO_RESP = 5,
+ QGS_MSG_TYPE_MAX
+} qgs_msg_type_t;
+
+typedef struct _qgs_msg_header_t {
+ uint16_t major_version;
+ uint16_t minor_version;
+ uint32_t type;
+ uint32_t size; // size of the whole message, include this header, in byte
+ uint32_t error_code; // used in response only
+} qgs_msg_header_t;
+
+typedef struct _qgs_msg_get_quote_req_t {
+ qgs_msg_header_t header; // header.type = GET_QUOTE_REQ
+ uint32_t report_size; // cannot be 0
+ uint32_t id_list_size; // length of id_list, in byte, can be 0
+} qgs_msg_get_quote_req_t;
+
+typedef struct _qgs_msg_get_quote_resp_s {
+ qgs_msg_header_t header; // header.type = GET_QUOTE_RESP
+ uint32_t selected_id_size; // can be 0 in case only one id is sent in request
+ uint32_t quote_size; // length of quote_data, in byte
+ uint8_t id_quote[]; // selected id followed by quote
+} qgs_msg_get_quote_resp_t;
+
+#define HEADER_SIZE 4
+
+static uint32_t decode_header(const char *buf, size_t len) {
+ if (len < HEADER_SIZE) {
+ return 0;
+ }
+ uint32_t msg_size = 0;
+ for (uint32_t i = 0; i < HEADER_SIZE; ++i) {
+ msg_size = msg_size * 256 + (buf[i] & 0xFF);
+ }
+ return msg_size;
+}
+
+static void encode_header(char *buf, size_t len, uint32_t size) {
+ assert(len >= HEADER_SIZE);
+ buf[0] = ((size >> 24) & 0xFF);
+ buf[1] = ((size >> 16) & 0xFF);
+ buf[2] = ((size >> 8) & 0xFF);
+ buf[3] = (size & 0xFF);
+}
+
+static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task)
+{
+ timer_del(&task->timer);
+
+ g_source_remove(task->watch);
+ qio_channel_close(QIO_CHANNEL(task->sioc), NULL);
+ object_unref(OBJECT(task->sioc));
+
+ task->completion(task);
+}
+
+static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition,
+ gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received,
+ task->payload_len - task->receive_buf_received, &err);
+ if (ret < 0) {
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return G_SOURCE_CONTINUE;
+ } else {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+
+ if (ret == 0) {
+ error_report("End of file before reply received");
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ task->receive_buf_received += ret;
+ if (task->receive_buf_received >= HEADER_SIZE) {
+ uint32_t len = decode_header(task->receive_buf,
+ task->receive_buf_received);
+ if (len == 0 ||
+ len > (task->payload_len - HEADER_SIZE)) {
+ error_report("Message len %u must be non-zero & less than %zu",
+ len, (task->payload_len - HEADER_SIZE));
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ /* Now we know the size, shrink to fit */
+ task->payload_len = HEADER_SIZE + len;
+ task->receive_buf = g_renew(char,
+ task->receive_buf,
+ task->payload_len);
+ }
+
+ if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) {
+ qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE);
+ if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER ||
+ hdr->minor_version != QGS_MSG_LIB_MINOR_VER) {
+ error_report("Invalid QGS message header version %d.%d",
+ hdr->major_version,
+ hdr->minor_version);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->type != GET_QUOTE_RESP) {
+ error_report("Invalid QGS message type %d",
+ hdr->type);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->size > (task->payload_len - HEADER_SIZE)) {
+ error_report("QGS message size %d exceeds payload capacity %zu",
+ hdr->size, task->payload_len);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ if (hdr->error_code != 0) {
+ error_report("QGS message error code %d",
+ hdr->error_code);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+ if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) {
+ qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE);
+ if (msg->selected_id_size != 0) {
+ error_report("QGS message selected ID was %d not 0",
+ msg->selected_id_size);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+
+ if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) !=
+ msg->quote_size) {
+ error_report("QGS quote size %d should be %zu",
+ msg->quote_size,
+ (task->payload_len - sizeof(qgs_msg_get_quote_resp_t)));
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ goto end;
+ }
+ }
+
+ if (task->receive_buf_received == task->payload_len) {
+ size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t);
+ memmove(task->receive_buf,
+ task->receive_buf + strip,
+ task->receive_buf_received - strip);
+ task->receive_buf_received -= strip;
+ task->status_code = TDX_VP_GET_QUOTE_SUCCESS;
+ goto end;
+ }
+
+ return G_SOURCE_CONTINUE;
+
+end:
+ tdx_generate_quote_cleanup(task);
+ return G_SOURCE_REMOVE;
+}
+
+static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition,
+ gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_channel_write(ioc, task->send_data + task->send_data_sent,
+ task->send_data_size - task->send_data_sent, &err);
+ if (ret < 0) {
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ ret = 0;
+ } else {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ tdx_generate_quote_cleanup(task);
+ goto end;
+ }
+ }
+ task->send_data_sent += ret;
+
+ if (task->send_data_sent == task->send_data_size) {
+ task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN,
+ tdx_get_quote_read, task, NULL);
+ goto end;
+ }
+
+ return G_SOURCE_CONTINUE;
+
+end:
+ return G_SOURCE_REMOVE;
+}
+
+static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+ Error *err = NULL;
+ int ret;
+
+ ret = qio_task_propagate_error(qio_task, &err);
+ if (ret) {
+ error_report_err(err);
+ task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE;
+ tdx_generate_quote_cleanup(task);
+ return;
+ }
+
+ task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT,
+ tdx_send_report, task, NULL);
+}
+
+#define TRANSACTION_TIMEOUT 30000
+
+static void getquote_expired(void *opaque)
+{
+ TdxGenerateQuoteTask *task = opaque;
+
+ task->status_code = TDX_VP_GET_QUOTE_ERROR;
+ tdx_generate_quote_cleanup(task);
+}
+
+static void setup_get_quote_timer(TdxGenerateQuoteTask *task)
+{
+ int64_t time;
+
+ timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task);
+ time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ timer_mod(&task->timer, time + TRANSACTION_TIMEOUT);
+}
+
+void tdx_generate_quote(TdxGenerateQuoteTask *task,
+ SocketAddress *qg_sock_addr)
+{
+ QIOChannelSocket *sioc;
+ qgs_msg_get_quote_req_t msg;
+
+ /* Prepare a QGS message prelude */
+ msg.header.major_version = QGS_MSG_LIB_MAJOR_VER;
+ msg.header.minor_version = QGS_MSG_LIB_MINOR_VER;
+ msg.header.type = GET_QUOTE_REQ;
+ msg.header.size = sizeof(msg) + task->send_data_size;
+ msg.header.error_code = 0;
+ msg.report_size = task->send_data_size;
+ msg.id_list_size = 0;
+
+ /* Make room to add the QGS message prelude */
+ task->send_data = g_renew(char,
+ task->send_data,
+ task->send_data_size + sizeof(msg) + HEADER_SIZE);
+ memmove(task->send_data + sizeof(msg) + HEADER_SIZE,
+ task->send_data,
+ task->send_data_size);
+ memcpy(task->send_data + HEADER_SIZE,
+ &msg,
+ sizeof(msg));
+ encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg));
+ task->send_data_size += sizeof(msg) + HEADER_SIZE;
+
+ sioc = qio_channel_socket_new();
+ task->sioc = sioc;
+
+ setup_get_quote_timer(task);
+
+ qio_channel_socket_connect_async(sioc, qg_sock_addr,
+ tdx_quote_generator_connected, task,
+ NULL, NULL);
+}
diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h
new file mode 100644
index 0000000000..3bd9b8ef33
--- /dev/null
+++ b/target/i386/kvm/tdx-quote-generator.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H
+#define QEMU_I386_TDX_QUOTE_GENERATOR_H
+
+#include "qom/object_interfaces.h"
+#include "io/channel-socket.h"
+#include "exec/hwaddr.h"
+
+#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
+
+#define TDX_VP_GET_QUOTE_SUCCESS 0ULL
+#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL)
+#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL
+#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL
+
+/* Limit to avoid resource starvation. */
+#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024)
+#define TDX_MAX_GET_QUOTE_REQUEST 16
+
+#define TDX_GET_QUOTE_HDR_SIZE 24
+
+/* Format of pages shared with guest. */
+struct tdx_get_quote_header {
+ /* Format version: must be 1 in little endian. */
+ uint64_t structure_version;
+
+ /*
+ * GetQuote status code in little endian:
+ * Guest must set error_code to 0 to avoid information leak.
+ * Qemu sets this before interrupting guest.
+ */
+ uint64_t error_code;
+
+ /*
+ * in-message size in little endian: The message will follow this header.
+ * The in-message will be send to QGS.
+ */
+ uint32_t in_len;
+
+ /*
+ * out-message size in little endian:
+ * On request, out_len must be zero to avoid information leak.
+ * On return, message size from QGS. Qemu overwrites this field.
+ * The message will follows this header. The in-message is overwritten.
+ */
+ uint32_t out_len;
+
+ /*
+ * Message buffer follows.
+ * Guest sets message that will be send to QGS. If out_len > in_len, guest
+ * should zero remaining buffer to avoid information leak.
+ * Qemu overwrites this buffer with a message returned from QGS.
+ */
+};
+
+typedef struct TdxGenerateQuoteTask {
+ hwaddr buf_gpa;
+ hwaddr payload_gpa;
+ uint64_t payload_len;
+
+ char *send_data;
+ uint64_t send_data_size;
+ uint64_t send_data_sent;
+
+ char *receive_buf;
+ uint64_t receive_buf_received;
+
+ uint64_t status_code;
+ struct tdx_get_quote_header hdr;
+
+ QIOChannelSocket *sioc;
+ guint watch;
+ QEMUTimer timer;
+
+ void (*completion)(struct TdxGenerateQuoteTask *task);
+ void *opaque;
+} TdxGenerateQuoteTask;
+
+void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr);
+
+#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 62a12a0677..76fee49eff 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -19,6 +19,10 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
return -EINVAL;
}
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
+{
+}
+
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index e0197b6582..201da78b06 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -14,12 +14,14 @@
#include "qemu/base64.h"
#include "qemu/mmap-alloc.h"
#include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
#include "qom/object_interfaces.h"
#include "crypto/hash.h"
#include "sysemu/kvm_int.h"
#include "sysemu/runstate.h"
#include "sysemu/sysemu.h"
#include "exec/ramblock.h"
+#include "exec/address-spaces.h"
#include <linux/kvm_para.h>
@@ -32,6 +34,7 @@
#include "hw/i386/tdvf-hob.h"
#include "kvm_i386.h"
#include "tdx.h"
+#include "tdx-quote-generator.h"
#include "standard-headers/asm-x86/kvm_para.h"
@@ -1120,13 +1123,146 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}
+static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
+{
+ TdxGuest *tdx = task->opaque;
+ int ret;
+
+ /* Maintain the number of in-flight requests. */
+ qemu_mutex_lock(&tdx->lock);
+ tdx->num--;
+ qemu_mutex_unlock(&tdx->lock);
+
+ if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) {
+ ret = address_space_write(&address_space_memory, task->payload_gpa,
+ MEMTXATTRS_UNSPECIFIED, task->receive_buf,
+ task->receive_buf_received);
+ if (ret != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to write quote data.");
+ } else {
+ task->hdr.out_len = cpu_to_le64(task->receive_buf_received);
+ }
+ }
+ task->hdr.error_code = cpu_to_le64(task->status_code);
+
+ /* Publish the response contents before marking this request completed. */
+ smp_wmb();
+ ret = address_space_write(&address_space_memory, task->buf_gpa,
+ MEMTXATTRS_UNSPECIFIED, &task->hdr,
+ TDX_GET_QUOTE_HDR_SIZE);
+ if (ret != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to update GetQuote header.");
+ }
+
+ g_free(task->send_data);
+ g_free(task->receive_buf);
+ g_free(task);
+ object_unref(tdx);
+}
+
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
+{
+ TdxGenerateQuoteTask *task;
+ struct tdx_get_quote_header hdr;
+ hwaddr buf_gpa = run->tdx.get_quote.gpa;
+ uint64_t buf_len = run->tdx.get_quote.size;
+
+ QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE);
+
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND;
+
+ if (buf_len == 0) {
+ return;
+ }
+
+ if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR;
+ return;
+ }
+
+ if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ error_report("TDX: get-quote: failed to read GetQuote header.");
+ return;
+ }
+
+ if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
+ return;
+ }
+
+ /* Only safe-guard check to avoid too large buffer size. */
+ if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
+ le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) {
+ return;
+ }
+
+ if (!tdx_guest->qg_sock_addr) {
+ hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
+ if (address_space_write(&address_space_memory, buf_gpa,
+ MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ error_report("TDX: failed to update GetQuote header.");
+ return;
+ }
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
+ return;
+ }
+
+ qemu_mutex_lock(&tdx_guest->lock);
+ if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) {
+ qemu_mutex_unlock(&tdx_guest->lock);
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY;
+ return;
+ }
+ tdx_guest->num++;
+ qemu_mutex_unlock(&tdx_guest->lock);
+
+ task = g_new(TdxGenerateQuoteTask, 1);
+ task->buf_gpa = buf_gpa;
+ task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE;
+ task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE;
+ task->hdr = hdr;
+ task->completion = tdx_get_quote_completion;
+
+ task->send_data_size = le32_to_cpu(hdr.in_len);
+ task->send_data = g_malloc(task->send_data_size);
+ task->send_data_sent = 0;
+
+ if (address_space_read(&address_space_memory, task->payload_gpa,
+ MEMTXATTRS_UNSPECIFIED, task->send_data,
+ task->send_data_size) != MEMTX_OK) {
+ goto out_free;
+ }
+
+ /* Mark the buffer in-flight. */
+ hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
+ if (address_space_write(&address_space_memory, buf_gpa,
+ MEMTXATTRS_UNSPECIFIED,
+ &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) {
+ goto out_free;
+ }
+
+ task->receive_buf = g_malloc0(task->payload_len);
+ task->receive_buf_received = 0;
+ task->opaque = tdx_guest;
+
+ object_ref(tdx_guest);
+ tdx_generate_quote(task, tdx_guest->qg_sock_addr);
+ run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS;
+ return;
+
+out_free:
+ g_free(task->send_data);
+ g_free(task);
+}
+
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
if (run->tdx.get_tdvmcall_info.leaf != 1) {
return;
}
- run->tdx.get_tdvmcall_info.r11 = 0;
+ run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE;
run->tdx.get_tdvmcall_info.r12 = 0;
run->tdx.get_tdvmcall_info.r13 = 0;
run->tdx.get_tdvmcall_info.r14 = 0;
@@ -1263,6 +1399,37 @@ static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **
tdx->mrownerconfig = g_strdup(value);
}
+static void tdx_guest_get_qgs(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ if (!tdx->qg_sock_addr) {
+ error_setg(errp, "quote-generation-socket is not set");
+ return;
+ }
+ visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp);
+}
+
+static void tdx_guest_set_qgs(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+ SocketAddress *sock = NULL;
+
+ if (!visit_type_SocketAddress(v, name, &sock, errp)) {
+ return;
+ }
+
+ if (tdx->qg_sock_addr) {
+ qapi_free_SocketAddress(tdx->qg_sock_addr);
+ }
+
+ tdx->qg_sock_addr = sock;
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -1294,6 +1461,13 @@ static void tdx_guest_init(Object *obj)
object_property_add_str(obj, "mrownerconfig",
tdx_guest_get_mrownerconfig,
tdx_guest_set_mrownerconfig);
+
+ object_property_add(obj, "quote-generation-socket", "SocketAddress",
+ tdx_guest_get_qgs,
+ tdx_guest_set_qgs,
+ NULL, NULL);
+
+ qemu_mutex_init(&tdx->lock);
}
static void tdx_guest_finalize(Object *obj)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 0dd41d5811..35a09c19c5 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -11,6 +11,8 @@
#include "cpu.h"
#include "hw/i386/tdvf.h"
+#include "tdx-quote-generator.h"
+
#define TYPE_TDX_GUEST "tdx-guest"
#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
@@ -22,6 +24,7 @@ typedef struct TdxGuestClass {
#define TDX_APIC_BUS_CYCLES_NS 40
#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000
+#define TDVMCALL_GET_QUOTE 0x10002
#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
@@ -29,6 +32,8 @@ typedef struct TdxGuestClass {
#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
+#define TDG_VP_VMCALL_SUBFUNC_GET_QUOTE 0x0000000000000001ULL
+
enum TdxRamType {
TDX_RAM_UNACCEPTED,
TDX_RAM_ADDED,
@@ -57,6 +62,10 @@ typedef struct TdxGuest {
uint32_t nr_ram_entries;
TdxRamEntry *ram_entries;
+
+ /* GetQuote */
+ SocketAddress *qg_sock_addr;
+ int num;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -69,6 +78,7 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
int tdx_parse_tdvf(void *flash_ptr, int size);
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run);
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,111 @@
From 432fbc1dacdb5de4fb3af42e21749c53cd26a855 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:49 +0200
Subject: [PATCH 092/115] i386/tdx: handle TDG.VP.VMCALL<GetTdVmCallInfo>
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [92/115] 443c167d16e725f756cab6371aad04b7c2b4f15a (bonzini/rhel-qemu-kvm)
Signed-off-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 427b8cf47a6959cd8b0db12bcf66e9009afa2c07)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 12 ++++++++++++
target/i386/kvm/tdx-stub.c | 4 ++++
target/i386/kvm/tdx.c | 12 ++++++++++++
target/i386/kvm/tdx.h | 9 +++++++++
4 files changed, 37 insertions(+)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index b6fddcd543..26328a1d3b 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6039,6 +6039,18 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
break;
}
break;
+ case KVM_EXIT_TDX:
+ /*
+ * run->tdx is already set up for the case where userspace
+ * does not handle the TDVMCALL.
+ */
+ switch (run->tdx.nr) {
+ case TDVMCALL_GET_TD_VM_CALL_INFO:
+ tdx_handle_get_tdvmcall_info(cpu, run);
+ break;
+ }
+ ret = 0;
+ break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 720a4ff046..62a12a0677 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -18,3 +18,7 @@ int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
{
return -EINVAL;
}
+
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
+{
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index ed3a55991a..e0197b6582 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1120,6 +1120,18 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
+{
+ if (run->tdx.get_tdvmcall_info.leaf != 1) {
+ return;
+ }
+
+ run->tdx.get_tdvmcall_info.r11 = 0;
+ run->tdx.get_tdvmcall_info.r12 = 0;
+ run->tdx.get_tdvmcall_info.r13 = 0;
+ run->tdx.get_tdvmcall_info.r14 = 0;
+}
+
static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
char *message, uint64_t gpa)
{
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index 8dd66e9014..0dd41d5811 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -21,6 +21,14 @@ typedef struct TdxGuestClass {
/* TDX requires bus frequency 25MHz */
#define TDX_APIC_BUS_CYCLES_NS 40
+#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000
+
+#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
+#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
+#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL
+#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
+#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
+
enum TdxRamType {
TDX_RAM_UNACCEPTED,
TDX_RAM_ADDED,
@@ -61,5 +69,6 @@ int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
int tdx_parse_tdvf(void *flash_ptr, int size);
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,197 @@
From e2157a814aa2a700736e97d2548460e9c6c808f2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 101/115] i386/tdx: handle
TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [101/115] 747409544735f7e2da43ce3906c0d0d487b5d78f (bonzini/rhel-qemu-kvm)
Record the interrupt vector and the apic id of the vcpu that calls
TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT.
Inject the interrupt to TD guest to notify the completion of <GetQuote>
when notify interrupt vector is valid.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250703024021.3559286-5-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit efa742b23eff2a799c196d756bd506fe74e96fdc)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 3 +++
target/i386/kvm/tdx-stub.c | 4 ++++
target/i386/kvm/tdx.c | 48 +++++++++++++++++++++++++++++++++++++-
target/i386/kvm/tdx.h | 7 ++++++
4 files changed, 61 insertions(+), 1 deletion(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index fbf11b2122..f789965a33 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -6051,6 +6051,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case TDVMCALL_GET_TD_VM_CALL_INFO:
tdx_handle_get_tdvmcall_info(cpu, run);
break;
+ case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
+ tdx_handle_setup_event_notify_interrupt(cpu, run);
+ break;
}
ret = 0;
break;
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
index 76fee49eff..1f0e108a69 100644
--- a/target/i386/kvm/tdx-stub.c
+++ b/target/i386/kvm/tdx-stub.c
@@ -26,3 +26,7 @@ void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run)
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
{
}
+
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run)
+{
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index a24e15571a..e56db74f58 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -28,10 +28,13 @@
#include "cpu.h"
#include "cpu-internal.h"
#include "host-cpu.h"
+#include "hw/i386/apic_internal.h"
+#include "hw/i386/apic-msidef.h"
#include "hw/i386/e820_memory_layout.h"
#include "hw/i386/tdvf.h"
#include "hw/i386/x86.h"
#include "hw/i386/tdvf-hob.h"
+#include "hw/pci/msi.h"
#include "kvm_i386.h"
#include "tdx.h"
#include "tdx-quote-generator.h"
@@ -1123,6 +1126,28 @@ int tdx_parse_tdvf(void *flash_ptr, int size)
return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
}
+static void tdx_inject_interrupt(uint32_t apicid, uint32_t vector)
+{
+ int ret;
+
+ if (vector < 32 || vector > 255) {
+ return;
+ }
+
+ MSIMessage msg = {
+ .address = ((apicid & 0xff) << MSI_ADDR_DEST_ID_SHIFT) |
+ (((uint64_t)apicid & 0xffffff00) << 32),
+ .data = vector | (APIC_DM_FIXED << MSI_DATA_DELIVERY_MODE_SHIFT),
+ };
+
+ ret = kvm_irqchip_send_msi(kvm_state, msg);
+ if (ret < 0) {
+ /* In this case, no better way to tell it to guest. Log it. */
+ error_report("TDX: injection interrupt %d failed, interrupt lost (%s).",
+ vector, strerror(-ret));
+ }
+}
+
static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
{
TdxGuest *tdx = task->opaque;
@@ -1154,6 +1179,9 @@ static void tdx_get_quote_completion(TdxGenerateQuoteTask *task)
error_report("TDX: get-quote: failed to update GetQuote header.");
}
+ tdx_inject_interrupt(tdx_guest->event_notify_apicid,
+ tdx_guest->event_notify_vector);
+
g_free(task->send_data);
g_free(task->receive_buf);
g_free(task);
@@ -1256,7 +1284,7 @@ out_free:
g_free(task);
}
-#define SUPPORTED_TDVMCALLINFO_1_R11 (0)
+#define SUPPORTED_TDVMCALLINFO_1_R11 (TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT)
#define SUPPORTED_TDVMCALLINFO_1_R12 (0)
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
@@ -1277,6 +1305,21 @@ void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run)
run->tdx.get_tdvmcall_info.ret = TDG_VP_VMCALL_SUCCESS;
}
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run)
+{
+ uint64_t vector = run->tdx.setup_event_notify.vector;
+
+ if (vector >= 32 && vector < 256) {
+ qemu_mutex_lock(&tdx_guest->lock);
+ tdx_guest->event_notify_vector = vector;
+ tdx_guest->event_notify_apicid = cpu->apic_id;
+ qemu_mutex_unlock(&tdx_guest->lock);
+ run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_SUCCESS;
+ } else {
+ run->tdx.setup_event_notify.ret = TDG_VP_VMCALL_INVALID_OPERAND;
+ }
+}
+
static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
char *message, uint64_t gpa)
{
@@ -1477,6 +1520,9 @@ static void tdx_guest_init(Object *obj)
NULL, NULL);
qemu_mutex_init(&tdx->lock);
+
+ tdx->event_notify_vector = -1;
+ tdx->event_notify_apicid = -1;
}
static void tdx_guest_finalize(Object *obj)
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index d439078a87..1c38faf983 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -25,6 +25,7 @@ typedef struct TdxGuestClass {
#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000
#define TDVMCALL_GET_QUOTE 0x10002
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004
#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL
#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL
@@ -32,6 +33,8 @@ typedef struct TdxGuestClass {
#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL
#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL
+#define TDG_VP_VMCALL_SUBFUNC_SET_EVENT_NOTIFY_INTERRUPT BIT_ULL(1)
+
enum TdxRamType {
TDX_RAM_UNACCEPTED,
TDX_RAM_ADDED,
@@ -64,6 +67,9 @@ typedef struct TdxGuest {
/* GetQuote */
SocketAddress *qg_sock_addr;
int num;
+
+ uint32_t event_notify_vector;
+ uint32_t event_notify_apicid;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -78,5 +84,6 @@ int tdx_parse_tdvf(void *flash_ptr, int size);
int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run);
void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run);
+void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu, struct kvm_run *run);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,50 @@
From 99563f467faeace2726e62292b128faf3fb56fcb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 061/115] i386/tdx: implement tdx_cpu_instance_init()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [61/115] 20d53c1ff1d645a043cac27f33f9778732c1469e (bonzini/rhel-qemu-kvm)
Currently, pmu is not supported for TDX by KVM.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-33-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7c615242671dbe65e198c20889dcaa9b4b9a1624)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 7611f51aae..afd7e62422 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -398,6 +398,11 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_TDX_VM;
}
+static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
+{
+ object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+}
+
static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
{
if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
@@ -791,4 +796,5 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
klass->kvm_init = tdx_kvm_init;
x86_klass->kvm_type = tdx_kvm_type;
+ x86_klass->cpu_instance_init = tdx_cpu_instance_init;
}
--
2.50.1

View File

@ -0,0 +1,105 @@
From 69a39313aa07e1e4845e53e34a6b866395863c73 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:46 +0200
Subject: [PATCH 045/115] i386/tdx: load TDVF for TD guest
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [45/115] cdf138cb88831b4c28f78cd9ad72d75465a731cf (bonzini/rhel-qemu-kvm)
TDVF(OVMF) needs to run at private memory for TD guest. TDX cannot
support pflash device since it doesn't support read-only private memory.
Thus load TDVF(OVMF) with -bios option for TDs.
Use memory_region_init_ram_guest_memfd() to allocate the MemoryRegion
for TDVF because it needs to be located at private memory.
Also store the MemoryRegion pointer of TDVF since the shared ramblock of
it can be discared after it gets copied to private ramblock.
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-17-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0dd5fe5ebeabefc7b3d7f043991b1edfe6b8eda9)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86-common.c | 6 +++++-
target/i386/kvm/tdx.c | 6 ++++++
target/i386/kvm/tdx.h | 3 +++
3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 562215990d..97cd84d618 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -1003,11 +1004,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 56ad5f599d..2522f2030d 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -137,6 +137,12 @@ static int get_tdx_capabilities(Error **errp)
return 0;
}
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
+{
+ assert(!tdx_guest->tdvf_mr);
+ tdx_guest->tdvf_mr = tdvf_mr;
+}
+
static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
TdxGuest *tdx = TDX_GUEST(cgs);
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index d39e733d9f..b73461b8d8 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -30,6 +30,8 @@ typedef struct TdxGuest {
char *mrconfigid; /* base64 encoded sha348 digest */
char *mrowner; /* base64 encoded sha348 digest */
char *mrownerconfig; /* base64 encoded sha348 digest */
+
+ MemoryRegion *tdvf_mr;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -39,5 +41,6 @@ bool is_tdx_vm(void);
#endif /* CONFIG_TDX */
int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
#endif /* QEMU_I386_TDX_H */
--
2.50.1

View File

@ -0,0 +1,101 @@
From 693fa1348914dfa37ce5fa4a654e67d573e4941a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 011/115] i386/topology: Introduce helpers for various topology
info of different level
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [11/115] 52179ff6718bd4bfd9eac78efd20e7e5337d3b07 (bonzini/rhel-qemu-kvm)
Introduce various helpers for getting the topology info of different
semantics. Using the helper is more self-explanatory.
Besides, the semantic of the helper will stay unchanged even when new
topology is added in the future. At that time, updating the
implementation of the helper without affecting the callers.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-6-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e60cbeec190d349682bf97cf55446e8ae260b11a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 12b80c6f6e6edf61c998aa5bac9d0c44a81144a0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/hw/i386/topology.h | 25 +++++++++++++++++++++++++
target/i386/cpu.c | 11 ++++-------
2 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
index 1880df621a..e533a117c3 100644
--- a/include/hw/i386/topology.h
+++ b/include/hw/i386/topology.h
@@ -217,4 +217,29 @@ static inline bool x86_has_extended_topo(unsigned long *topo_bitmap)
test_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
}
+static inline unsigned x86_module_per_pkg(X86CPUTopoInfo *topo_info)
+{
+ return topo_info->modules_per_die * topo_info->dies_per_pkg;
+}
+
+static inline unsigned x86_cores_per_pkg(X86CPUTopoInfo *topo_info)
+{
+ return topo_info->cores_per_module * x86_module_per_pkg(topo_info);
+}
+
+static inline unsigned x86_threads_per_pkg(X86CPUTopoInfo *topo_info)
+{
+ return topo_info->threads_per_core * x86_cores_per_pkg(topo_info);
+}
+
+static inline unsigned x86_threads_per_module(X86CPUTopoInfo *topo_info)
+{
+ return topo_info->threads_per_core * topo_info->cores_per_module;
+}
+
+static inline unsigned x86_threads_per_die(X86CPUTopoInfo *topo_info)
+{
+ return x86_threads_per_module(topo_info) * topo_info->modules_per_die;
+}
+
#endif /* HW_I386_TOPOLOGY_H */
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index b639769ef3..1c79eb9a06 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -312,13 +312,11 @@ static uint32_t num_threads_by_topo_level(X86CPUTopoInfo *topo_info,
case CPU_TOPO_LEVEL_CORE:
return topo_info->threads_per_core;
case CPU_TOPO_LEVEL_MODULE:
- return topo_info->threads_per_core * topo_info->cores_per_module;
+ return x86_threads_per_module(topo_info);
case CPU_TOPO_LEVEL_DIE:
- return topo_info->threads_per_core * topo_info->cores_per_module *
- topo_info->modules_per_die;
+ return x86_threads_per_die(topo_info);
case CPU_TOPO_LEVEL_PACKAGE:
- return topo_info->threads_per_core * topo_info->cores_per_module *
- topo_info->modules_per_die * topo_info->dies_per_pkg;
+ return x86_threads_per_pkg(topo_info);
default:
g_assert_not_reached();
}
@@ -6957,8 +6955,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules;
topo_info.threads_per_core = cs->nr_threads;
- threads_per_pkg = topo_info.threads_per_core * topo_info.cores_per_module *
- topo_info.modules_per_die * topo_info.dies_per_pkg;
+ threads_per_pkg = x86_threads_per_pkg(&topo_info);
/* Calculate & apply limits for different index ranges */
if (index >= 0xC0000000) {
--
2.50.1

View File

@ -0,0 +1,49 @@
From 544ec72c98bdb325589cf9cddc7356ce5d4ae586 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 010/115] i386/topology: Update the comment of
x86_apicid_from_topo_ids()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [10/115] 62641dcd4cb51e9d252e4d5b3257ac87cfb165c0 (bonzini/rhel-qemu-kvm)
Update the comment of x86_apicid_from_topo_ids() to match the current
implementation,
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20241219110125.1266461-5-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8f78378de70fc79fdc7e1318496bd91ddd22df49)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a4452a3e65f1fdbbafd22e9089864d37d323d7e0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/hw/i386/topology.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h
index dff49fce11..1880df621a 100644
--- a/include/hw/i386/topology.h
+++ b/include/hw/i386/topology.h
@@ -135,9 +135,10 @@ static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info)
}
/*
- * Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID
+ * Make APIC ID for the CPU based on topology and IDs of each topology level.
*
- * The caller must make sure core_id < nr_cores and smt_id < nr_threads.
+ * The caller must make sure the ID of each level doesn't exceed the width of
+ * the level.
*/
static inline apic_id_t x86_apicid_from_topo_ids(X86CPUTopoInfo *topo_info,
const X86CPUTopoIDs *topo_ids)
--
2.50.1

View File

@ -0,0 +1,45 @@
From f88758420a256bb2b277d21cb3f3b60426caf51d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:47 +0200
Subject: [PATCH 059/115] kvm: Check KVM_CAP_MAX_VCPUS at vm level
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [59/115] aac4b448f0e64cdca79d2a8f3c53f683b9514a8a (bonzini/rhel-qemu-kvm)
KVM with TDX support starts to report different KVM_CAP_MAX_VCPUS per
different VM types. So switch to check the KVM_CAP_MAX_VCPUS at vm level.
KVM still returns the global KVM_CAP_MAX_VCPUS when the KVM is old that
doesn't report different value at vm level.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-31-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 77b5403a0298a5460554f768a2098fd21588e555)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 65952d7f4e..c1605bc4fa 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2419,7 +2419,7 @@ static int kvm_recommended_vcpus(KVMState *s)
static int kvm_max_vcpus(KVMState *s)
{
- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS);
return (ret) ? ret : kvm_recommended_vcpus(s);
}
--
2.50.1

View File

@ -0,0 +1,187 @@
From cb4a0164407562ac307b9ccc0031455cf1065bdb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 036/115] kvm: Introduce kvm_arch_pre_create_vcpu()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [36/115] 1c3f6170a844694bc1d4b5d48860330d5961d26d (bonzini/rhel-qemu-kvm)
Introduce kvm_arch_pre_create_vcpu(), to perform arch-dependent
work prior to create any vcpu. This is for i386 TDX because it needs
call TDX_INIT_VM before creating any vcpu.
The specific implementation for i386 will be added in the future patch.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Link: https://lore.kernel.org/r/20250508150002.689633-8-xiaoyao.li@intel.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a668268dc08f7f4d30cecd513054bb38ce48c0d6)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: different context in loongarch
---
accel/kvm/kvm-all.c | 5 +++++
include/sysemu/kvm.h | 1 +
target/arm/kvm.c | 5 +++++
target/i386/kvm/kvm.c | 5 +++++
target/loongarch/kvm/kvm.c | 5 +++++
target/mips/kvm.c | 5 +++++
target/ppc/kvm.c | 5 +++++
target/riscv/kvm/kvm-cpu.c | 5 +++++
target/s390x/kvm/kvm.c | 5 +++++
9 files changed, 41 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 49dedda47e..65952d7f4e 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -530,6 +530,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
ret = kvm_create_vcpu(cpu);
if (ret < 0) {
error_setg_errno(errp, -ret,
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index d9ad723f78..0e6b29b331 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -374,6 +374,7 @@ int kvm_arch_get_default_type(MachineState *ms);
int kvm_arch_init(MachineState *ms, KVMState *s);
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index f1f1b5b375..e0469e7831 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1860,6 +1860,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu)
#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index bf4493dd84..1fddec6b9c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2039,6 +2039,11 @@ full:
abort();
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 9204d4295d..f6e20dbc61 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -663,6 +663,11 @@ static void kvm_loongarch_vm_stage_change(void *opaque, bool running,
}
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
uint64_t val;
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index a98798c669..75058f8938 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s)
return 0;
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
CPUMIPSState *env = cpu_env(cs);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 7daa097164..7a86d09802 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
}
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 2bfb112be0..18ab470549 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1355,6 +1355,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs)
return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, &reg);
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret = 0;
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index afc8d570c9..3273beff18 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -408,6 +408,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
return cpu->cpu_index;
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus;
--
2.50.1

View File

@ -0,0 +1,90 @@
From e1658c60f353f93b195fb6f569cb44a5fb1f3e99 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:43 +0200
Subject: [PATCH 003/115] kvm/i386: make kvm_filter_msr() and related
definitions private to kvm module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [3/115] ef00896ed2cac6bc1ef39f21aad422ae5b571cb6 (bonzini/rhel-qemu-kvm)
kvm_filer_msr() is only used from i386 kvm module. Make it static so that its
easy for developers to understand that its not used anywhere else.
Same for QEMURDMSRHandler, QEMUWRMSRHandler and KVMMSRHandlers definitions.
CC: philmd@linaro.org
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Link: https://lore.kernel.org/r/20240903140045.41167-1-anisinha@redhat.com
[Make struct unnamed. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ed2880f4e93bf83106ebdc8562a5ee4d93285a3b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 12 +++++++++++-
target/i386/kvm/kvm_i386.h | 11 -----------
2 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 94b678e9e3..b02aec915c 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -92,7 +92,17 @@
* 255 kvm_msr_entry structs */
#define MSR_BUF_SIZE 4096
+typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
+typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
+typedef struct {
+ uint32_t msr;
+ QEMURDMSRHandler *rdmsr;
+ QEMUWRMSRHandler *wrmsr;
+} KVMMSRHandlers;
+
static void kvm_init_msrs(X86CPU *cpu);
+static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
+ QEMUWRMSRHandler *wrmsr);
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
KVM_CAP_INFO(SET_TSS_ADDR),
@@ -5762,7 +5772,7 @@ static bool kvm_install_msr_filters(KVMState *s)
return true;
}
-bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
+static bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
QEMUWRMSRHandler *wrmsr)
{
int i;
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 34fc60774b..9de9c0d303 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -66,17 +66,6 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
void kvm_update_msi_routes_all(void *private, bool global,
uint32_t index, uint32_t mask);
-typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
-typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
-typedef struct kvm_msr_handlers {
- uint32_t msr;
- QEMURDMSRHandler *rdmsr;
- QEMUWRMSRHandler *wrmsr;
-} KVMMSRHandlers;
-
-bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr,
- QEMUWRMSRHandler *wrmsr);
-
#endif /* CONFIG_KVM */
void kvm_pc_setup_irq_routing(bool pci_enabled);
--
2.50.1

View File

@ -0,0 +1,52 @@
From be25ddc3910de08325880177c16d4f0c38feeb73 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:43 +0200
Subject: [PATCH 004/115] kvm: remove unnecessary #ifdef
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [4/115] c7a7b57246c2c345961d5b5923f8b086c58b82c6 (bonzini/rhel-qemu-kvm)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit feccfa77bed975e2e60c90756f08b8a56df6daf6)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm_i386.h | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 9de9c0d303..7edb154a16 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -13,8 +13,7 @@
#include "sysemu/kvm.h"
-#ifdef CONFIG_KVM
-
+/* always false if !CONFIG_KVM */
#define kvm_pit_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
#define kvm_pic_in_kernel() \
@@ -22,14 +21,6 @@
#define kvm_ioapic_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
-#else
-
-#define kvm_pit_in_kernel() 0
-#define kvm_pic_in_kernel() 0
-#define kvm_ioapic_in_kernel() 0
-
-#endif /* CONFIG_KVM */
-
bool kvm_has_smm(void);
bool kvm_enable_x2apic(void);
bool kvm_hv_vpindex_settable(void);
--
2.50.1

View File

@ -0,0 +1,495 @@
From d90604bca41cef828b4e37d77b53e472b272dbca Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 026/115] linux-headers: Update to Linux v6.14-rc3
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [26/115] 6674b075f7bd2c7fd4cbd4ccb1689fed0fc3b026 (bonzini/rhel-qemu-kvm)
Update headers to retrieve the latest KVM caps for RISC-V.
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Message-ID: <20250221153758.652078-2-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
(cherry picked from commit 421ee1ec6f0de0b0fd96b262bda18b97e54263b4)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/standard-headers/linux/ethtool.h | 4 +
include/standard-headers/linux/fuse.h | 76 ++++++++++++++++++-
.../linux/input-event-codes.h | 1 +
include/standard-headers/linux/pci_regs.h | 16 ++--
include/standard-headers/linux/virtio_pci.h | 14 ++++
linux-headers/asm-arm64/kvm.h | 3 -
linux-headers/asm-loongarch/kvm_para.h | 1 +
linux-headers/asm-riscv/kvm.h | 7 +-
linux-headers/asm-x86/kvm.h | 1 +
linux-headers/linux/iommufd.h | 35 ++++++---
linux-headers/linux/kvm.h | 8 +-
linux-headers/linux/stddef.h | 13 +++-
linux-headers/linux/vduse.h | 2 +-
13 files changed, 146 insertions(+), 35 deletions(-)
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 67c47912e5..e83382531c 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -681,6 +681,8 @@ enum ethtool_link_ext_substate_module {
* @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
* @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
* @ETH_SS_STATS_RMON: names of RMON statistics
+ * @ETH_SS_STATS_PHY: names of PHY(dev) statistics
+ * @ETH_SS_TS_FLAGS: hardware timestamping flags
*
* @ETH_SS_COUNT: number of defined string sets
*/
@@ -706,6 +708,8 @@ enum ethtool_stringset {
ETH_SS_STATS_ETH_MAC,
ETH_SS_STATS_ETH_CTRL,
ETH_SS_STATS_RMON,
+ ETH_SS_STATS_PHY,
+ ETH_SS_TS_FLAGS,
/* add new constants above here */
ETH_SS_COUNT
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index 889e12ad15..d303effb2a 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -220,6 +220,15 @@
*
* 7.41
* - add FUSE_ALLOW_IDMAP
+ * 7.42
+ * - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
+ * structures:
+ * - struct fuse_uring_ent_in_out
+ * - struct fuse_uring_req_header
+ * - struct fuse_uring_cmd_req
+ * - FUSE_URING_IN_OUT_HEADER_SZ
+ * - FUSE_URING_OP_IN_OUT_SZ
+ * - enum fuse_uring_cmd
*/
#ifndef _LINUX_FUSE_H
@@ -251,7 +260,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 41
+#define FUSE_KERNEL_MINOR_VERSION 42
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -421,6 +430,7 @@ struct fuse_file_lock {
* FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
* of the request ID indicates resend requests
* FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
+ * FUSE_OVER_IO_URING: Indicate that client supports io-uring
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -467,6 +477,7 @@ struct fuse_file_lock {
/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
#define FUSE_ALLOW_IDMAP (1ULL << 40)
+#define FUSE_OVER_IO_URING (1ULL << 41)
/**
* CUSE INIT request/reply flags
@@ -1202,4 +1213,67 @@ struct fuse_supp_groups {
uint32_t groups[];
};
+/**
+ * Size of the ring buffer header
+ */
+#define FUSE_URING_IN_OUT_HEADER_SZ 128
+#define FUSE_URING_OP_IN_OUT_SZ 128
+
+/* Used as part of the fuse_uring_req_header */
+struct fuse_uring_ent_in_out {
+ uint64_t flags;
+
+ /*
+ * commit ID to be used in a reply to a ring request (see also
+ * struct fuse_uring_cmd_req)
+ */
+ uint64_t commit_id;
+
+ /* size of user payload buffer */
+ uint32_t payload_sz;
+ uint32_t padding;
+
+ uint64_t reserved;
+};
+
+/**
+ * Header for all fuse-io-uring requests
+ */
+struct fuse_uring_req_header {
+ /* struct fuse_in_header / struct fuse_out_header */
+ char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
+
+ /* per op code header */
+ char op_in[FUSE_URING_OP_IN_OUT_SZ];
+
+ struct fuse_uring_ent_in_out ring_ent_in_out;
+};
+
+/**
+ * sqe commands to the kernel
+ */
+enum fuse_uring_cmd {
+ FUSE_IO_URING_CMD_INVALID = 0,
+
+ /* register the request buffer and fetch a fuse request */
+ FUSE_IO_URING_CMD_REGISTER = 1,
+
+ /* commit fuse request result and fetch next request */
+ FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * In the 80B command area of the SQE.
+ */
+struct fuse_uring_cmd_req {
+ uint64_t flags;
+
+ /* entry identifier for commits */
+ uint64_t commit_id;
+
+ /* queue the command is for (queue index) */
+ uint16_t qid;
+ uint8_t padding[6];
+};
+
#endif /* _LINUX_FUSE_H */
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 50b2b7497e..09ba0ad878 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -519,6 +519,7 @@
#define KEY_NOTIFICATION_CENTER 0x1bc /* Show/hide the notification center */
#define KEY_PICKUP_PHONE 0x1bd /* Answer incoming call */
#define KEY_HANGUP_PHONE 0x1be /* Decline incoming call */
+#define KEY_LINK_PHONE 0x1bf /* AL Phone Syncing */
#define KEY_DEL_EOL 0x1c0
#define KEY_DEL_EOS 0x1c1
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 1601c7ed5f..3445c4970e 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -533,7 +533,7 @@
#define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */
#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */
#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */
-#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */
+#define PCI_EXP_LNKCAP_SLS 0x0000000f /* Max Link Speed (prior to PCIe r3.0: Supported Link Speeds) */
#define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */
#define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */
#define PCI_EXP_LNKCAP_SLS_8_0GB 0x00000003 /* LNKCAP2 SLS Vector bit 2 */
@@ -665,6 +665,7 @@
#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
#define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */
#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */
+#define PCI_EXP_DEVCAP2_EE_PREFIX_MAX 0x00c00000 /* Max End-End TLP Prefixes */
#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */
#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */
@@ -789,10 +790,11 @@
/* Same bits as above */
#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/
#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */
-#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */
-#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */
-#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
-#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
+#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */
+#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */
+#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
+#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
+#define PCI_ERR_CAP_PREFIX_LOG_PRESENT 0x00000800 /* TLP Prefix Log Present */
#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */
#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */
#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */
@@ -808,6 +810,7 @@
#define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */
#define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */
#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */
+#define PCI_ERR_PREFIX_LOG 0x38 /* TLP Prefix LOG Register (up to 16 bytes) */
/* Virtual Channel */
#define PCI_VC_PORT_CAP1 0x04
@@ -1001,9 +1004,6 @@
#define PCI_ACS_CTRL 0x06 /* ACS Control Register */
#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */
-#define PCI_VSEC_HDR 4 /* extended cap - vendor-specific */
-#define PCI_VSEC_HDR_LEN_SHIFT 20 /* shift for length field */
-
/* SATA capability */
#define PCI_SATA_REGS 4 /* SATA REGs specifier */
#define PCI_SATA_REGS_MASK 0xF /* location - BAR#/inline */
diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h
index b177ed8972..91fec6f502 100644
--- a/include/standard-headers/linux/virtio_pci.h
+++ b/include/standard-headers/linux/virtio_pci.h
@@ -116,6 +116,8 @@
#define VIRTIO_PCI_CAP_PCI_CFG 5
/* Additional shared memory capability */
#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
+/* PCI vendor data configuration */
+#define VIRTIO_PCI_CAP_VENDOR_CFG 9
/* This is the PCI capability header: */
struct virtio_pci_cap {
@@ -130,6 +132,18 @@ struct virtio_pci_cap {
uint32_t length; /* Length of the structure, in bytes. */
};
+/* This is the PCI vendor data capability header: */
+struct virtio_pci_vndr_data {
+ uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
+ uint8_t cap_next; /* Generic PCI field: next ptr. */
+ uint8_t cap_len; /* Generic PCI field: capability length */
+ uint8_t cfg_type; /* Identifies the structure. */
+ uint16_t vendor_id; /* Identifies the vendor-specific format. */
+ /* For Vendor Definition */
+ /* Pads structure to a multiple of 4 bytes */
+ /* Reads must not have side effects */
+};
+
struct virtio_pci_cap64 {
struct virtio_pci_cap cap;
uint32_t offset_hi; /* Most sig 32 bits of offset */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index dccd5d965f..ec1e82bdc8 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -43,9 +43,6 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
struct kvm_regs {
struct user_pt_regs regs; /* sp = sp_el0 */
diff --git a/linux-headers/asm-loongarch/kvm_para.h b/linux-headers/asm-loongarch/kvm_para.h
index 4ba4ad8db1..fd7f40713d 100644
--- a/linux-headers/asm-loongarch/kvm_para.h
+++ b/linux-headers/asm-loongarch/kvm_para.h
@@ -17,5 +17,6 @@
#define KVM_FEATURE_STEAL_TIME 2
/* BIT 24 - 31 are features configurable by user space vmm */
#define KVM_FEATURE_VIRT_EXTIOI 24
+#define KVM_FEATURE_USER_HCALL 25
#endif /* _ASM_KVM_PARA_H */
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 3482c9a73d..f06bc5efcd 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -179,6 +179,9 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_SSNPM,
KVM_RISCV_ISA_EXT_SVADE,
KVM_RISCV_ISA_EXT_SVADU,
+ KVM_RISCV_ISA_EXT_SVVPTC,
+ KVM_RISCV_ISA_EXT_ZABHA,
+ KVM_RISCV_ISA_EXT_ZICCRSE,
KVM_RISCV_ISA_EXT_MAX,
};
@@ -198,6 +201,7 @@ enum KVM_RISCV_SBI_EXT_ID {
KVM_RISCV_SBI_EXT_VENDOR,
KVM_RISCV_SBI_EXT_DBCN,
KVM_RISCV_SBI_EXT_STA,
+ KVM_RISCV_SBI_EXT_SUSP,
KVM_RISCV_SBI_EXT_MAX,
};
@@ -211,9 +215,6 @@ struct kvm_riscv_sbi_sta {
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
#define KVM_REG_RISCV_TYPE_SHIFT 24
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 96589490c4..86f2c34e7a 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -923,5 +923,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
#endif /* _ASM_X86_KVM_H */
diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
index 37aae16502..ccbdca5e11 100644
--- a/linux-headers/linux/iommufd.h
+++ b/linux-headers/linux/iommufd.h
@@ -297,7 +297,7 @@ struct iommu_ioas_unmap {
* ioctl(IOMMU_OPTION_HUGE_PAGES)
* @IOMMU_OPTION_RLIMIT_MODE:
* Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
- * to invoke this. Value 0 (default) is user based accouting, 1 uses process
+ * to invoke this. Value 0 (default) is user based accounting, 1 uses process
* based accounting. Global option, object_id must be 0
* @IOMMU_OPTION_HUGE_PAGES:
* Value 1 (default) allows contiguous pages to be combined when generating
@@ -390,7 +390,7 @@ struct iommu_vfio_ioas {
* @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
* domain can be attached to any PASID on the device.
* Any domain attached to the non-PASID part of the
- * device must also be flaged, otherwise attaching a
+ * device must also be flagged, otherwise attaching a
* PASID will blocked.
* If IOMMU does not support PASID it will return
* error (-EOPNOTSUPP).
@@ -558,16 +558,25 @@ struct iommu_hw_info_vtd {
* For the details of @idr, @iidr and @aidr, please refer to the chapters
* from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
*
- * User space should read the underlying ARM SMMUv3 hardware information for
- * the list of supported features.
+ * This reports the raw HW capability, and not all bits are meaningful to be
+ * read by userspace. Only the following fields should be used:
*
- * Note that these values reflect the raw HW capability, without any insight if
- * any required kernel driver support is present. Bits may be set indicating the
- * HW has functionality that is lacking kernel software support, such as BTM. If
- * a VMM is using this information to construct emulated copies of these
- * registers it should only forward bits that it knows it can support.
+ * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN , CD2L, ASID16, TTF
+ * idr[1]: SIDSIZE, SSIDSIZE
+ * idr[3]: BBML, RIL
+ * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
*
- * In future, presence of required kernel support will be indicated in flags.
+ * - S1P should be assumed to be true if a NESTED HWPT can be created
+ * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
+ * true.
+ * - ATS is a per-device property. If the VMM describes any devices as ATS
+ * capable in ACPI/DT it should set the corresponding idr.
+ *
+ * This list may expand in future (eg E0PD, AIE, PBHA, D128, DS etc). It is
+ * important that VMMs do not read bits outside the list to allow for
+ * compatibility with future kernels. Several features in the SMMUv3
+ * architecture are not currently supported by the kernel for nesting: HTTU,
+ * BTM, MPAM and others.
*/
struct iommu_hw_info_arm_smmuv3 {
__u32 flags;
@@ -766,7 +775,7 @@ struct iommu_hwpt_vtd_s1_invalidate {
};
/**
- * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cahce invalidation
+ * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
* (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
* @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
* Must be little-endian.
@@ -859,6 +868,7 @@ enum iommu_hwpt_pgfault_perm {
* @pasid: Process Address Space ID
* @grpid: Page Request Group Index
* @perm: Combination of enum iommu_hwpt_pgfault_perm
+ * @__reserved: Must be 0.
* @addr: Fault address
* @length: a hint of how much data the requestor is expecting to fetch. For
* example, if the PRI initiator knows it is going to do a 10MB
@@ -874,7 +884,8 @@ struct iommu_hwpt_pgfault {
__u32 pasid;
__u32 grpid;
__u32 perm;
- __u64 addr;
+ __u32 __reserved;
+ __aligned_u64 addr;
__u32 length;
__u32 cookie;
};
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 3bcd4eabe3..27181b3dd8 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -609,10 +609,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -1062,6 +1058,10 @@ struct kvm_dirty_tlb {
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h
index 96aa341942..e1416f7937 100644
--- a/linux-headers/linux/stddef.h
+++ b/linux-headers/linux/stddef.h
@@ -8,6 +8,13 @@
#define __always_inline __inline__
#endif
+/* Not all C++ standards support type declarations inside an anonymous union */
+#ifndef __cplusplus
+#define __struct_group_tag(TAG) TAG
+#else
+#define __struct_group_tag(TAG)
+#endif
+
/**
* __struct_group() - Create a mirrored named and anonyomous struct
*
@@ -20,13 +27,13 @@
* and size: one anonymous and one named. The former's members can be used
* normally without sub-struct naming, and the latter can be used to
* reason about the start, end, and size of the group of struct members.
- * The named struct can also be explicitly tagged for layer reuse, as well
- * as both having struct attributes appended.
+ * The named struct can also be explicitly tagged for layer reuse (C only),
+ * as well as both having struct attributes appended.
*/
#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
union { \
struct { MEMBERS } ATTRS; \
- struct TAG { MEMBERS } ATTRS NAME; \
+ struct __struct_group_tag(TAG) { MEMBERS } ATTRS NAME; \
} ATTRS
#ifdef __cplusplus
diff --git a/linux-headers/linux/vduse.h b/linux-headers/linux/vduse.h
index 6d2ca064b5..f46269af34 100644
--- a/linux-headers/linux/vduse.h
+++ b/linux-headers/linux/vduse.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
#ifndef _VDUSE_H_
#define _VDUSE_H_
--
2.50.1

View File

@ -0,0 +1,949 @@
From a02b11f5744f20a44c1dc3291ffa20985e3302d3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 027/115] linux-headers: Update to Linux v6.15-rc3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [27/115] 019c5b8dd28df2c5a0822468c3c485569983b2c1 (bonzini/rhel-qemu-kvm)
Update headers to retrieve uapi information for vfio-ap
Signed-off-by: Rorie Reyes <rreyes@linux.ibm.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250425052401.8287-3-rreyes@linux.ibm.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 1cab5a02ab8144aad2abd001835e49104e4aae0f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/standard-headers/asm-x86/setup_data.h | 4 +-
include/standard-headers/drm/drm_fourcc.h | 41 ++++++
include/standard-headers/linux/const.h | 2 +-
include/standard-headers/linux/ethtool.h | 22 +++
include/standard-headers/linux/fuse.h | 12 +-
include/standard-headers/linux/pci_regs.h | 13 +-
include/standard-headers/linux/virtio_net.h | 13 ++
include/standard-headers/linux/virtio_snd.h | 2 +-
linux-headers/asm-arm64/kvm.h | 11 ++
linux-headers/asm-arm64/unistd_64.h | 1 +
linux-headers/asm-generic/mman-common.h | 1 +
linux-headers/asm-generic/unistd.h | 4 +-
linux-headers/asm-loongarch/unistd_64.h | 1 +
linux-headers/asm-mips/unistd_n32.h | 1 +
linux-headers/asm-mips/unistd_n64.h | 1 +
linux-headers/asm-mips/unistd_o32.h | 1 +
linux-headers/asm-powerpc/unistd_32.h | 1 +
linux-headers/asm-powerpc/unistd_64.h | 1 +
linux-headers/asm-riscv/kvm.h | 2 +
linux-headers/asm-riscv/unistd_32.h | 1 +
linux-headers/asm-riscv/unistd_64.h | 1 +
linux-headers/asm-s390/unistd_32.h | 1 +
linux-headers/asm-s390/unistd_64.h | 1 +
linux-headers/asm-x86/kvm.h | 3 +
linux-headers/asm-x86/unistd_32.h | 1 +
linux-headers/asm-x86/unistd_64.h | 1 +
linux-headers/asm-x86/unistd_x32.h | 1 +
linux-headers/linux/bits.h | 8 +-
linux-headers/linux/const.h | 2 +-
linux-headers/linux/iommufd.h | 129 +++++++++++++++++-
linux-headers/linux/kvm.h | 1 +
linux-headers/linux/psp-sev.h | 21 ++-
linux-headers/linux/stddef.h | 2 +
linux-headers/linux/vfio.h | 30 ++--
34 files changed, 301 insertions(+), 36 deletions(-)
diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h
index 09355f54c5..a483d72f42 100644
--- a/include/standard-headers/asm-x86/setup_data.h
+++ b/include/standard-headers/asm-x86/setup_data.h
@@ -18,7 +18,7 @@
#define SETUP_INDIRECT (1<<31)
#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include "standard-headers/linux/types.h"
@@ -78,6 +78,6 @@ struct ima_setup_data {
uint64_t size;
} QEMU_PACKED;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index 708647776f..a8b759dcbc 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -420,6 +420,7 @@ extern "C" {
#define DRM_FORMAT_MOD_VENDOR_ARM 0x08
#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09
#define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a
+#define DRM_FORMAT_MOD_VENDOR_MTK 0x0b
/* add more to the end as needed */
@@ -1452,6 +1453,46 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
*/
#define AMLOGIC_FBC_OPTION_MEM_SAVING (1ULL << 0)
+/* MediaTek modifiers
+ * Bits Parameter Notes
+ * ----- ------------------------ ---------------------------------------------
+ * 7: 0 TILE LAYOUT Values are MTK_FMT_MOD_TILE_*
+ * 15: 8 COMPRESSION Values are MTK_FMT_MOD_COMPRESS_*
+ * 23:16 10 BIT LAYOUT Values are MTK_FMT_MOD_10BIT_LAYOUT_*
+ *
+ */
+
+#define DRM_FORMAT_MOD_MTK(__flags) fourcc_mod_code(MTK, __flags)
+
+/*
+ * MediaTek Tiled Modifier
+ * The lowest 8 bits of the modifier is used to specify the tiling
+ * layout. Only the 16L_32S tiling is used for now, but we define an
+ * "untiled" version and leave room for future expansion.
+ */
+#define MTK_FMT_MOD_TILE_MASK 0xf
+#define MTK_FMT_MOD_TILE_NONE 0x0
+#define MTK_FMT_MOD_TILE_16L32S 0x1
+
+/*
+ * Bits 8-15 specify compression options
+ */
+#define MTK_FMT_MOD_COMPRESS_MASK (0xf << 8)
+#define MTK_FMT_MOD_COMPRESS_NONE (0x0 << 8)
+#define MTK_FMT_MOD_COMPRESS_V1 (0x1 << 8)
+
+/*
+ * Bits 16-23 specify how the bits of 10 bit formats are
+ * stored out in memory
+ */
+#define MTK_FMT_MOD_10BIT_LAYOUT_MASK (0xf << 16)
+#define MTK_FMT_MOD_10BIT_LAYOUT_PACKED (0x0 << 16)
+#define MTK_FMT_MOD_10BIT_LAYOUT_LSBTILED (0x1 << 16)
+#define MTK_FMT_MOD_10BIT_LAYOUT_LSBRASTER (0x2 << 16)
+
+/* alias for the most common tiling format */
+#define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S)
+
/*
* AMD modifiers
*
diff --git a/include/standard-headers/linux/const.h b/include/standard-headers/linux/const.h
index 2122610de7..95ede23342 100644
--- a/include/standard-headers/linux/const.h
+++ b/include/standard-headers/linux/const.h
@@ -33,7 +33,7 @@
* Missing __asm__ support
*
* __BIT128() would not work in the __asm__ code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
* 128 bit constants is not supported in the gcc compiler, as
* they get silently truncated.
*
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index e83382531c..5d1ad5fdea 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -2059,6 +2059,24 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100,
ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101,
ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102,
+ ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103,
+ ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104,
+ ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105,
+ ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106,
+ ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107,
+ ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108,
+ ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109,
+ ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110,
+ ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111,
+ ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112,
+ ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113,
+ ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114,
+ ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115,
+ ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116,
+ ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117,
+ ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118,
+ ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119,
+ ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120,
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS
@@ -2271,6 +2289,10 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
* be exploited to reduce the RSS queue spread.
*/
#define RXH_XFRM_SYM_XOR (1 << 0)
+/* Similar to SYM_XOR, except that one copy of the XOR'ed fields is replaced by
+ * an OR of the same fields
+ */
+#define RXH_XFRM_SYM_OR_XOR (1 << 1)
#define RXH_XFRM_NO_CHANGE 0xff
/* L2-L4 network traffic flow types */
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index d303effb2a..a2b5815d89 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -229,6 +229,9 @@
* - FUSE_URING_IN_OUT_HEADER_SZ
* - FUSE_URING_OP_IN_OUT_SZ
* - enum fuse_uring_cmd
+ *
+ * 7.43
+ * - add FUSE_REQUEST_TIMEOUT
*/
#ifndef _LINUX_FUSE_H
@@ -260,7 +263,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 42
+#define FUSE_KERNEL_MINOR_VERSION 43
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -431,6 +434,8 @@ struct fuse_file_lock {
* of the request ID indicates resend requests
* FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
* FUSE_OVER_IO_URING: Indicate that client supports io-uring
+ * FUSE_REQUEST_TIMEOUT: kernel supports timing out requests.
+ * init_out.request_timeout contains the timeout (in secs)
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -473,11 +478,11 @@ struct fuse_file_lock {
#define FUSE_PASSTHROUGH (1ULL << 37)
#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38)
#define FUSE_HAS_RESEND (1ULL << 39)
-
/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
#define FUSE_ALLOW_IDMAP (1ULL << 40)
#define FUSE_OVER_IO_URING (1ULL << 41)
+#define FUSE_REQUEST_TIMEOUT (1ULL << 42)
/**
* CUSE INIT request/reply flags
@@ -905,7 +910,8 @@ struct fuse_init_out {
uint16_t map_alignment;
uint32_t flags2;
uint32_t max_stack_depth;
- uint32_t unused[6];
+ uint16_t request_timeout;
+ uint16_t unused[11];
};
#define CUSE_INIT_INFO_MAX 4096
diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
index 3445c4970e..ba326710f9 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -486,6 +486,7 @@
#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */
#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
+#define PCI_EXP_FLAGS_FLIT 0x8000 /* Flit Mode Supported */
#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */
#define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */
#define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */
@@ -795,6 +796,8 @@
#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
#define PCI_ERR_CAP_PREFIX_LOG_PRESENT 0x00000800 /* TLP Prefix Log Present */
+#define PCI_ERR_CAP_TLP_LOG_FLIT 0x00040000 /* TLP was logged in Flit Mode */
+#define PCI_ERR_CAP_TLP_LOG_SIZE 0x00f80000 /* Logged TLP Size (only in Flit mode) */
#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */
#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */
#define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */
@@ -1013,7 +1016,7 @@
/* Resizable BARs */
#define PCI_REBAR_CAP 4 /* capability register */
-#define PCI_REBAR_CAP_SIZES 0x00FFFFF0 /* supported BAR sizes */
+#define PCI_REBAR_CAP_SIZES 0xFFFFFFF0 /* supported BAR sizes */
#define PCI_REBAR_CTRL 8 /* control register */
#define PCI_REBAR_CTRL_BAR_IDX 0x00000007 /* BAR index */
#define PCI_REBAR_CTRL_NBAR_MASK 0x000000E0 /* # of resizable BARs */
@@ -1061,8 +1064,9 @@
#define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */
#define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */
#define PCI_EXP_DPC_CAP_SW_TRIGGER 0x0080 /* Software Triggering Supported */
-#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */
+#define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size [3:0] */
#define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */
+#define PCI_EXP_DPC_RP_PIO_LOG_SIZE4 0x2000 /* RP PIO Log Size [4] */
#define PCI_EXP_DPC_CTL 0x06 /* DPC control */
#define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */
@@ -1205,9 +1209,12 @@
#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX 0x000000ff
#define PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER 0x0000ff00
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID 0x0000ffff
-#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL 0x00ff0000
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE 0x00ff0000
#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX 0xff000000
+/* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */
+#define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE
+
/* Compute Express Link (CXL r3.1, sec 8.1.5) */
#define PCI_DVSEC_CXL_PORT 3
#define PCI_DVSEC_CXL_PORT_CTL 0x0c
diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h
index fc594fe5fc..982e854f14 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -327,6 +327,19 @@ struct virtio_net_rss_config {
uint8_t hash_key_data[/* hash_key_length */];
};
+struct virtio_net_rss_config_hdr {
+ uint32_t hash_types;
+ uint16_t indirection_table_mask;
+ uint16_t unclassified_queue;
+ uint16_t indirection_table[/* 1 + indirection_table_mask */];
+};
+
+struct virtio_net_rss_config_trailer {
+ uint16_t max_tx_vq;
+ uint8_t hash_key_length;
+ uint8_t hash_key_data[/* hash_key_length */];
+};
+
#define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1
/*
diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h
index 860f12e0a4..160d57899f 100644
--- a/include/standard-headers/linux/virtio_snd.h
+++ b/include/standard-headers/linux/virtio_snd.h
@@ -25,7 +25,7 @@ struct virtio_snd_config {
uint32_t streams;
/* # of available channel maps */
uint32_t chmaps;
- /* # of available control elements */
+ /* # of available control elements (if VIRTIO_SND_F_CTLS) */
uint32_t controls;
};
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index ec1e82bdc8..4e6aff08df 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -105,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */
struct kvm_vcpu_init {
__u32 target;
@@ -365,6 +366,7 @@ enum {
KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0,
};
+/* Vendor hyper call function numbers 0-63 */
#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
enum {
@@ -372,6 +374,14 @@ enum {
KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1,
};
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0,
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1,
+};
+
/* Device Control API on vm fd */
#define KVM_ARM_VM_SMCCC_CTRL 0
#define KVM_ARM_VM_SMCCC_FILTER 0
@@ -394,6 +404,7 @@ enum {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/linux-headers/asm-arm64/unistd_64.h b/linux-headers/asm-arm64/unistd_64.h
index d4e90fff76..ee9aaebdf3 100644
--- a/linux-headers/asm-arm64/unistd_64.h
+++ b/linux-headers/asm-arm64/unistd_64.h
@@ -323,6 +323,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index 1ea2c4c33b..ef1c27fa3c 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -85,6 +85,7 @@
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h
index 88dc393c2b..2892a45023 100644
--- a/linux-headers/asm-generic/unistd.h
+++ b/linux-headers/asm-generic/unistd.h
@@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat)
__SYSCALL(__NR_listxattrat, sys_listxattrat)
#define __NR_removexattrat 466
__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
#undef __NR_syscalls
-#define __NR_syscalls 467
+#define __NR_syscalls 468
/*
* 32 bit systems traditionally used different
diff --git a/linux-headers/asm-loongarch/unistd_64.h b/linux-headers/asm-loongarch/unistd_64.h
index 23fb96a8a7..50d22df8f7 100644
--- a/linux-headers/asm-loongarch/unistd_64.h
+++ b/linux-headers/asm-loongarch/unistd_64.h
@@ -319,6 +319,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h
index 9a75719644..bdcc2f460b 100644
--- a/linux-headers/asm-mips/unistd_n32.h
+++ b/linux-headers/asm-mips/unistd_n32.h
@@ -395,5 +395,6 @@
#define __NR_getxattrat (__NR_Linux + 464)
#define __NR_listxattrat (__NR_Linux + 465)
#define __NR_removexattrat (__NR_Linux + 466)
+#define __NR_open_tree_attr (__NR_Linux + 467)
#endif /* _ASM_UNISTD_N32_H */
diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h
index 7086783b0c..3b6b0193b6 100644
--- a/linux-headers/asm-mips/unistd_n64.h
+++ b/linux-headers/asm-mips/unistd_n64.h
@@ -371,5 +371,6 @@
#define __NR_getxattrat (__NR_Linux + 464)
#define __NR_listxattrat (__NR_Linux + 465)
#define __NR_removexattrat (__NR_Linux + 466)
+#define __NR_open_tree_attr (__NR_Linux + 467)
#endif /* _ASM_UNISTD_N64_H */
diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h
index b3825823e4..4609a4b4d3 100644
--- a/linux-headers/asm-mips/unistd_o32.h
+++ b/linux-headers/asm-mips/unistd_o32.h
@@ -441,5 +441,6 @@
#define __NR_getxattrat (__NR_Linux + 464)
#define __NR_listxattrat (__NR_Linux + 465)
#define __NR_removexattrat (__NR_Linux + 466)
+#define __NR_open_tree_attr (__NR_Linux + 467)
#endif /* _ASM_UNISTD_O32_H */
diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h
index 38ee4dc35d..5d38a427e0 100644
--- a/linux-headers/asm-powerpc/unistd_32.h
+++ b/linux-headers/asm-powerpc/unistd_32.h
@@ -448,6 +448,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h
index 5e5f156834..860a488e4d 100644
--- a/linux-headers/asm-powerpc/unistd_64.h
+++ b/linux-headers/asm-powerpc/unistd_64.h
@@ -420,6 +420,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index f06bc5efcd..5f59fd226c 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_SVVPTC,
KVM_RISCV_ISA_EXT_ZABHA,
KVM_RISCV_ISA_EXT_ZICCRSE,
+ KVM_RISCV_ISA_EXT_ZAAMO,
+ KVM_RISCV_ISA_EXT_ZALRSC,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/linux-headers/asm-riscv/unistd_32.h b/linux-headers/asm-riscv/unistd_32.h
index 74f6127aed..a5e769f1d9 100644
--- a/linux-headers/asm-riscv/unistd_32.h
+++ b/linux-headers/asm-riscv/unistd_32.h
@@ -314,6 +314,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-riscv/unistd_64.h b/linux-headers/asm-riscv/unistd_64.h
index bb6a15a2ec..8df4d64841 100644
--- a/linux-headers/asm-riscv/unistd_64.h
+++ b/linux-headers/asm-riscv/unistd_64.h
@@ -324,6 +324,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
index 620201cb36..85eedbd18e 100644
--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
@@ -439,5 +439,6 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_S390_UNISTD_32_H */
diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
index e7e4a10aaf..c03b1b9701 100644
--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
@@ -387,5 +387,6 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_S390_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 86f2c34e7a..dc591fb17e 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -557,6 +557,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
index a2eb492a75..491d6b4eb6 100644
--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
@@ -457,6 +457,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_32_H */
diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
index 2f5fc400f5..7cf88bf9bd 100644
--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
@@ -380,6 +380,7 @@
#define __NR_getxattrat 464
#define __NR_listxattrat 465
#define __NR_removexattrat 466
+#define __NR_open_tree_attr 467
#endif /* _ASM_UNISTD_64_H */
diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
index fecd832e7f..82959111e6 100644
--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
@@ -333,6 +333,7 @@
#define __NR_getxattrat (__X32_SYSCALL_BIT + 464)
#define __NR_listxattrat (__X32_SYSCALL_BIT + 465)
#define __NR_removexattrat (__X32_SYSCALL_BIT + 466)
+#define __NR_open_tree_attr (__X32_SYSCALL_BIT + 467)
#define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
#define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
#define __NR_ioctl (__X32_SYSCALL_BIT + 514)
diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h
index c0d00c0a98..58596d18f4 100644
--- a/linux-headers/linux/bits.h
+++ b/linux-headers/linux/bits.h
@@ -4,13 +4,9 @@
#ifndef _LINUX_BITS_H
#define _LINUX_BITS_H
-#define __GENMASK(h, l) \
- (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
- (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h))))
-#define __GENMASK_ULL(h, l) \
- (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
- (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
#define __GENMASK_U128(h, l) \
((_BIT128((h)) << 1) - (_BIT128(l)))
diff --git a/linux-headers/linux/const.h b/linux-headers/linux/const.h
index 2122610de7..95ede23342 100644
--- a/linux-headers/linux/const.h
+++ b/linux-headers/linux/const.h
@@ -33,7 +33,7 @@
* Missing __asm__ support
*
* __BIT128() would not work in the __asm__ code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
* 128 bit constants is not supported in the gcc compiler, as
* they get silently truncated.
*
diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h
index ccbdca5e11..cb0f7d6b4d 100644
--- a/linux-headers/linux/iommufd.h
+++ b/linux-headers/linux/iommufd.h
@@ -55,6 +55,7 @@ enum {
IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
+ IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
};
/**
@@ -392,6 +393,9 @@ struct iommu_vfio_ioas {
* Any domain attached to the non-PASID part of the
* device must also be flagged, otherwise attaching a
* PASID will blocked.
+ * For the user that wants to attach PASID, ioas is
+ * not recommended for both the non-PASID part
+ * and PASID part of the device.
* If IOMMU does not support PASID it will return
* error (-EOPNOTSUPP).
*/
@@ -608,9 +612,17 @@ enum iommu_hw_info_type {
* IOMMU_HWPT_GET_DIRTY_BITMAP
* IOMMU_HWPT_SET_DIRTY_TRACKING
*
+ * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
+ * when the struct
+ * iommu_hw_info::out_max_pasid_log2 is zero.
+ * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
+ * when the struct
+ * iommu_hw_info::out_max_pasid_log2 is zero.
*/
enum iommufd_hw_capabilities {
IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
+ IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
+ IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
};
/**
@@ -626,6 +638,9 @@ enum iommufd_hw_capabilities {
* iommu_hw_info_type.
* @out_capabilities: Output the generic iommu capability info type as defined
* in the enum iommu_hw_capabilities.
+ * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
+ * PCI devices turn to out_capabilities to check if the
+ * specific capabilities is supported or not.
* @__reserved: Must be 0
*
* Query an iommu type specific hardware information data from an iommu behind
@@ -649,7 +664,8 @@ struct iommu_hw_info {
__u32 data_len;
__aligned_u64 data_uptr;
__u32 out_data_type;
- __u32 __reserved;
+ __u8 out_max_pasid_log2;
+ __u8 __reserved[3];
__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
@@ -1014,4 +1030,115 @@ struct iommu_ioas_change_process {
#define IOMMU_IOAS_CHANGE_PROCESS \
_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
+/**
+ * enum iommu_veventq_flag - flag for struct iommufd_vevent_header
+ * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
+ */
+enum iommu_veventq_flag {
+ IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
+};
+
+/**
+ * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
+ * @flags: Combination of enum iommu_veventq_flag
+ * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
+ * [0, INT_MAX] where the following index of INT_MAX is 0
+ *
+ * Each iommufd_vevent_header reports a sequence index of the following vEVENT:
+ *
+ * +----------------------+-------+----------------------+-------+---+-------+
+ * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
+ * +----------------------+-------+----------------------+-------+---+-------+
+ *
+ * And this sequence index is expected to be monotonic to the sequence index of
+ * the previous vEVENT. If two adjacent sequence indexes has a delta larger than
+ * 1, it means that delta - 1 number of vEVENTs has lost, e.g. two lost vEVENTs:
+ *
+ * +-----+----------------------+-------+----------------------+-------+-----+
+ * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
+ * +-----+----------------------+-------+----------------------+-------+-----+
+ *
+ * If a vEVENT lost at the tail of the vEVENTQ and there is no following vEVENT
+ * providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS header
+ * would be added to the tail, and no data would follow this header:
+ *
+ * +--+----------------------+-------+-----------------------------------------+
+ * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
+ * +--+----------------------+-------+-----------------------------------------+
+ */
+struct iommufd_vevent_header {
+ __u32 flags;
+ __u32 sequence;
+};
+
+/**
+ * enum iommu_veventq_type - Virtual Event Queue Type
+ * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
+ * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
+ */
+enum iommu_veventq_type {
+ IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,
+};
+
+/**
+ * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
+ * (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
+ * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
+ * Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec)
+ * - 0x04 C_BAD_STE
+ * - 0x06 F_STREAM_DISABLED
+ * - 0x08 C_BAD_SUBSTREAMID
+ * - 0x0a C_BAD_CD
+ * - 0x10 F_TRANSLATION
+ * - 0x11 F_ADDR_SIZE
+ * - 0x12 F_ACCESS
+ * - 0x13 F_PERMISSION
+ *
+ * StreamID field reports a virtual device ID. To receive a virtual event for a
+ * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
+ */
+struct iommu_vevent_arm_smmuv3 {
+ __aligned_le64 evt[4];
+};
+
+/**
+ * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
+ * @size: sizeof(struct iommu_veventq_alloc)
+ * @flags: Must be 0
+ * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
+ * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
+ * @veventq_depth: Maximum number of events in the vEVENTQ
+ * @out_veventq_id: The ID of the new vEVENTQ
+ * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
+ * successfully returned fd after using it
+ * @__reserved: Must be 0
+ *
+ * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
+ * can have multiple FDs for different types, but is confined to one per @type.
+ * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
+ * if there are vEVENTs available. A vEVENTQ will lose events due to overflow,
+ * if the number of the vEVENTs hits @veventq_depth.
+ *
+ * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
+ * a type-specific data structure, in a normal case:
+ *
+ * +-+---------+-------+---------+-------+-----+---------+-------+-+
+ * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
+ * +-+---------+-------+---------+-------+-----+---------+-------+-+
+ *
+ * unless a tailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
+ * struct iommufd_vevent_header).
+ */
+struct iommu_veventq_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 viommu_id;
+ __u32 type;
+ __u32 veventq_depth;
+ __u32 out_veventq_id;
+ __u32 out_veventq_fd;
+ __u32 __reserved;
+};
+#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
#endif
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 27181b3dd8..e5f3e8b5a0 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -921,6 +921,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h
index 17bf191573..113c4ceb78 100644
--- a/linux-headers/linux/psp-sev.h
+++ b/linux-headers/linux/psp-sev.h
@@ -73,13 +73,20 @@ typedef enum {
SEV_RET_INVALID_PARAM,
SEV_RET_RESOURCE_LIMIT,
SEV_RET_SECURE_DATA_INVALID,
- SEV_RET_INVALID_KEY = 0x27,
- SEV_RET_INVALID_PAGE_SIZE,
- SEV_RET_INVALID_PAGE_STATE,
- SEV_RET_INVALID_MDATA_ENTRY,
- SEV_RET_INVALID_PAGE_OWNER,
- SEV_RET_INVALID_PAGE_AEAD_OFLOW,
- SEV_RET_RMP_INIT_REQUIRED,
+ SEV_RET_INVALID_PAGE_SIZE = 0x0019,
+ SEV_RET_INVALID_PAGE_STATE = 0x001A,
+ SEV_RET_INVALID_MDATA_ENTRY = 0x001B,
+ SEV_RET_INVALID_PAGE_OWNER = 0x001C,
+ SEV_RET_AEAD_OFLOW = 0x001D,
+ SEV_RET_EXIT_RING_BUFFER = 0x001F,
+ SEV_RET_RMP_INIT_REQUIRED = 0x0020,
+ SEV_RET_BAD_SVN = 0x0021,
+ SEV_RET_BAD_VERSION = 0x0022,
+ SEV_RET_SHUTDOWN_REQUIRED = 0x0023,
+ SEV_RET_UPDATE_FAILED = 0x0024,
+ SEV_RET_RESTORE_REQUIRED = 0x0025,
+ SEV_RET_RMP_INITIALIZATION_FAILED = 0x0026,
+ SEV_RET_INVALID_KEY = 0x0027,
SEV_RET_MAX,
} sev_ret_code;
diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h
index e1416f7937..e1fcfcf3b3 100644
--- a/linux-headers/linux/stddef.h
+++ b/linux-headers/linux/stddef.h
@@ -70,4 +70,6 @@
#define __counted_by_be(m)
#endif
+#define __kernel_nonstring
+
#endif /* _LINUX_STDDEF_H */
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 1b5e254d6a..79bf8c0cc5 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -671,6 +671,7 @@ enum {
*/
enum {
VFIO_AP_REQ_IRQ_INDEX,
+ VFIO_AP_CFG_CHG_IRQ_INDEX,
VFIO_AP_NUM_IRQS
};
@@ -931,29 +932,34 @@ struct vfio_device_bind_iommufd {
* VFIO_DEVICE_ATTACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 19,
* struct vfio_device_attach_iommufd_pt)
* @argsz: User filled size of this data.
- * @flags: Must be 0.
+ * @flags: Flags for attach.
* @pt_id: Input the target id which can represent an ioas or a hwpt
* allocated via iommufd subsystem.
* Output the input ioas id or the attached hwpt id which could
* be the specified hwpt itself or a hwpt automatically created
* for the specified ioas by kernel during the attachment.
+ * @pasid: The pasid to be attached, only meaningful when
+ * VFIO_DEVICE_ATTACH_PASID is set in @flags
*
* Associate the device with an address space within the bound iommufd.
* Undo by VFIO_DEVICE_DETACH_IOMMUFD_PT or device fd close. This is only
* allowed on cdev fds.
*
- * If a vfio device is currently attached to a valid hw_pagetable, without doing
- * a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl
- * passing in another hw_pagetable (hwpt) id is allowed. This action, also known
- * as a hw_pagetable replacement, will replace the device's currently attached
- * hw_pagetable with a new hw_pagetable corresponding to the given pt_id.
+ * If a vfio device or a pasid of this device is currently attached to a valid
+ * hw_pagetable (hwpt), without doing a VFIO_DEVICE_DETACH_IOMMUFD_PT, a second
+ * VFIO_DEVICE_ATTACH_IOMMUFD_PT ioctl passing in another hwpt id is allowed.
+ * This action, also known as a hw_pagetable replacement, will replace the
+ * currently attached hwpt of the device or the pasid of this device with a new
+ * hwpt corresponding to the given pt_id.
*
* Return: 0 on success, -errno on failure.
*/
struct vfio_device_attach_iommufd_pt {
__u32 argsz;
__u32 flags;
+#define VFIO_DEVICE_ATTACH_PASID (1 << 0)
__u32 pt_id;
+ __u32 pasid;
};
#define VFIO_DEVICE_ATTACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 19)
@@ -962,17 +968,21 @@ struct vfio_device_attach_iommufd_pt {
* VFIO_DEVICE_DETACH_IOMMUFD_PT - _IOW(VFIO_TYPE, VFIO_BASE + 20,
* struct vfio_device_detach_iommufd_pt)
* @argsz: User filled size of this data.
- * @flags: Must be 0.
+ * @flags: Flags for detach.
+ * @pasid: The pasid to be detached, only meaningful when
+ * VFIO_DEVICE_DETACH_PASID is set in @flags
*
- * Remove the association of the device and its current associated address
- * space. After it, the device should be in a blocking DMA state. This is only
- * allowed on cdev fds.
+ * Remove the association of the device or a pasid of the device and its current
+ * associated address space. After it, the device or the pasid should be in a
+ * blocking DMA state. This is only allowed on cdev fds.
*
* Return: 0 on success, -errno on failure.
*/
struct vfio_device_detach_iommufd_pt {
__u32 argsz;
__u32 flags;
+#define VFIO_DEVICE_DETACH_PASID (1 << 0)
+ __u32 pasid;
};
#define VFIO_DEVICE_DETACH_IOMMUFD_PT _IO(VFIO_TYPE, VFIO_BASE + 20)
--
2.50.1

View File

@ -0,0 +1,126 @@
From f1d5a02a236b16c839f4acdbb493d532c95987e0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:45 +0200
Subject: [PATCH 028/115] linux-headers: update from 6.15 + kvm/next
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [28/115] 8eeb6840c789d026eff3112d23332bc3172c51be (bonzini/rhel-qemu-kvm)
This brings in the userspace TDX API.
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 428c0acd953a626dab55e2c07401ce99c2271119)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
linux-headers/asm-x86/kvm.h | 71 +++++++++++++++++++++++++++++++++++++
linux-headers/linux/kvm.h | 1 +
2 files changed, 72 insertions(+)
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index dc591fb17e..7fb57ccb2a 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -439,6 +439,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
+#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -928,4 +929,74 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SNP_VM 4
#define KVM_X86_TDX_VM 5
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+ KVM_TDX_CAPABILITIES = 0,
+ KVM_TDX_INIT_VM,
+ KVM_TDX_INIT_VCPU,
+ KVM_TDX_INIT_MEM_REGION,
+ KVM_TDX_FINALIZE_VM,
+ KVM_TDX_GET_CPUID,
+
+ KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+ /* enum kvm_tdx_cmd_id */
+ __u32 id;
+ /* flags for sub-commend. If sub-command doesn't use this, set zero. */
+ __u32 flags;
+ /*
+ * data for each sub-command. An immediate or a pointer to the actual
+ * data in process virtual address. If sub-command doesn't use it,
+ * set zero.
+ */
+ __u64 data;
+ /*
+ * Auxiliary error code. The sub-command may return TDX SEAMCALL
+ * status code in addition to -Exxx.
+ */
+ __u64 hw_error;
+};
+
+struct kvm_tdx_capabilities {
+ __u64 supported_attrs;
+ __u64 supported_xfam;
+ __u64 reserved[254];
+
+ /* Configurable CPUID bits for userspace */
+ struct kvm_cpuid2 cpuid;
+};
+
+struct kvm_tdx_init_vm {
+ __u64 attributes;
+ __u64 xfam;
+ __u64 mrconfigid[6]; /* sha384 digest */
+ __u64 mrowner[6]; /* sha384 digest */
+ __u64 mrownerconfig[6]; /* sha384 digest */
+
+ /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
+ __u64 reserved[12];
+
+ /*
+ * Call KVM_TDX_INIT_VM before vcpu creation, thus before
+ * KVM_SET_CPUID2.
+ * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
+ * TDX module directly virtualizes those CPUIDs without VMM. The user
+ * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
+ * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
+ * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
+ * module doesn't virtualize.
+ */
+ struct kvm_cpuid2 cpuid;
+};
+
+#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0)
+
+struct kvm_tdx_init_mem_region {
+ __u64 source_addr;
+ __u64 gpa;
+ __u64 nr_pages;
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index e5f3e8b5a0..99cc82a275 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -369,6 +369,7 @@ struct kvm_run {
#define KVM_SYSTEM_EVENT_WAKEUP 4
#define KVM_SYSTEM_EVENT_SUSPEND 5
#define KVM_SYSTEM_EVENT_SEV_TERM 6
+#define KVM_SYSTEM_EVENT_TDX_FATAL 7
__u32 type;
__u32 ndata;
union {
--
2.50.1

View File

@ -0,0 +1,160 @@
From 9e0c4adad755988ea9f94f57a87e000dd7045962 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:11:27 +0200
Subject: [PATCH 112/115] memory: Change
memory_region_set_ram_discard_manager() to return the result
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [112/115] 43deb5763dfabf964b55c7a6bb4363b96a5020cc (bonzini/rhel-qemu-kvm)
Modify memory_region_set_ram_discard_manager() to return -EBUSY if a
RamDiscardManager is already set in the MemoryRegion. The caller must
handle this failure, such as having virtio-mem undo its actions and fail
the realize() process. Opportunistically move the call earlier to avoid
complex error handling.
This change is beneficial when introducing a new RamDiscardManager
instance besides virtio-mem. After
ram_block_coordinated_discard_require(true) unlocks all
RamDiscardManager instances, only one instance is allowed to be set for
one MemoryRegion at present.
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Tested-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Link: https://lore.kernel.org/r/20250612082747.51539-3-chenyi.qiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit ff1211154c45c9f7f82116ae9a8c72a848e4a8b5)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/virtio/virtio-mem.c | 30 +++++++++++++++++-------------
include/exec/memory.h | 6 +++---
system/memory.c | 10 +++++++---
3 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index ae26133f58..6284539243 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -1049,6 +1049,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
return;
}
+ /*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem))) {
+ error_setg(errp, "Failed to set RamDiscardManager");
+ ram_block_coordinated_discard_require(false);
+ return;
+ }
+
/*
* We don't know at this point whether shared RAM is migrated using
* QEMU or migrated using the file content. "x-ignore-shared" will be
@@ -1063,6 +1074,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
return;
}
@@ -1124,13 +1136,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
vmem->system_reset->vmem = vmem;
qemu_register_resettable(obj);
-
- /*
- * Set ourselves as RamDiscardManager before the plug handler maps the
- * memory region and exposes it via an address space.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr,
- RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1138,12 +1143,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
- /*
- * The unplug handler unmapped the memory region, it cannot be
- * found via an address space anymore. Unset ourselves.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
-
qemu_unregister_resettable(OBJECT(vmem->system_reset));
object_unref(OBJECT(vmem->system_reset));
@@ -1156,6 +1155,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
+ /*
+ * The unplug handler unmapped the memory region, it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 71b4a411cc..7cfbe203f4 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2488,13 +2488,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
*
* This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
* that does not cover RAM, or a #MemoryRegion that already has a
- * #RamDiscardManager assigned.
+ * #RamDiscardManager assigned. Return 0 if the rdm is set successfully.
*
* @mr: the #MemoryRegion
* @rdm: #RamDiscardManager to set
*/
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
- RamDiscardManager *rdm);
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm);
/**
* memory_region_find: translate an address/size relative to a
diff --git a/system/memory.c b/system/memory.c
index 5e6eb459d5..35011f731e 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2083,12 +2083,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
return mr->rdm;
}
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
- RamDiscardManager *rdm)
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+ RamDiscardManager *rdm)
{
g_assert(memory_region_is_ram(mr));
- g_assert(!rdm || !mr->rdm);
+ if (mr->rdm && rdm) {
+ return -EBUSY;
+ }
+
mr->rdm = rdm;
+ return 0;
}
uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
--
2.50.1

View File

@ -0,0 +1,157 @@
From 3a2ef25070d8135a62dfca5bff35114f520c52ad Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:11:27 +0200
Subject: [PATCH 111/115] memory: Export a helper to get intersection of a
MemoryRegionSection with a given range
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [111/115] d7812c678358993209bbe280985f58fce9a34e18 (bonzini/rhel-qemu-kvm)
Rename the helper to memory_region_section_intersect_range() to make it
more generic. Meanwhile, define the @end as Int128 and replace the
related operations with Int128_* format since the helper is exported as
a wider API.
Suggested-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Link: https://lore.kernel.org/r/20250612082747.51539-2-chenyi.qiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit f47a672a72acd6e2712031f0bc4d4f3ae4b6302c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/virtio/virtio-mem.c | 32 +++++---------------------------
include/exec/memory.h | 30 ++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+), 27 deletions(-)
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 4977658312..ae26133f58 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
return ret;
}
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
- uint64_t offset, uint64_t size)
-{
- uint64_t start = MAX(s->offset_within_region, offset);
- uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
- offset + size);
-
- if (end <= start) {
- return false;
- }
-
- s->offset_within_address_space += start - s->offset_within_region;
- s->offset_within_region = start;
- s->size = int128_make64(end - start);
- return true;
-}
-
typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl->notify_discard(rdl, &tmp);
@@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
ret = rdl->notify_populate(rdl, &tmp);
@@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
if (rdl2 == rdl) {
break;
}
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl2->notify_discard(rdl2, &tmp);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 296fd068c0..71b4a411cc 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1200,6 +1200,36 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
*/
void memory_region_section_free_copy(MemoryRegionSection *s);
+/**
+ * memory_region_section_intersect_range: Adjust the memory section to cover
+ * the intersection with the given range.
+ *
+ * @s: the #MemoryRegionSection to be adjusted
+ * @offset: the offset of the given range in the memory region
+ * @size: the size of the given range
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static inline bool memory_region_section_intersect_range(MemoryRegionSection *s,
+ uint64_t offset,
+ uint64_t size)
+{
+ uint64_t start = MAX(s->offset_within_region, offset);
+ Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region),
+ s->size),
+ int128_add(int128_make64(offset),
+ int128_make64(size)));
+
+ if (int128_le(end, int128_make64(start))) {
+ return false;
+ }
+
+ s->offset_within_address_space += start - s->offset_within_region;
+ s->offset_within_region = start;
+ s->size = int128_sub(end, int128_make64(start));
+ return true;
+}
+
/**
* memory_region_init: Initialize a memory region
*
--
2.50.1

View File

@ -0,0 +1,277 @@
From a0a6a0d1131461f2695f28a673f410bbb33262ef Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:11:28 +0200
Subject: [PATCH 113/115] memory: Unify the definiton of ReplayRamPopulate()
and ReplayRamDiscard()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [113/115] 8ad320473d66f2956d701a4625f815d2fcd8ae4b (bonzini/rhel-qemu-kvm)
Update ReplayRamDiscard() function to return the result and unify the
ReplayRamPopulate() and ReplayRamDiscard() to ReplayRamDiscardState() at
the same time due to their identical definitions. This unification
simplifies related structures, such as VirtIOMEMReplayData, which makes
it cleaner.
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Link: https://lore.kernel.org/r/20250612082747.51539-4-chenyi.qiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 2205b8466733f8c6e3306c964f31c5a7cac69dfa)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/virtio/virtio-mem.c | 21 ++++++------
include/exec/memory.h | 74 ++++++++++++++++++++++++++++++++----------
migration/ram.c | 5 +--
system/memory.c | 12 +++----
4 files changed, 76 insertions(+), 36 deletions(-)
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 6284539243..32af43852a 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -1736,7 +1736,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
}
struct VirtIOMEMReplayData {
- void *fn;
+ ReplayRamDiscardState fn;
void *opaque;
};
@@ -1744,12 +1744,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
struct VirtIOMEMReplayData *data = arg;
- return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+ return data->fn(s, data->opaque);
}
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *s,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1768,14 +1768,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
{
struct VirtIOMEMReplayData *data = arg;
- ((ReplayRamDiscard)data->fn)(s, data->opaque);
- return 0;
+ return data->fn(s, data->opaque);
}
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *s,
- ReplayRamDiscard replay_fn,
- void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
struct VirtIOMEMReplayData data = {
@@ -1784,8 +1783,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
};
g_assert(s->mr == &vmem->memdev->mr);
- virtio_mem_for_each_unplugged_section(vmem, s, &data,
- virtio_mem_rdm_replay_discarded_cb);
+ return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_discarded_cb);
}
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 7cfbe203f4..563353eab6 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -566,8 +566,20 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl,
rdl->double_discard_supported = double_discard_supported;
}
-typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
-typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
+/**
+ * typedef ReplayRamDiscardState:
+ *
+ * The callback handler for #RamDiscardManagerClass.replay_populated/
+ * #RamDiscardManagerClass.replay_discarded to invoke on populated/discarded
+ * parts.
+ *
+ * @section: the #MemoryRegionSection of populated/discarded part
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if failed.
+ */
+typedef int (*ReplayRamDiscardState)(MemoryRegionSection *section,
+ void *opaque);
/*
* RamDiscardManagerClass:
@@ -641,36 +653,38 @@ struct RamDiscardManagerClass {
/**
* @replay_populated:
*
- * Call the #ReplayRamPopulate callback for all populated parts within the
- * #MemoryRegionSection via the #RamDiscardManager.
+ * Call the #ReplayRamDiscardState callback for all populated parts within
+ * the #MemoryRegionSection via the #RamDiscardManager.
*
* In case any call fails, no further calls are made.
*
* @rdm: the #RamDiscardManager
* @section: the #MemoryRegionSection
- * @replay_fn: the #ReplayRamPopulate callback
+ * @replay_fn: the #ReplayRamDiscardState callback
* @opaque: pointer to forward to the callback
*
* Returns 0 on success, or a negative error if any notification failed.
*/
int (*replay_populated)(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn, void *opaque);
+ ReplayRamDiscardState replay_fn, void *opaque);
/**
* @replay_discarded:
*
- * Call the #ReplayRamDiscard callback for all discarded parts within the
- * #MemoryRegionSection via the #RamDiscardManager.
+ * Call the #ReplayRamDiscardState callback for all discarded parts within
+ * the #MemoryRegionSection via the #RamDiscardManager.
*
* @rdm: the #RamDiscardManager
* @section: the #MemoryRegionSection
- * @replay_fn: the #ReplayRamDiscard callback
+ * @replay_fn: the #ReplayRamDiscardState callback
* @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
*/
- void (*replay_discarded)(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn, void *opaque);
+ int (*replay_discarded)(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn, void *opaque);
/**
* @register_listener:
@@ -711,15 +725,41 @@ uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
const MemoryRegionSection *section);
+/**
+ * ram_discard_manager_replay_populated:
+ *
+ * A wrapper to call the #RamDiscardManagerClass.replay_populated callback
+ * of the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamDiscardState callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque);
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn,
- void *opaque);
+/**
+ * ram_discard_manager_replay_discarded:
+ *
+ * A wrapper to call the #RamDiscardManagerClass.replay_discarded callback
+ * of the #RamDiscardManager.
+ *
+ * @rdm: the #RamDiscardManager
+ * @section: the #MemoryRegionSection
+ * @replay_fn: the #ReplayRamDiscardState callback
+ * @opaque: pointer to forward to the callback
+ *
+ * Returns 0 on success, or a negative error if any notification failed.
+ */
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque);
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl,
diff --git a/migration/ram.c b/migration/ram.c
index 0803f85b8a..aaeea04546 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -874,8 +874,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
return ret;
}
-static void dirty_bitmap_clear_section(MemoryRegionSection *section,
- void *opaque)
+static int dirty_bitmap_clear_section(MemoryRegionSection *section,
+ void *opaque)
{
const hwaddr offset = section->offset_within_region;
const hwaddr size = int128_get64(section->size);
@@ -894,6 +894,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section,
}
*cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
bitmap_clear(rb->bmap, start, npages);
+ return 0;
}
/*
diff --git a/system/memory.c b/system/memory.c
index 35011f731e..8c36ab17b6 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2115,7 +2115,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *section,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
@@ -2124,15 +2124,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
return rdmc->replay_populated(rdm, section, replay_fn, opaque);
}
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *section,
- ReplayRamDiscard replay_fn,
- void *opaque)
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
g_assert(rdmc->replay_discarded);
- rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+ return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
}
void ram_discard_manager_register_listener(RamDiscardManager *rdm,
--
2.50.1

View File

@ -0,0 +1,129 @@
From 8d8fd49920c7b4c49d2d7ca1666c3cc7a53a3e96 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:11:28 +0200
Subject: [PATCH 115/115] physmem: Support coordinated discarding of RAM with
guest_memfd
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [115/115] 5dfe5036d7b5524f0d0af4cde23cdd9ca696717b (bonzini/rhel-qemu-kvm)
A new field, attributes, was introduced in RAMBlock to link to a
RamBlockAttributes object, which centralizes all guest_memfd related
information (such as fd and status bitmap) within a RAMBlock.
Create and initialize the RamBlockAttributes object upon ram_block_add().
Meanwhile, register the object in the target RAMBlock's MemoryRegion.
After that, guest_memfd-backed RAMBlock is associated with the
RamDiscardManager interface, and the users can execute RamDiscardManager
specific handling. For example, VFIO will register the
RamDiscardListener and get notifications when the state_change() helper
invokes.
As coordinate discarding of RAM with guest_memfd is now supported, only
block uncoordinated discard.
Tested-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Link: https://lore.kernel.org/r/20250612082747.51539-6-chenyi.qiang@intel.com
Signed-off-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 2fde3fb916079ee0ff0fc26d9446c813b1d5cc28)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: No CPR
---
accel/kvm/kvm-all.c | 9 +++++++++
include/exec/ramblock.h | 1 +
system/physmem.c | 23 +++++++++++++++++++++--
3 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 43c10c82f6..1fd7773a28 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3073,6 +3073,15 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
+ ret = ram_block_attributes_state_change(RAM_BLOCK_ATTRIBUTES(mr->rdm),
+ offset, size, to_private);
+ if (ret) {
+ error_report("Failed to notify the listener the state change of "
+ "(0x%"HWADDR_PRIx" + 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ goto out_unref;
+ }
+
if (to_private) {
if (rb->page_size != qemu_real_host_page_size()) {
/*
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 9ae774d268..a504765038 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -46,6 +46,7 @@ struct RAMBlock {
int fd;
uint64_t fd_offset;
int guest_memfd;
+ RamBlockAttributes *attributes;
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;
diff --git a/system/physmem.c b/system/physmem.c
index dccc95030b..35f8f25e22 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1889,7 +1889,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
}
assert(new_block->guest_memfd < 0);
- ret = ram_block_discard_require(true);
+ ret = ram_block_coordinated_discard_require(true);
if (ret < 0) {
error_setg_errno(errp, -ret,
"cannot set up private guest memory: discard currently blocked");
@@ -1903,6 +1903,24 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
qemu_mutex_unlock_ramlist();
goto out_free;
}
+
+ /*
+ * The attribute bitmap of the RamBlockAttributes is default to
+ * discarded, which mimics the behavior of kvm_set_phys_mem() when it
+ * calls kvm_set_memory_attributes_private(). This leads to a brief
+ * period of inconsistency between the creation of the RAMBlock and its
+ * mapping into the physical address space. However, this is not
+ * problematic, as no users rely on the attribute status to perform
+ * any actions during this interval.
+ */
+ new_block->attributes = ram_block_attributes_create(new_block);
+ if (!new_block->attributes) {
+ error_setg(errp, "Failed to create ram block attribute");
+ close(new_block->guest_memfd);
+ ram_block_coordinated_discard_require(false);
+ qemu_mutex_unlock_ramlist();
+ goto out_free;
+ }
}
new_ram_size = MAX(old_ram_size,
@@ -2159,8 +2177,9 @@ static void reclaim_ramblock(RAMBlock *block)
}
if (block->guest_memfd >= 0) {
+ ram_block_attributes_destroy(block->attributes);
close(block->guest_memfd);
- ram_block_discard_require(false);
+ ram_block_coordinated_discard_require(false);
}
g_free(block);
--
2.50.1

View File

@ -0,0 +1,72 @@
From 40d97d335471a77b1491c124d2c109db68bf8ca6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 17 Feb 2025 13:08:12 +0100
Subject: [PATCH 020/115] physmem: replace assertion with error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [20/115] 5c17949c668857760ee80279e5776e8dcf4c7c11 (bonzini/rhel-qemu-kvm)
It is possible to start QEMU with a confidential-guest-support object
even in TCG mode. While there is already a check in qemu_machine_creation_done:
if (machine->cgs && !machine->cgs->ready) {
error_setg(errp, "accelerator does not support confidential guest %s",
object_get_typename(OBJECT(machine->cgs)));
exit(1);
}
the creation of RAMBlocks happens earlier, in qemu_init_board(), if
the command line does not override the default memory backend with
-M memdev. Then the RAMBlock will try to use guest_memfd (because
machine_require_guest_memfd correctly returns true; at least correctly
according to the current implementation) and trigger the assertion
failure for kvm_enabled(). This happend with a command line as
simple as the following:
qemu-system-x86_64 -m 512 -nographic -object sev-snp-guest,reduced-phys-bits=48,id=sev0 \
-M q35,kernel-irqchip=split,confidential-guest-support=sev0
qemu-system-x86_64: ../system/physmem.c:1871: ram_block_add: Assertion `kvm_enabled()' failed.
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20250217120812.396522-1-pbonzini@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6debfb2cb1795427d2dc6a741c7430a233c76695)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
system/physmem.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/system/physmem.c b/system/physmem.c
index 94600a33ec..dccc95030b 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1882,7 +1882,11 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
if (new_block->flags & RAM_GUEST_MEMFD) {
int ret;
- assert(kvm_enabled());
+ if (!kvm_enabled()) {
+ error_setg(errp, "cannot set up private guest memory for %s: KVM required",
+ object_get_typename(OBJECT(current_machine->cgs)));
+ goto out_free;
+ }
assert(new_block->guest_memfd < 0);
ret = ram_block_discard_require(true);
--
2.50.1

View File

@ -0,0 +1,79 @@
From 60ab87c5cad64e3169ee65f396c6d9f1f7eb0daa Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:44 +0200
Subject: [PATCH 022/115] qom: reverse order of instance_post_init calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [22/115] e4e2393adffd671dc4d4f7147c620884757c7d74 (bonzini/rhel-qemu-kvm)
Currently, the instance_post_init calls are performed from the leaf
class and all the way up to Object. This is incorrect because the
leaf class cannot observe property values applied by the superclasses;
for example, a compat property will be set on a device *after*
the class's post_init callback has run.
In particular this makes it impossible for implementations of
accel_cpu_instance_init() to operate based on the actual values of
the properties, though it seems that cxl_dsp_instance_post_init and
rp_instance_post_init might have similar issues.
Follow instead the same order as instance_init, starting with Object
and running the child class's instance_post_init after the parent.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 220c739903cec99df032219ac94c45b5269a0ab5)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/qom/object.h | 3 ++-
qom/object.c | 8 ++++----
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/include/qom/object.h b/include/qom/object.h
index 13d3a655dd..668dd1cc08 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -444,7 +444,8 @@ struct Object
* class will have already been initialized so the type is only responsible
* for initializing its own members.
* @instance_post_init: This function is called to finish initialization of
- * an object, after all @instance_init functions were called.
+ * an object, after all @instance_init functions were called, as well as
+ * @instance_post_init functions for the parent classes.
* @instance_finalize: This function is called during object destruction. This
* is called before the parent @instance_finalize function has been called.
* An object should only free the members that are unique to its type in this
diff --git a/qom/object.c b/qom/object.c
index 157a45c5f8..c03cd3c733 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -423,13 +423,13 @@ static void object_init_with_type(Object *obj, TypeImpl *ti)
static void object_post_init_with_type(Object *obj, TypeImpl *ti)
{
- if (ti->instance_post_init) {
- ti->instance_post_init(obj);
- }
-
if (type_has_parent(ti)) {
object_post_init_with_type(obj, type_get_parent(ti));
}
+
+ if (ti->instance_post_init) {
+ ti->instance_post_init(obj);
+ }
}
bool object_apply_global_props(Object *obj, const GPtrArray *props,
--
2.50.1

View File

@ -0,0 +1,613 @@
From 13a29003f5a5502fc2cd13cb22f3fd6318e80196 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:11:28 +0200
Subject: [PATCH 114/115] ram-block-attributes: Introduce RamBlockAttributes to
manage RAMBlock with guest_memfd
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [114/115] eca60c04c6204ee91e4aedb84c993155605e6f5a (bonzini/rhel-qemu-kvm)
Commit 852f0048f3 ("RAMBlock: make guest_memfd require uncoordinated
discard") highlighted that subsystems like VFIO may disable RAM block
discard. However, guest_memfd relies on discard operations for page
conversion between private and shared memory, potentially leading to
the stale IOMMU mapping issue when assigning hardware devices to
confidential VMs via shared memory. To address this and allow shared
device assignement, it is crucial to ensure the VFIO system refreshes
its IOMMU mappings.
RamDiscardManager is an existing interface (used by virtio-mem) to
adjust VFIO mappings in relation to VM page assignment. Effectively page
conversion is similar to hot-removing a page in one mode and adding it
back in the other. Therefore, similar actions are required for page
conversion events. Introduce the RamDiscardManager to guest_memfd to
facilitate this process.
Since guest_memfd is not an object, it cannot directly implement the
RamDiscardManager interface. Implementing it in HostMemoryBackend is
not appropriate because guest_memfd is per RAMBlock, and some RAMBlocks
have a memory backend while others do not. Notably, virtual BIOS
RAMBlocks using memory_region_init_ram_guest_memfd() do not have a
backend.
To manage RAMBlocks with guest_memfd, define a new object named
RamBlockAttributes to implement the RamDiscardManager interface. This
object can store the guest_memfd information such as the bitmap for
shared memory and the registered listeners for event notifications. A
new state_change() helper function is provided to notify listeners, such
as VFIO, allowing VFIO to do dynamically DMA map and unmap for the shared
memory according to conversion events. Note that in the current context
of RamDiscardManager for guest_memfd, the shared state is analogous to
being populated, while the private state can be considered discarded for
simplicity. In the future, it would be more complicated if considering
more states like private/shared/discarded at the same time.
In current implementation, memory state tracking is performed at the
host page size granularity, as the minimum conversion size can be one
page per request. Additionally, VFIO expected the DMA mapping for a
specific IOVA to be mapped and unmapped with the same granularity.
Confidential VMs may perform partial conversions, such as conversions on
small regions within a larger one. To prevent such invalid cases and
until support for DMA mapping cut operations is available, all
operations are performed with 4K granularity.
In addition, memory conversion failures cause QEMU to quit rather than
resuming the guest or retrying the operation at present. It would be
future work to add more error handling or rollback mechanisms once
conversion failures are allowed. For example, in-place conversion of
guest_memfd could retry the unmap operation during the conversion from
shared to private. For now, keep the complex error handling out of the
picture as it is not required.
Tested-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Link: https://lore.kernel.org/r/20250612082747.51539-5-chenyi.qiang@intel.com
[peterx: squash fixup from Chenyi to fix builds]
Signed-off-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 5d6483edaa9232d8f3709f68c8eab4bc2033fb70)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts: system/->sysemu/ or exec/, context, class_init argument is not const
---
MAINTAINERS | 1 +
include/exec/ramblock.h | 22 ++
system/meson.build | 1 +
system/ram-block-attributes.c | 444 ++++++++++++++++++++++++++++++++++
system/trace-events | 3 +
5 files changed, 471 insertions(+)
create mode 100644 system/ram-block-attributes.c
diff --git a/MAINTAINERS b/MAINTAINERS
index f7b7ceffc4..87ba88da84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3056,6 +3056,7 @@ F: system/memory.c
F: system/memory_mapping.c
F: system/physmem.c
F: include/exec/memory-internal.h
+F: system/ram-block-attributes.c
F: scripts/coccinelle/memory-region-housekeeping.cocci
Memory devices
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 0babd105c0..9ae774d268 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -23,6 +23,10 @@
#include "cpu-common.h"
#include "qemu/rcu.h"
#include "exec/ramlist.h"
+#include "sysemu/hostmem.h"
+
+#define TYPE_RAM_BLOCK_ATTRIBUTES "ram-block-attributes"
+OBJECT_DECLARE_SIMPLE_TYPE(RamBlockAttributes, RAM_BLOCK_ATTRIBUTES)
struct RAMBlock {
struct rcu_head rcu;
@@ -90,5 +94,23 @@ struct RAMBlock {
*/
ram_addr_t postcopy_length;
};
+
+struct RamBlockAttributes {
+ Object parent;
+
+ RAMBlock *ram_block;
+
+ /* 1-setting of the bitmap represents ram is populated (shared) */
+ unsigned bitmap_size;
+ unsigned long *bitmap;
+
+ QLIST_HEAD(, RamDiscardListener) rdl_list;
+};
+
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block);
+void ram_block_attributes_destroy(RamBlockAttributes *attr);
+int ram_block_attributes_state_change(RamBlockAttributes *attr, uint64_t offset,
+ uint64_t size, bool to_discard);
+
#endif
#endif
diff --git a/system/meson.build b/system/meson.build
index a296270cb0..b13d9e71ff 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -16,6 +16,7 @@ system_ss.add(files(
'dirtylimit.c',
'dma-helpers.c',
'globals.c',
+ 'ram-block-attributes.c',
'memory_mapping.c',
'qdev-monitor.c',
'qtest.c',
diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c
new file mode 100644
index 0000000000..0bded54e9c
--- /dev/null
+++ b/system/ram-block-attributes.c
@@ -0,0 +1,444 @@
+/*
+ * QEMU ram block attributes
+ *
+ * Copyright Intel
+ *
+ * Author:
+ * Chenyi Qiang <chenyi.qiang@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "exec/ramblock.h"
+#include "trace.h"
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
+ ram_block_attributes,
+ RAM_BLOCK_ATTRIBUTES,
+ OBJECT,
+ { TYPE_RAM_DISCARD_MANAGER },
+ { })
+
+static size_t
+ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
+{
+ /*
+ * Because page conversion could be manipulated in the size of at least 4K
+ * or 4K aligned, Use the host page size as the granularity to track the
+ * memory attribute.
+ */
+ g_assert(attr && attr->ram_block);
+ g_assert(attr->ram_block->page_size == qemu_real_host_page_size());
+ return attr->ram_block->page_size;
+}
+
+
+static bool
+ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
+ const MemoryRegionSection *section)
+{
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ const uint64_t first_bit = section->offset_within_region / block_size;
+ const uint64_t last_bit =
+ first_bit + int128_get64(section->size) / block_size - 1;
+ unsigned long first_discarded_bit;
+
+ first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1,
+ first_bit);
+ return first_discarded_bit > last_bit;
+}
+
+typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
+ void *arg);
+
+static int
+ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ return rdl->notify_populate(rdl, section);
+}
+
+static int
+ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamDiscardListener *rdl = arg;
+
+ rdl->notify_discard(rdl, section);
+ return 0;
+}
+
+static int
+ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
+ MemoryRegionSection *section,
+ void *arg,
+ ram_block_attributes_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ int ret = 0;
+
+ first_bit = section->offset_within_region / block_size;
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit);
+
+ while (first_bit < attr->bitmap_size) {
+ MemoryRegionSection tmp = *section;
+
+ offset = first_bit * block_size;
+ last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * block_size;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ break;
+ }
+
+ ret = cb(&tmp, arg);
+ if (ret) {
+ error_report("%s: Failed to notify RAM discard listener: %s",
+ __func__, strerror(-ret));
+ break;
+ }
+
+ first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ last_bit + 2);
+ }
+
+ return ret;
+}
+
+static int
+ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
+ MemoryRegionSection *section,
+ void *arg,
+ ram_block_attributes_section_cb cb)
+{
+ unsigned long first_bit, last_bit;
+ uint64_t offset, size;
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ int ret = 0;
+
+ first_bit = section->offset_within_region / block_size;
+ first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+ first_bit);
+
+ while (first_bit < attr->bitmap_size) {
+ MemoryRegionSection tmp = *section;
+
+ offset = first_bit * block_size;
+ last_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit + 1) - 1;
+ size = (last_bit - first_bit + 1) * block_size;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ break;
+ }
+
+ ret = cb(&tmp, arg);
+ if (ret) {
+ error_report("%s: Failed to notify RAM discard listener: %s",
+ __func__, strerror(-ret));
+ break;
+ }
+
+ first_bit = find_next_zero_bit(attr->bitmap,
+ attr->bitmap_size,
+ last_bit + 2);
+ }
+
+ return ret;
+}
+
+static uint64_t
+ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
+ const MemoryRegion *mr)
+{
+ const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+
+ g_assert(mr == attr->ram_block->mr);
+ return ram_block_attributes_get_block_size(attr);
+}
+
+static void
+ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ int ret;
+
+ g_assert(section->mr == attr->ram_block->mr);
+ rdl->section = memory_region_section_new_copy(section);
+
+ QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
+
+ ret = ram_block_attributes_for_each_populated_section(attr, section, rdl,
+ ram_block_attributes_notify_populate_cb);
+ if (ret) {
+ error_report("%s: Failed to register RAM discard listener: %s",
+ __func__, strerror(-ret));
+ exit(1);
+ }
+}
+
+static void
+ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
+ RamDiscardListener *rdl)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ int ret;
+
+ g_assert(rdl->section);
+ g_assert(rdl->section->mr == attr->ram_block->mr);
+
+ if (rdl->double_discard_supported) {
+ rdl->notify_discard(rdl, rdl->section);
+ } else {
+ ret = ram_block_attributes_for_each_populated_section(attr,
+ rdl->section, rdl, ram_block_attributes_notify_discard_cb);
+ if (ret) {
+ error_report("%s: Failed to unregister RAM discard listener: %s",
+ __func__, strerror(-ret));
+ exit(1);
+ }
+ }
+
+ memory_region_section_free_copy(rdl->section);
+ rdl->section = NULL;
+ QLIST_REMOVE(rdl, next);
+}
+
+typedef struct RamBlockAttributesReplayData {
+ ReplayRamDiscardState fn;
+ void *opaque;
+} RamBlockAttributesReplayData;
+
+static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
+ void *arg)
+{
+ RamBlockAttributesReplayData *data = arg;
+
+ return data->fn(section, data->opaque);
+}
+
+static int
+ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+ g_assert(section->mr == attr->ram_block->mr);
+ return ram_block_attributes_for_each_populated_section(attr, section, &data,
+ ram_block_attributes_rdm_replay_cb);
+}
+
+static int
+ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *section,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+ RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+ g_assert(section->mr == attr->ram_block->mr);
+ return ram_block_attributes_for_each_discarded_section(attr, section, &data,
+ ram_block_attributes_rdm_replay_cb);
+}
+
+static bool
+ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
+ uint64_t size)
+{
+ MemoryRegion *mr = attr->ram_block->mr;
+
+ g_assert(mr);
+
+ uint64_t region_size = memory_region_size(mr);
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+
+ if (!QEMU_IS_ALIGNED(offset, block_size) ||
+ !QEMU_IS_ALIGNED(size, block_size)) {
+ return false;
+ }
+ if (offset + size <= offset) {
+ return false;
+ }
+ if (offset + size > region_size) {
+ return false;
+ }
+ return true;
+}
+
+static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
+ uint64_t offset,
+ uint64_t size)
+{
+ RamDiscardListener *rdl;
+
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ continue;
+ }
+ rdl->notify_discard(rdl, &tmp);
+ }
+}
+
+static int
+ram_block_attributes_notify_populate(RamBlockAttributes *attr,
+ uint64_t offset, uint64_t size)
+{
+ RamDiscardListener *rdl;
+ int ret = 0;
+
+ QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+ MemoryRegionSection tmp = *rdl->section;
+
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+ continue;
+ }
+ ret = rdl->notify_populate(rdl, &tmp);
+ if (ret) {
+ break;
+ }
+ }
+
+ return ret;
+}
+
+int ram_block_attributes_state_change(RamBlockAttributes *attr,
+ uint64_t offset, uint64_t size,
+ bool to_discard)
+{
+ const size_t block_size = ram_block_attributes_get_block_size(attr);
+ const unsigned long first_bit = offset / block_size;
+ const unsigned long nbits = size / block_size;
+ const unsigned long last_bit = first_bit + nbits - 1;
+ const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
+ first_bit) > last_bit;
+ const bool is_populated = find_next_zero_bit(attr->bitmap,
+ attr->bitmap_size, first_bit) > last_bit;
+ unsigned long bit;
+ int ret = 0;
+
+ if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
+ error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
+ "0x%" PRIx64, __func__, offset, size);
+ return -EINVAL;
+ }
+
+ trace_ram_block_attributes_state_change(offset, size,
+ is_discarded ? "discarded" :
+ is_populated ? "populated" :
+ "mixture",
+ to_discard ? "discarded" :
+ "populated");
+ if (to_discard) {
+ if (is_discarded) {
+ /* Already private */
+ } else if (is_populated) {
+ /* Completely shared */
+ bitmap_clear(attr->bitmap, first_bit, nbits);
+ ram_block_attributes_notify_discard(attr, offset, size);
+ } else {
+ /* Unexpected mixture: process individual blocks */
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
+ if (!test_bit(bit, attr->bitmap)) {
+ continue;
+ }
+ clear_bit(bit, attr->bitmap);
+ ram_block_attributes_notify_discard(attr, bit * block_size,
+ block_size);
+ }
+ }
+ } else {
+ if (is_populated) {
+ /* Already shared */
+ } else if (is_discarded) {
+ /* Completely private */
+ bitmap_set(attr->bitmap, first_bit, nbits);
+ ret = ram_block_attributes_notify_populate(attr, offset, size);
+ } else {
+ /* Unexpected mixture: process individual blocks */
+ for (bit = first_bit; bit < first_bit + nbits; bit++) {
+ if (test_bit(bit, attr->bitmap)) {
+ continue;
+ }
+ set_bit(bit, attr->bitmap);
+ ret = ram_block_attributes_notify_populate(attr,
+ bit * block_size,
+ block_size);
+ if (ret) {
+ break;
+ }
+ }
+ }
+ }
+
+ return ret;
+}
+
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
+{
+ const int block_size = qemu_real_host_page_size();
+ RamBlockAttributes *attr;
+ MemoryRegion *mr = ram_block->mr;
+
+ attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
+
+ attr->ram_block = ram_block;
+ if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) {
+ object_unref(OBJECT(attr));
+ return NULL;
+ }
+ attr->bitmap_size =
+ ROUND_UP(int128_get64(mr->size), block_size) / block_size;
+ attr->bitmap = bitmap_new(attr->bitmap_size);
+
+ return attr;
+}
+
+void ram_block_attributes_destroy(RamBlockAttributes *attr)
+{
+ g_assert(attr);
+
+ g_free(attr->bitmap);
+ memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL);
+ object_unref(OBJECT(attr));
+}
+
+static void ram_block_attributes_init(Object *obj)
+{
+ RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj);
+
+ QLIST_INIT(&attr->rdl_list);
+}
+
+static void ram_block_attributes_finalize(Object *obj)
+{
+}
+
+static void ram_block_attributes_class_init(ObjectClass *klass,
+ void *data)
+{
+ RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+
+ rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;
+ rdmc->register_listener = ram_block_attributes_rdm_register_listener;
+ rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;
+ rdmc->is_populated = ram_block_attributes_rdm_is_populated;
+ rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
+ rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
+}
diff --git a/system/trace-events b/system/trace-events
index 2ed1d59b1f..9fd7217472 100644
--- a/system/trace-events
+++ b/system/trace-events
@@ -44,3 +44,6 @@ dirtylimit_state_finalize(void)
dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+
+# ram-block-attributes.c
+ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'"
--
2.50.1

View File

@ -0,0 +1,33 @@
From 3cb78d36244833d5e11e88de33bbdbe93a641e16 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 110/115] redhat: allow 5-level paging for TDX VMs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [110/115] dd69bc652e2a735234165832ea4bc674753d7fb7 (bonzini/rhel-qemu-kvm)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/tdx.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 2ff5211794..e65b1727cf 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -754,6 +754,7 @@ static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
}
object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+ object_property_set_int(OBJECT(cpu), "host-phys-bits-limit", 0, &error_abort);
/* invtsc is fixed1 for TD guest */
object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
--
2.50.1

View File

@ -0,0 +1,33 @@
From 2e9c06611ccabf1a31dfe666814395ce48adae2e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jul 2025 18:03:50 +0200
Subject: [PATCH 109/115] redhat: enable CONFIG_TDX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 391: TDX support, including attestation and device assignment
RH-Jira: RHEL-15710 RHEL-20798 RHEL-49728
RH-Acked-by: Yash Mankad <None>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Commit: [109/115] 66eac186ba4c28ffd4b8612053feab81ad7ac608 (bonzini/rhel-qemu-kvm)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 +
1 file changed, 1 insertion(+)
diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
index 2b15fdc2db..6379077253 100644
--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
@@ -73,6 +73,7 @@ CONFIG_SERIAL_PCI=y
CONFIG_SEV=y
CONFIG_SMBIOS=y
CONFIG_SMBUS_EEPROM=y
+CONFIG_TDX=y
CONFIG_TEST_DEVICES=y
CONFIG_USB=y
CONFIG_USB_EHCI=y
--
2.50.1

Some files were not shown because too many files have changed in this diff Show More