* Wed Aug 14 2024 Miroslav Rezanina <mrezanin@redhat.com> - 9.0.0-8

- kvm-introduce-pc_rhel_9_5_compat.patch [RHEL-39544]
- kvm-target-i386-add-guest-phys-bits-cpu-property.patch [RHEL-39544]
- kvm-kvm-add-support-for-guest-physical-bits.patch [RHEL-39544]
- kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch [RHEL-39544]
- kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch [RHEL-39544]
- kvm-target-i386-Add-new-CPU-model-SierraForest.patch [RHEL-39544]
- kvm-target-i386-Export-RFDS-bit-to-guests.patch [RHEL-39544]
- kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch [RHEL-39544]
- kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch [RHEL-39544]
- kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch [RHEL-39544]
- kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch [RHEL-39544]
- kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544]
- kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch [RHEL-39544]
- kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544]
- kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch [RHEL-39544]
- kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch [RHEL-39544]
- kvm-linux-headers-update-to-current-kvm-next.patch [RHEL-39544]
- kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch [RHEL-39544]
- kvm-KVM-track-whether-guest-state-is-encrypted.patch [RHEL-39544]
- kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch [RHEL-39544]
- kvm-target-i386-introduce-x86-confidential-guest.patch [RHEL-39544]
- kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch [RHEL-39544]
- kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch [RHEL-39544]
- kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch [RHEL-39544]
- kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch [RHEL-39544]
- kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch [RHEL-39544]
- kvm-kvm-Introduce-support-for-memory_attributes.patch [RHEL-39544]
- kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch [RHEL-39544]
- kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch [RHEL-39544]
- kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch [RHEL-39544]
- kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch [RHEL-39544]
- kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch [RHEL-39544]
- kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch [RHEL-39544]
- kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch [RHEL-39544]
- kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch [RHEL-39544]
- kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch [RHEL-39544]
- kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch [RHEL-39544]
- kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch [RHEL-39544]
- kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch [RHEL-39544]
- kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch [RHEL-39544]
- kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch [RHEL-39544]
- kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch [RHEL-39544]
- kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch [RHEL-39544]
- kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch [RHEL-39544]
- kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch [RHEL-39544]
- kvm-hw-i386-split-x86.c-in-multiple-parts.patch [RHEL-39544]
- kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch [RHEL-39544]
- kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch [RHEL-39544]
- kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch [RHEL-39544]
- kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch [RHEL-39544]
- kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch [RHEL-39544]
- kvm-linux-headers-Update-to-current-kvm-next.patch [RHEL-39544]
- kvm-update-linux-headers-import-linux-kvm_para.h-header.patch [RHEL-39544]
- kvm-machine-allow-early-use-of-machine_require_guest_mem.patch [RHEL-39544]
- kvm-i386-sev-Replace-error_report-with-error_setg.patch [RHEL-39544]
- kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch [RHEL-39544]
- kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch [RHEL-39544]
- kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch [RHEL-39544]
- kvm-i386-sev-Introduce-sev-snp-guest-object.patch [RHEL-39544]
- kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch [RHEL-39544]
- kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch [RHEL-39544]
- kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch [RHEL-39544]
- kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch [RHEL-39544]
- kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch [RHEL-39544]
- kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch [RHEL-39544]
- kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch [RHEL-39544]
- kvm-i386-sev-Add-the-SNP-launch-start-context.patch [RHEL-39544]
- kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch [RHEL-39544]
- kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch [RHEL-39544]
- kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch [RHEL-39544]
- kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch [RHEL-39544]
- kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch [RHEL-39544]
- kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch [RHEL-39544]
- kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch [RHEL-39544]
- kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch [RHEL-39544]
- kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch [RHEL-39544]
- kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch [RHEL-39544]
- kvm-i386-sev-Extract-build_kernel_loader_hashes.patch [RHEL-39544]
- kvm-i386-sev-Reorder-struct-declarations.patch [RHEL-39544]
- kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch [RHEL-39544]
- kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch [RHEL-39544]
- kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch [RHEL-39544]
- kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch [RHEL-39544]
- kvm-i386-sev-fix-unreachable-code-coverity-issue.patch [RHEL-39544]
- kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch [RHEL-39544]
- kvm-i386-sev-Return-when-sev_common-is-null.patch [RHEL-39544]
- kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch [RHEL-39544]
- kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch [RHEL-39544]
- kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch [RHEL-39544]
- kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch [RHEL-39544]
- kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch [RHEL-39544]
- kvm-virtio-rng-block-max-bytes-0.patch [RHEL-50336]
- kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch [RHEL-50000]
- kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch [RHEL-50000]
- kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch [RHEL-50000]
- kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch [RHEL-50000]
- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52617]
- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52617]
- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52617]
- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52617]
- Resolves: RHEL-39544
  ([QEMU] Add support for AMD SEV-SNP to Qemu)
- Resolves: RHEL-50336
  (Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices)
- Resolves: RHEL-50000
  (scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert)
- Resolves: RHEL-52617
  (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5])
This commit is contained in:
Miroslav Rezanina 2024-08-14 01:38:22 -04:00
parent 1048755cbc
commit b585ec0abd
101 changed files with 18060 additions and 1 deletions

View File

@ -0,0 +1,139 @@
From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 20 Mar 2024 03:39:03 -0500
Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via
MachineState
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm)
Add a new member "guest_memfd" to memory backends. When it's set
to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm
guest_memfd will be allocated during RAMBlock allocation.
Memory backend's @guest_memfd is wired with @require_guest_memfd
field of MachineState. It avoid looking up the machine in phymem.c.
MachineState::require_guest_memfd is supposed to be set by any VMs
that requires KVM guest memfd as private memory, e.g., TDX VM.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-ID: <20240320083945.991426-8-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
backends/hostmem-file.c | 1 +
backends/hostmem-memfd.c | 1 +
backends/hostmem-ram.c | 1 +
backends/hostmem.c | 1 +
hw/core/machine.c | 5 +++++
include/hw/boards.h | 2 ++
include/sysemu/hostmem.h | 1 +
7 files changed, 12 insertions(+)
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index ac3e433cbd..3c69db7946 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
ram_flags |= fb->readonly ? RAM_READONLY_FD : 0;
ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE;
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index 3923ea9364..745ead0034 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
backend->size, ram_flags, fd, 0, errp);
}
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index d121249f0f..f7d81af783 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
+ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0;
return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
name, backend->size,
ram_flags, errp);
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 81a72ce40b..eb9682b4a8 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj)
/* TODO: convert access to globals to compat properties */
backend->merge = machine_mem_merge(machine);
backend->dump = machine_dump_guest_core(machine);
+ backend->guest_memfd = machine_require_guest_memfd(machine);
backend->reserve = true;
backend->prealloc_threads = machine->smp.cpus;
}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 92609aae27..07b994e136 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine)
return machine->mem_merge;
}
+bool machine_require_guest_memfd(MachineState *machine)
+{
+ return machine->require_guest_memfd;
+}
+
static char *cpu_slot_to_string(const CPUArchId *cpu)
{
GString *s = g_string_new(NULL);
diff --git a/include/hw/boards.h b/include/hw/boards.h
index cca62f906b..815a1c4b26 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
+bool machine_require_guest_memfd(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
void machine_set_cpu_numa_node(MachineState *machine,
const CpuInstanceProperties *props,
@@ -372,6 +373,7 @@ struct MachineState {
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
+ bool require_guest_memfd;
bool usb;
bool usb_disabled;
char *firmware;
diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h
index 0e411aaa29..04b884bf42 100644
--- a/include/sysemu/hostmem.h
+++ b/include/sysemu/hostmem.h
@@ -74,6 +74,7 @@ struct HostMemoryBackend {
uint64_t size;
bool merge, dump, use_canonical_path;
bool prealloc, is_mapped, share, reserve;
+ bool guest_memfd;
uint32_t prealloc_threads;
ThreadContext *prealloc_context;
DECLARE_BITMAP(host_nodes, MAX_NODES + 1);
--
2.39.3

View File

@ -0,0 +1,203 @@
From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Mar 2024 14:41:33 -0400
Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm)
Board reset requires writing a fresh CPU state. As far as KVM is
concerned, the only thing that blocks reset is that CPU state is
encrypted; therefore, kvm_cpus_are_resettable() can simply check
if that is the case.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-accel-ops.c | 2 +-
accel/kvm/kvm-all.c | 5 -----
include/sysemu/kvm.h | 10 ----------
target/arm/kvm.c | 5 -----
target/i386/kvm/kvm.c | 5 -----
target/loongarch/kvm/kvm.c | 5 -----
target/mips/kvm.c | 5 -----
target/ppc/kvm.c | 5 -----
target/riscv/kvm/kvm-cpu.c | 5 -----
target/s390x/kvm/kvm.c | 5 -----
10 files changed, 1 insertion(+), 51 deletions(-)
diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c
index b3c946dc4b..74e3c5785b 100644
--- a/accel/kvm/kvm-accel-ops.c
+++ b/accel/kvm/kvm-accel-ops.c
@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu)
static bool kvm_cpus_are_resettable(void)
{
- return !kvm_enabled() || kvm_cpu_check_are_resettable();
+ return !kvm_enabled() || !kvm_state->guest_state_protected;
}
#ifdef KVM_CAP_SET_GUEST_DEBUG
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ec0f6df7c5..b51e09a583 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void)
s->coalesced_flush_in_progress = false;
}
-bool kvm_cpu_check_are_resettable(void)
-{
- return kvm_arch_cpu_check_are_resettable();
-}
-
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 302e8f6f1e..54f4d83a37 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
/* Notify resamplefd for EOI of specific interrupts. */
void kvm_resample_fd_notify(int gsi);
-/**
- * kvm_cpu_check_are_resettable - return whether CPUs can be reset
- *
- * Returns: true: CPUs are resettable
- * false: CPUs are not resettable
- */
-bool kvm_cpu_check_are_resettable(void);
-
-bool kvm_arch_cpu_check_are_resettable(void);
-
bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index ab85d628a8..21ebbf3b8f 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
return (data - 32) & 0xffff;
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v,
const char *name, void *opaque,
Error **errp)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e271652620..a12207a8ee 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void)
return has_msr_umwait;
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return !sev_es_enabled();
-}
-
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index d630cc39cb..8224d94333 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
return true;
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
int ret = 0;
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index 6c52e59f55..a631ab544f 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine)
return -1;
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
void kvm_arch_accel_class_init(ObjectClass *oc)
{
}
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 59f640cf7b..9d9d9f0d79 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
}
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
void kvm_arch_accel_class_init(ObjectClass *oc)
{
}
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index 6a6c6cae80..49d2f3ad58 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
}
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
static int aia_mode;
static const char *kvm_aia_mode_str(uint64_t mode)
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 55fb4855b1..4db59658e1 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
kvm_s390_vcpu_interrupt(cpu, &irq);
}
-bool kvm_arch_cpu_check_are_resettable(void)
-{
- return true;
-}
-
int kvm_s390_get_zpci_op(void)
{
return cap_zpci_op;
--
2.39.3

View File

@ -0,0 +1,127 @@
From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Mar 2024 14:41:10 -0400
Subject: [PATCH 019/100] KVM: track whether guest state is encrypted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [19/91] 685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm)
So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the
guest state is encrypted, in which case they do nothing. For the new
API using VM types, instead, the ioctls will fail which is a safer and
more robust approach.
The new API will be the only one available for SEV-SNP and TDX, but it
is also usable for SEV and SEV-ES. In preparation for that, require
architecture-specific KVM code to communicate the point at which guest
state is protected (which must be after kvm_cpu_synchronize_post_init(),
though that might change in the future in order to suppor migration).
From that point, skip reading registers so that cpu->vcpu_dirty is
never true: if it ever becomes true, kvm_arch_put_registers() will
fail miserably.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 17 ++++++++++++++---
include/sysemu/kvm.h | 2 ++
include/sysemu/kvm_int.h | 1 +
target/i386/sev.c | 1 +
4 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 931f74256e..ec0f6df7c5 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void)
static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
{
- if (!cpu->vcpu_dirty) {
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
int ret = kvm_arch_get_registers(cpu);
if (ret) {
error_report("Failed to get registers: %s", strerror(-ret));
@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
void kvm_cpu_synchronize_state(CPUState *cpu)
{
- if (!cpu->vcpu_dirty) {
+ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
}
}
@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+ if (!kvm_state->guest_state_protected) {
+ /*
+ * This runs before the machine_init_done notifiers, and is the last
+ * opportunity to synchronize the state of confidential guests.
+ */
+ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+ }
}
static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
query_stats_schema_vcpu(first_cpu, &stats_args);
}
}
+
+void kvm_mark_guest_state_protected(void)
+{
+ kvm_state->guest_state_protected = true;
+}
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index fad9a7e8ff..302e8f6f1e 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void);
+void kvm_mark_guest_state_protected(void);
+
/**
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
* reported for the VM.
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 882e37e12c..3496be7997 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -87,6 +87,7 @@ struct KVMState
bool kernel_irqchip_required;
OnOffAuto kernel_irqchip_split;
bool sync_mmu;
+ bool guest_state_protected;
uint64_t manual_dirty_log_protect;
/* The man page (and posix) say ioctl numbers are signed int, but
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
diff --git a/target/i386/sev.c b/target/i386/sev.c
index b8f79d34d1..c49a8fd55e 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
if (ret) {
exit(1);
}
+ kvm_mark_guest_state_protected();
}
/* query the measurement blob length */
--
2.39.3

View File

@ -0,0 +1,329 @@
From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 20 Mar 2024 03:39:02 -0500
Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm)
Add KVM guest_memfd support to RAMBlock so both normal hva based memory
and kvm guest memfd based private memory can be associated in one RAMBlock.
Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to
create private guest_memfd during RAMBlock setup.
Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd
is more flexible and extensible than simply relying on the VM type because
in the future we may have the case that not all the memory of a VM need
guest memfd. As a benefit, it also avoid getting MachineState in memory
subsystem.
Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of
confidential guests, such as TDX VM. How and when to set it for memory
backends will be implemented in the following patches.
Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has
KVM guest_memfd allocated.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-ID: <20240320083945.991426-7-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++
accel/stubs/kvm-stub.c | 5 +++++
include/exec/memory.h | 20 +++++++++++++++++---
include/exec/ram_addr.h | 2 +-
include/exec/ramblock.h | 1 +
include/sysemu/kvm.h | 2 ++
system/memory.c | 5 +++++
system/physmem.c | 34 +++++++++++++++++++++++++++++++---
8 files changed, 90 insertions(+), 7 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 272e945f52..a7b9a127dd 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
static uint64_t kvm_supported_memory_attributes;
+static bool kvm_guest_memfd_supported;
static hwaddr kvm_max_slot_size = ~0;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms)
}
kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
+ kvm_guest_memfd_supported =
+ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
+ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
+ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
+
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void)
{
kvm_state->guest_state_protected = true;
}
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+ int fd;
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ if (!kvm_guest_memfd_supported) {
+ error_setg(errp, "KVM does not support guest_memfd");
+ return -1;
+ }
+
+ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "Error creating KVM guest_memfd");
+ return -1;
+ }
+
+ return fd;
+}
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index ca38172884..8e0eb22e61 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
{
return false;
}
+
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
+{
+ return -ENOSYS;
+}
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 8626a355b3..679a847685 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
/* RAM FD is opened read-only */
#define RAM_READONLY_FD (1 << 11)
+/* RAM can be private that has kvm guest memfd backend */
+#define RAM_GUEST_MEMFD (1 << 12)
+
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
IOMMUNotifierFlag flags,
hwaddr start, hwaddr end,
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
* @name: Region name, becomes part of RAMBlock name used in migration stream
* must be unique within any device
* @size: size of the region.
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
+ * RAM_GUEST_MEMFD.
* @errp: pointer to Error*, to store an error if it happens.
*
* Note that this function does not do anything to cause the data in the
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
* (getpagesize()) will be used.
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
- * RAM_READONLY_FD
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
* @path: the path in which to allocate the RAM.
* @offset: offset within the file referenced by path
* @errp: pointer to Error*, to store an error if it happens.
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
* @size: size of the region.
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
- * RAM_READONLY_FD
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
* @fd: the fd to mmap.
* @offset: offset within the file referenced by fd
* @errp: pointer to Error*, to store an error if it happens.
@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
*/
bool memory_region_is_protected(MemoryRegion *mr);
+/**
+ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
+ * associated
+ *
+ * Returns %true if a memory region's ram_block has valid guest_memfd assigned.
+ *
+ * @mr: the memory region being queried
+ */
+bool memory_region_has_guest_memfd(MemoryRegion *mr);
+
/**
* memory_region_get_iommu: check whether a memory region is an iommu
*
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index de45ba7bc9..07c8f86375 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void);
* @mr: the memory region where the ram block is
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
- * RAM_READONLY_FD
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
* @mem_path or @fd: specify the backing file or device
* @offset: Offset into target file
* @errp: pointer to Error*, to store an error if it happens
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
index 848915ea5b..459c8917de 100644
--- a/include/exec/ramblock.h
+++ b/include/exec/ramblock.h
@@ -41,6 +41,7 @@ struct RAMBlock {
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
int fd;
uint64_t fd_offset;
+ int guest_memfd;
size_t page_size;
/* dirty bitmap used during migration */
unsigned long *bmap;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index f114ff6986..9e4ab7ae89 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void);
*/
bool kvm_hwpoisoned_mem(void);
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
+
int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
diff --git a/system/memory.c b/system/memory.c
index a229a79988..c756950c0c 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
}
+bool memory_region_has_guest_memfd(MemoryRegion *mr)
+{
+ return mr->ram_block && mr->ram_block->guest_memfd >= 0;
+}
+
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
{
uint8_t mask = mr->dirty_log_mask;
diff --git a/system/physmem.c b/system/physmem.c
index a4fe3d2bf8..f5dfa20e57 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
const bool shared = qemu_ram_is_shared(new_block);
RAMBlock *block;
RAMBlock *last_block = NULL;
+ bool free_on_error = false;
ram_addr_t old_ram_size, new_ram_size;
Error *err = NULL;
@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
return;
}
memory_try_enable_merging(new_block->host, new_block->max_length);
+ free_on_error = true;
+ }
+ }
+
+ if (new_block->flags & RAM_GUEST_MEMFD) {
+ assert(kvm_enabled());
+ assert(new_block->guest_memfd < 0);
+
+ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
+ 0, errp);
+ if (new_block->guest_memfd < 0) {
+ qemu_mutex_unlock_ramlist();
+ goto out_free;
}
}
@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
ram_block_notify_add(new_block->host, new_block->used_length,
new_block->max_length);
}
+ return;
+
+out_free:
+ if (free_on_error) {
+ qemu_anon_ram_free(new_block->host, new_block->max_length);
+ new_block->host = NULL;
+ }
}
#ifdef CONFIG_POSIX
@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
/* Just support these ram flags by now. */
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
- RAM_READONLY_FD)) == 0);
+ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
if (xen_enabled()) {
error_setg(errp, "-mem-path not supported with Xen");
@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
new_block->used_length = size;
new_block->max_length = size;
new_block->flags = ram_flags;
+ new_block->guest_memfd = -1;
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
errp);
if (!new_block->host) {
@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
int align;
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
- RAM_NORESERVE)) == 0);
+ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
assert(!host ^ (ram_flags & RAM_PREALLOC));
align = qemu_real_host_page_size();
@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
new_block->max_length = max_size;
assert(max_size >= size);
new_block->fd = -1;
+ new_block->guest_memfd = -1;
new_block->page_size = qemu_real_host_page_size();
new_block->host = host;
new_block->flags = ram_flags;
@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
MemoryRegion *mr, Error **errp)
{
- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
}
@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block)
} else {
qemu_anon_ram_free(block->host, block->max_length);
}
+
+ if (block->guest_memfd >= 0) {
+ close(block->guest_memfd);
+ }
+
g_free(block);
}
--
2.39.3

View File

@ -0,0 +1,82 @@
From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 20 Mar 2024 17:45:29 +0100
Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated
discard
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm)
Some subsystems like VFIO might disable ram block discard, but guest_memfd
uses discard operations to implement conversions between private and
shared memory. Because of this, sequences like the following can result
in stale IOMMU mappings:
1. allocate shared page
2. convert page shared->private
3. discard shared page
4. convert page private->shared
5. allocate shared page
6. issue DMA operations against that shared page
This is not a use-after-free, because after step 3 VFIO is still pinning
the page. However, DMA operations in step 6 will hit the old mapping
that was allocated in step 1.
Address this by taking ram_block_discard_is_enabled() into account when
deciding whether or not to discard pages.
Since kvm_convert_memory()/guest_memfd doesn't implement a
RamDiscardManager handler to convey and replay discard operations,
this is a case of uncoordinated discard, which is blocked/released
by ram_block_discard_require(). Interestingly, this function had
no use so far.
Alternative approaches would be to block discard of shared pages, but
this would cause guests to consume twice the memory if they use VFIO;
or to implement a RamDiscardManager and only block uncoordinated
discard, i.e. use ram_block_coordinated_discard_require().
[Commit message mostly by Michael Roth <michael.roth@amd.com>]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
system/physmem.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/system/physmem.c b/system/physmem.c
index f5dfa20e57..5ebcf5be11 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
assert(kvm_enabled());
assert(new_block->guest_memfd < 0);
+ if (ram_block_discard_require(true) < 0) {
+ error_setg_errno(errp, errno,
+ "cannot set up private guest memory: discard currently blocked");
+ error_append_hint(errp, "Are you using assigned devices?\n");
+ goto out_free;
+ }
+
new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
0, errp);
if (new_block->guest_memfd < 0) {
@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block)
if (block->guest_memfd >= 0) {
close(block->guest_memfd);
+ ram_block_discard_require(false);
}
g_free(block);
--
2.39.3

View File

@ -0,0 +1,90 @@
From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 29 Feb 2024 01:00:35 -0500
Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and
kvm_reset() in class
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm)
Different confidential VMs in different architectures all have the same
needs to do their specific initialization (and maybe resetting) stuffs
with KVM. Currently each of them exposes individual *_kvm_init()
functions and let machine code or kvm code to call it.
To facilitate the introduction of confidential guest technology from
different x86 vendors, add two virtual functions, kvm_init() and kvm_reset()
in ConfidentialGuestSupportClass, and expose two helpers functions for
invodking them.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++-
1 file changed, 33 insertions(+), 1 deletion(-)
diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h
index ba2dd4b5df..e5b188cffb 100644
--- a/include/exec/confidential-guest-support.h
+++ b/include/exec/confidential-guest-support.h
@@ -23,7 +23,10 @@
#include "qom/object.h"
#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT)
+OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
+ ConfidentialGuestSupportClass,
+ CONFIDENTIAL_GUEST_SUPPORT)
+
struct ConfidentialGuestSupport {
Object parent;
@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport {
typedef struct ConfidentialGuestSupportClass {
ObjectClass parent;
+
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
+ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp);
} ConfidentialGuestSupportClass;
+static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs,
+ Error **errp)
+{
+ ConfidentialGuestSupportClass *klass;
+
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
+ if (klass->kvm_init) {
+ return klass->kvm_init(cgs, errp);
+ }
+
+ return 0;
+}
+
+static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs,
+ Error **errp)
+{
+ ConfidentialGuestSupportClass *klass;
+
+ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs);
+ if (klass->kvm_reset) {
+ return klass->kvm_reset(cgs, errp);
+ }
+
+ return 0;
+}
+
#endif /* !CONFIG_USER_ONLY */
#endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */
--
2.39.3

View File

@ -0,0 +1,73 @@
From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:40 -0500
Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using
guest_memfd
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm)
When guest_memfd is enabled, the BIOS is generally part of the initial
encrypted guest image and will be accessed as private guest memory. Add
the necessary changes to set up the associated RAM region with a
guest_memfd backend to allow for this.
Current support centers around using -bios to load the BIOS data.
Support for loading the BIOS via pflash requires additional enablement
since those interfaces rely on the use of ROM memory regions which make
use of the KVM_MEM_READONLY memslot flag, which is not supported for
guest_memfd-backed memslots.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86-common.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 35fe6eabea..6cbb76c25c 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
(bios_size % 65536) != 0) {
goto bios_error;
}
- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
- &error_fatal);
+ if (machine_require_guest_memfd(MACHINE(x86ms))) {
+ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
+ bios_size, &error_fatal);
+ } else {
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
+ bios_size, &error_fatal);
+ }
if (sev_enabled()) {
/*
* The concept of a "reset" simply doesn't exist for
@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
}
g_free(filename);
- /* map the last 128KB of the BIOS in ISA space */
- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
- !isapc_ram_fw);
+ if (!machine_require_guest_memfd(MACHINE(x86ms))) {
+ /* map the last 128KB of the BIOS in ISA space */
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
+ !isapc_ram_fw);
+ }
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,
--
2.39.3

View File

@ -0,0 +1,106 @@
From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Tue, 30 Apr 2024 17:06:39 +0200
Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take
X86MachineState rather than MachineState
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm)
The function creates and leaks two MemoryRegion objects regarding the BIOS which
will be moved into X86MachineState in the next steps to avoid the leakage.
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20240430150643.111976-3-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/microvm.c | 2 +-
hw/i386/pc_sysfw.c | 4 ++--
hw/i386/x86.c | 4 ++--
include/hw/i386/x86.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 61a772dfe6..fec63cacfa 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms)
default_firmware = x86_machine_is_acpi_enabled(x86ms)
? MICROVM_BIOS_FILENAME
: MICROVM_QBOOT_FILENAME;
- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true);
+ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true);
}
static void microvm_memory_init(MicrovmMachineState *mms)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 3efabbbab2..ef7dea9798 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
if (!pcmc->pci_enabled) {
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true);
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
return;
}
@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
if (!pflash_blk[0]) {
/* Machine property pflash0 not set, use ROM mode */
- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false);
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
} else {
if (kvm_enabled() && !kvm_readonly_mem_enabled()) {
/*
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 2a4f3ee285..6d3c72f124 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms,
nb_option_roms++;
}
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
MemoryRegion *rom_memory, bool isapc_ram_fw)
{
const char *bios_name;
@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
ssize_t ret;
/* BIOS load */
- bios_name = ms->firmware ?: default_firmware;
+ bios_name = MACHINE(x86ms)->firmware ?: default_firmware;
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
if (filename) {
bios_size = get_image_size(filename);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 4dc30dcb4d..cb07618d19 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp);
-void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
+void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
MemoryRegion *rom_memory, bool isapc_ram_fw);
void x86_load_linux(X86MachineState *x86ms,
--
2.39.3

View File

@ -0,0 +1,51 @@
From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 3 Apr 2024 10:59:53 -0400
Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is
not disabled
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm)
A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system
also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC
is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to
be cleared. The PIC probe should then print:
[ 0.155970] Using NULL legacy PIC
However, no such log printed in guest kernel unless PCAT_COMPAT is
cleared.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/acpi-common.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c
index 20f19269da..0cc2919bb8 100644
--- a/hw/i386/acpi-common.c
+++ b/hw/i386/acpi-common.c
@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker,
acpi_table_begin(&table, table_data);
/* Local APIC Address */
build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4);
- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */
+ /* Flags. bit 0: PCAT_COMPAT */
+ build_append_int_noprefix(table_data,
+ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4);
for (i = 0; i < apic_ids->len; i++) {
pc_madt_cpu_entry(i, apic_ids, table_data, false);
--
2.39.3

View File

@ -0,0 +1,164 @@
From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Wed, 8 May 2024 19:55:07 +0200
Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios
region
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm)
In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped
to the top of the 4G memory boundary. Do the same in the -pflash case, but only
for new machine versions for migration compatibility. This establishes common
behavior and makes pflash commands work in the "isa-bios" region which some
real-world legacy bioses rely on.
Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash
case will now also point to encrypted memory, just like it already does in the
-bios case.
When running `info mtree` before and after this commit with
`qemu-system-x86_64 -S -drive \
if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running
`diff -u before.mtree after.mtree` results in the following changes in the
memory tree:
| --- before.mtree
| +++ after.mtree
| @@ -71,7 +71,7 @@
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
| @@ -108,7 +108,7 @@
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff
| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff
| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff
| @@ -131,11 +131,14 @@
| memory-region: pc.ram
| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram
|
| +memory-region: system.flash0
| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
| +
| memory-region: pci
| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci
| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem
| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom
| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios
| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff
|
| memory-region: smram
| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff
Note that in both cases the "system" memory region contains the entry
00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0
but the "system.flash0" memory region only appears standalone when "isa-bios" is
an alias.
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-ID: <20240508175507.22270-7-shentey@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 1 +
hw/i386/pc_piix.c | 1 +
hw/i386/pc_q35.c | 1 +
hw/i386/pc_sysfw.c | 8 +++++++-
include/hw/i386/pc.h | 1 +
5 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1a34bc4522..660a59c63b 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->has_reserved_memory = true;
pcmc->enforce_aligned_dimm = true;
pcmc->enforce_amd_1tb_hole = true;
+ pcmc->isa_bios_alias = true;
/* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported
* to be used at the moment, 32K should be enough for a while. */
pcmc->acpi_data_size = 0x20000 + 0x8000;
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index bef3e8b73e..dbb7f2ed17 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
m->alias = "pc";
m->is_default = 1;
m->smp_props.prefer_sockets = true;
+ pcmc->isa_bios_alias = false;
}
static void pc_init_rhel760(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index dedc86eec9..f9900ad798 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)";
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.4.0";
+ pcmc->isa_bios_alias = false;
compat_props_add(m->compat_props, pc_rhel_9_5_compat,
pc_rhel_9_5_compat_len);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 82d37cb376..ac88ad4eb9 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
MemoryRegion *rom_memory)
{
X86MachineState *x86ms = X86_MACHINE(pcms);
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
hwaddr total_size = 0;
int i;
BlockBackend *blk;
@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms,
if (i == 0) {
flash_mem = pflash_cfi01_get_memory(system_flash);
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
+ if (pcmc->isa_bios_alias) {
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
+ true);
+ } else {
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
+ }
/* Encrypt the pflash boot ROM */
if (sev_enabled()) {
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 467e7fb52f..3f53ec73ac 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -122,6 +122,7 @@ struct PCMachineClass {
bool enforce_aligned_dimm;
bool broken_reserved_end;
bool enforce_amd_1tb_hole;
+ bool isa_bios_alias;
/* generate legacy CPU hotplug AML */
bool legacy_cpu_hotplug;
--
2.39.3

View File

@ -0,0 +1,53 @@
From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Mon, 22 Apr 2024 22:06:22 +0200
Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from
pc_isa_bios_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm)
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20240422200625.2768-2-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_sysfw.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index ef7dea9798..59c7a81692 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -41,8 +41,7 @@
#define FLASH_SECTOR_SIZE 4096
static void pc_isa_bios_init(MemoryRegion *rom_memory,
- MemoryRegion *flash_mem,
- int ram_size)
+ MemoryRegion *flash_mem)
{
int isa_bios_size;
MemoryRegion *isa_bios;
@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
if (i == 0) {
flash_mem = pflash_cfi01_get_memory(system_flash);
- pc_isa_bios_init(rom_memory, flash_mem, size);
+ pc_isa_bios_init(rom_memory, flash_mem);
/* Encrypt the pflash boot ROM */
if (sev_enabled()) {
--
2.39.3

View File

@ -0,0 +1,158 @@
From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:30 -0500
Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from
OVMF header
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm)
A recent version of OVMF expanded the reset vector GUID list to add
SEV-specific metadata GUID. The SEV metadata describes the reserved
memory regions such as the secrets and CPUID page used during the SEV-SNP
guest launch.
The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV
metadata pointer from the OVMF GUID list.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_sysfw.c | 4 ++++
include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++
target/i386/sev-sysemu-stub.c | 4 ++++
target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++
target/i386/sev.h | 2 ++
5 files changed, 68 insertions(+)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index ac88ad4eb9..9b8671c441 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size)
pc_system_parse_ovmf_flash(ptr, size);
if (sev_enabled()) {
+
+ /* Copy the SEV metadata table (if it exists) */
+ pc_system_parse_sev_metadata(ptr, size);
+
ret = sev_es_save_reset_vector(ptr, size);
if (ret) {
error_report("failed to locate and/or save reset vector");
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 3f53ec73ac..94b49310f5 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
+typedef enum {
+ SEV_DESC_TYPE_UNDEF,
+ /* The section contains the region that must be validated by the VMM. */
+ SEV_DESC_TYPE_SNP_SEC_MEM,
+ /* The section contains the SNP secrets page */
+ SEV_DESC_TYPE_SNP_SECRETS,
+ /* The section contains address that can be used as a CPUID page */
+ SEV_DESC_TYPE_CPUID,
+
+} ovmf_sev_metadata_desc_type;
+
+typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc {
+ uint32_t base;
+ uint32_t len;
+ ovmf_sev_metadata_desc_type type;
+} OvmfSevMetadataDesc;
+
+typedef struct __attribute__((__packed__)) OvmfSevMetadata {
+ uint8_t signature[4];
+ uint32_t len;
+ uint32_t version;
+ uint32_t num_desc;
+ OvmfSevMetadataDesc descs[];
+} OvmfSevMetadata;
+
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void);
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
MemoryRegion *pci_address_space);
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
index 96e1c15cc3..fc1c57c411 100644
--- a/target/i386/sev-sysemu-stub.c
+++ b/target/i386/sev-sysemu-stub.c
@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, "SEV is not available in this QEMU\n");
}
+
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
+{
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index e84e4395a5..17281bb2c7 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp)
return sev_get_capabilities(errp);
}
+static OvmfSevMetadata *ovmf_sev_metadata_table;
+
+#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc"
+typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset {
+ uint32_t offset;
+} OvmfSevMetadataOffset;
+
+OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void)
+{
+ return ovmf_sev_metadata_table;
+}
+
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size)
+{
+ OvmfSevMetadata *metadata;
+ OvmfSevMetadataOffset *data;
+
+ if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data,
+ NULL)) {
+ return;
+ }
+
+ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset);
+ if (memcmp(metadata->signature, "ASEV", 4) != 0 ||
+ metadata->len < sizeof(OvmfSevMetadata) ||
+ metadata->len > flash_size - data->offset) {
+ return;
+ }
+
+ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len);
+}
+
static SevAttestationReport *sev_get_attestation_report(const char *mnonce,
Error **errp)
{
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 5dc4767b1e..cc12824dd6 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
void sev_es_set_reset_vector(CPUState *cpu);
+void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size);
+
#endif
--
2.39.3

View File

@ -0,0 +1,165 @@
From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:36 -0500
Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP
is enabled
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm)
As with SEV, an SNP guest requires that the BIOS be part of the initial
encrypted/measured guest payload. Extend sev_encrypt_flash() to handle
the SNP case and plumb through the GPA of the BIOS location since this
is needed for SNP.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_sysfw.c | 12 +++++++-----
hw/i386/x86-common.c | 2 +-
include/hw/i386/x86.h | 2 +-
target/i386/sev-sysemu-stub.c | 2 +-
target/i386/sev.c | 5 +++--
target/i386/sev.h | 2 +-
6 files changed, 14 insertions(+), 11 deletions(-)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 9b8671c441..7cdbafc8d2 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms,
assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
+ hwaddr gpa;
+
system_flash = pcms->flash[i];
blk = pflash_cfi01_get_blk(system_flash);
if (!blk) {
@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
}
total_size += size;
+ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */
qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks",
size / FLASH_SECTOR_SIZE);
sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal);
- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0,
- 0x100000000ULL - total_size);
+ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa);
if (i == 0) {
flash_mem = pflash_cfi01_get_memory(system_flash);
@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
if (sev_enabled()) {
flash_ptr = memory_region_get_ram_ptr(flash_mem);
flash_size = memory_region_size(flash_mem);
- x86_firmware_configure(flash_ptr, flash_size);
+ x86_firmware_configure(gpa, flash_ptr, flash_size);
}
}
}
@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms,
pc_system_flash_cleanup_unused(pcms);
}
-void x86_firmware_configure(void *ptr, int size)
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
{
int ret;
@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size)
exit(1);
}
- sev_encrypt_flash(ptr, size, &error_fatal);
+ sev_encrypt_flash(gpa, ptr, size, &error_fatal);
}
}
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 67b03c913a..35fe6eabea 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
*/
void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
load_image_size(filename, ptr, bios_size);
- x86_firmware_configure(ptr, bios_size);
+ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size);
} else {
memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index b006f16b8d..d43cb3908e 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent);
DeviceState *ioapic_init_secondary(GSIState *gsi_state);
/* pc_sysfw.c */
-void x86_firmware_configure(void *ptr, int size);
+void x86_firmware_configure(hwaddr gpa, void *ptr, int size);
#endif
diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c
index fc1c57c411..d5bf886e79 100644
--- a/target/i386/sev-sysemu-stub.c
+++ b/target/i386/sev-sysemu-stub.c
@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret,
error_setg(errp, "SEV is not available in this QEMU");
}
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
g_assert_not_reached();
}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 06401f0526..7b5c4b4874 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1484,7 +1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
int
-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
+sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
/* zero the excess data so the measurement can be reliably calculated */
memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) {
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
+ sizeof(*padded_ht), errp) < 0) {
ret = false;
}
diff --git a/target/i386/sev.h b/target/i386/sev.h
index cc12824dd6..858005a119 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void);
uint32_t sev_get_reduced_phys_bits(void);
bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp);
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
int sev_inject_launch_secret(const char *hdr, const char *secret,
uint64_t gpa, Error **errp);
--
2.39.3

View File

@ -0,0 +1,123 @@
From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:39 -0500
Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [82/91] a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm)
Current SNP guest kernels will attempt to access these regions with
with C-bit set, so guest_memfd is needed to handle that. Otherwise,
kvm_convert_memory() will fail when the guest kernel tries to access it
and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges
to private.
Whether guests should actually try to access ROM regions in this way (or
need to deal with legacy ROM regions at all), is a separate issue to be
addressed on kernel side, but current SNP guest kernels will exhibit
this behavior and so this handling is needed to allow QEMU to continue
running existing SNP guest kernels.
Signed-off-by: Michael Roth <michael.roth@amd.com>
[pankaj: Added sev_snp_enabled() check]
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 14 ++++++++++----
hw/i386/pc_sysfw.c | 19 +++++++++++++------
2 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0aca0cc79e..b25d075b59 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -62,6 +62,7 @@
#include "hw/mem/memory-device.h"
#include "e820_memory_layout.h"
#include "trace.h"
+#include "sev.h"
#include CONFIG_DEVICES
#ifdef CONFIG_XEN_EMU
@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms,
pc_system_firmware_init(pcms, rom_memory);
option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
memory_region_add_subregion_overlap(rom_memory,
PC_ROM_MIN_VGA,
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 7cdbafc8d2..ef80281d28 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -40,8 +40,8 @@
#define FLASH_SECTOR_SIZE 4096
-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
- MemoryRegion *flash_mem)
+static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios,
+ MemoryRegion *rom_memory, MemoryRegion *flash_mem)
{
int isa_bios_size;
uint64_t flash_size;
@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(flash_size, 128 * KiB);
- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
- &error_fatal);
+ if (machine_require_guest_memfd(MACHINE(pcms))) {
+ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios",
+ isa_bios_size, &error_fatal);
+ } else {
+ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
+ &error_fatal);
+ }
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
isa_bios,
@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
((uint8_t*)flash_ptr) + (flash_size - isa_bios_size),
isa_bios_size);
- memory_region_set_readonly(isa_bios, true);
+ if (!machine_require_guest_memfd(current_machine)) {
+ memory_region_set_readonly(isa_bios, true);
+ }
}
static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms,
@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem,
true);
} else {
- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
+ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem);
}
/* Encrypt the pflash boot ROM */
--
2.39.3

View File

@ -0,0 +1,58 @@
From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Tue, 9 Apr 2024 18:07:43 -0500
Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older
machine types
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm)
Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for
creating SEV/SEV-ES going forward. However, this API results in guest
measurement changes which are generally not expected for users of these
older guest types and can cause disruption if they switch to a newer
QEMU/kernel version. Avoid this by continuing to use the older
KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240409230743.962513-4-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 1 +
target/i386/sev.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index b9fde3cec1..1a34bc4522 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
GlobalProperty pc_rhel_9_5_compat[] = {
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
+ { "sev-guest", "legacy-vm-type", "true" },
};
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
diff --git a/target/i386/sev.c b/target/i386/sev.c
index f4ee317cb0..d30b68c11e 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj)
object_property_add_uint32_ptr(obj, "reduced-phys-bits",
&sev->reduced_phys_bits,
OBJ_PROP_FLAG_READWRITE);
+ object_apply_compat_props(obj);
}
/* sev guest info */
--
2.39.3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,133 @@
From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Wed, 8 May 2024 19:55:04 +0200
Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm)
Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an
"isa_bios" attribute to X86MachineState.
Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-ID: <20240508175507.22270-4-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_sysfw.c | 7 +++----
hw/i386/x86.c | 9 ++++-----
include/hw/i386/x86.h | 7 +++++++
3 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 59c7a81692..82d37cb376 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -40,11 +40,10 @@
#define FLASH_SECTOR_SIZE 4096
-static void pc_isa_bios_init(MemoryRegion *rom_memory,
+static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory,
MemoryRegion *flash_mem)
{
int isa_bios_size;
- MemoryRegion *isa_bios;
uint64_t flash_size;
void *flash_ptr, *isa_bios_ptr;
@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory,
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(flash_size, 128 * KiB);
- isa_bios = g_malloc(sizeof(*isa_bios));
memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size,
&error_fatal);
memory_region_add_subregion_overlap(rom_memory,
@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms)
static void pc_system_flash_map(PCMachineState *pcms,
MemoryRegion *rom_memory)
{
+ X86MachineState *x86ms = X86_MACHINE(pcms);
hwaddr total_size = 0;
int i;
BlockBackend *blk;
@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
if (i == 0) {
flash_mem = pflash_cfi01_get_memory(system_flash);
- pc_isa_bios_init(rom_memory, flash_mem);
+ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem);
/* Encrypt the pflash boot ROM */
if (sev_enabled()) {
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6d3c72f124..457e8a34a5 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
{
const char *bios_name;
char *filename;
- MemoryRegion *bios, *isa_bios;
+ MemoryRegion *bios;
int bios_size, isa_bios_size;
ssize_t ret;
@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(bios_size, 128 * KiB);
- isa_bios = g_malloc(sizeof(*isa_bios));
- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
bios_size - isa_bios_size, isa_bios_size);
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
- isa_bios,
+ &x86ms->isa_bios,
1);
- memory_region_set_readonly(isa_bios, !isapc_ram_fw);
+ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index cb07618d19..a07de79167 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -18,6 +18,7 @@
#define HW_I386_X86_H
#include "exec/hwaddr.h"
+#include "exec/memory.h"
#include "hw/boards.h"
#include "hw/intc/ioapic.h"
@@ -52,6 +53,12 @@ struct X86MachineState {
GMappedFile *initrd_mapped_file;
HotplugHandler *acpi_dev;
+ /*
+ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
+ * boundary.
+ */
+ MemoryRegion isa_bios;
+
/* RAM information (sizes, addresses, configuration): */
ram_addr_t below_4g_mem_size, above_4g_mem_size;
--
2.39.3

View File

@ -0,0 +1,105 @@
From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Wed, 8 May 2024 19:55:05 +0200
Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm)
Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to
X86MachineState. Note that it is only used in the -bios case.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-ID: <20240508175507.22270-5-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86.c | 13 ++++++-------
include/hw/i386/x86.h | 6 ++++++
2 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 457e8a34a5..29167de97d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
{
const char *bios_name;
char *filename;
- MemoryRegion *bios;
int bios_size, isa_bios_size;
ssize_t ret;
@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
(bios_size % 65536) != 0) {
goto bios_error;
}
- bios = g_malloc(sizeof(*bios));
- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal);
+ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size,
+ &error_fatal);
if (sev_enabled()) {
/*
* The concept of a "reset" simply doesn't exist for
@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
* the firmware as rom to properly re-initialize on reset.
* Just go for a straight file load instead.
*/
- void *ptr = memory_region_get_ram_ptr(bios);
+ void *ptr = memory_region_get_ram_ptr(&x86ms->bios);
load_image_size(filename, ptr, bios_size);
x86_firmware_configure(ptr, bios_size);
} else {
- memory_region_set_readonly(bios, !isapc_ram_fw);
+ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw);
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
if (ret != 0) {
goto bios_error;
@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
/* map the last 128KB of the BIOS in ISA space */
isa_bios_size = MIN(bios_size, 128 * KiB);
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios,
+ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
bios_size - isa_bios_size, isa_bios_size);
memory_region_add_subregion_overlap(rom_memory,
0x100000 - isa_bios_size,
@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,
(uint32_t)(-bios_size),
- bios);
+ &x86ms->bios);
return;
bios_error:
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index a07de79167..55c6809ae0 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -53,6 +53,12 @@ struct X86MachineState {
GMappedFile *initrd_mapped_file;
HotplugHandler *acpi_dev;
+ /*
+ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used
+ * in the ROM (-bios) case.
+ */
+ MemoryRegion bios;
+
/*
* Map the upper 128 KiB of the BIOS just underneath the 1 MiB address
* boundary.
--
2.39.3

View File

@ -0,0 +1,69 @@
From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Tue, 30 Apr 2024 17:06:38 +0200
Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in
x86_bios_rom_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm)
Given that memory_region_set_readonly() is a no-op when the readonlyness is
already as requested it is possible to simplify the pattern
if (condition) {
foo(true);
}
to
foo(condition);
which is shorter and allows to see the invariant of the code more easily.
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20240430150643.111976-2-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 3d5b51e92d..2a4f3ee285 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
load_image_size(filename, ptr, bios_size);
x86_firmware_configure(ptr, bios_size);
} else {
- if (!isapc_ram_fw) {
- memory_region_set_readonly(bios, true);
- }
+ memory_region_set_readonly(bios, !isapc_ram_fw);
ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1);
if (ret != 0) {
goto bios_error;
@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
0x100000 - isa_bios_size,
isa_bios,
1);
- if (!isapc_ram_fw) {
- memory_region_set_readonly(isa_bios, true);
- }
+ memory_region_set_readonly(isa_bios, !isapc_ram_fw);
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,
--
2.39.3

View File

@ -0,0 +1,98 @@
From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001
From: Bernhard Beschow <shentey@gmail.com>
Date: Wed, 8 May 2024 19:55:06 +0200
Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from
x86_bios_rom_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm)
The function is inspired by pc_isa_bios_init() and should eventually replace it.
Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands
to work in the isa-bios region.
While at it convert the magic number 0x100000 (== 1MiB) to increase readability.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Message-ID: <20240508175507.22270-6-shentey@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86.c | 25 ++++++++++++++++---------
include/hw/i386/x86.h | 2 ++
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 29167de97d..c61f4ebfa6 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms,
nb_option_roms++;
}
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
+ MemoryRegion *bios, bool read_only)
+{
+ uint64_t bios_size = memory_region_size(bios);
+ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB);
+
+ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios,
+ bios_size - isa_bios_size, isa_bios_size);
+ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size,
+ isa_bios, 1);
+ memory_region_set_readonly(isa_bios, read_only);
+}
+
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
MemoryRegion *rom_memory, bool isapc_ram_fw)
{
const char *bios_name;
char *filename;
- int bios_size, isa_bios_size;
+ int bios_size;
ssize_t ret;
/* BIOS load */
@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
g_free(filename);
/* map the last 128KB of the BIOS in ISA space */
- isa_bios_size = MIN(bios_size, 128 * KiB);
- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios,
- bios_size - isa_bios_size, isa_bios_size);
- memory_region_add_subregion_overlap(rom_memory,
- 0x100000 - isa_bios_size,
- &x86ms->isa_bios,
- 1);
- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw);
+ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios,
+ !isapc_ram_fw);
/* map all the bios at the top of memory */
memory_region_add_subregion(rom_memory,
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 55c6809ae0..d7b7d3f3ce 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp);
+void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory,
+ MemoryRegion *bios, bool read_only);
void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
MemoryRegion *rom_memory, bool isapc_ram_fw);
--
2.39.3

View File

@ -0,0 +1,68 @@
From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 May 2024 19:00:39 +0200
Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends
on other components
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm)
fw_cfg.c and vapic.c are currently included unconditionally but
depend on other components. vapic.c depends on the local APIC,
while fw_cfg.c includes a piece of AML builder code that depends
on CONFIG_ACPI.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20240509170044.190795-9-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/fw_cfg.c | 2 ++
hw/i386/meson.build | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c
index 283c3f4c16..7f97d40616 100644
--- a/hw/i386/fw_cfg.c
+++ b/hw/i386/fw_cfg.c
@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg)
fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val));
}
+#ifdef CONFIG_ACPI
void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
{
/*
@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg)
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
}
+#endif
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index d8b70ef3e9..d9da676038 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -1,12 +1,12 @@
i386_ss = ss.source_set()
i386_ss.add(files(
'fw_cfg.c',
- 'vapic.c',
'e820_memory_layout.c',
'multiboot.c',
'x86.c',
))
+i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c'))
i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
if_false: files('x86-iommu-stub.c'))
i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
--
2.39.3

View File

@ -0,0 +1,40 @@
From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:23 -0500
Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm)
SNP guests will rely on this bit to determine certain feature support.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 489c853b42..13737cd703 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (sev_enabled()) {
*eax = 0x2;
*eax |= sev_es_enabled() ? 0x8 : 0;
+ *eax |= sev_snp_enabled() ? 0x10 : 0;
*ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */
*ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */
}
--
2.39.3

View File

@ -0,0 +1,145 @@
From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:42 -0500
Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for
KVM_HC_MAP_GPA_RANGE
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm)
KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for
private/shared memory attribute updates requested by the guest.
Implement handling for that use-case along with some basic
infrastructure for enabling specific hypercall events.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++
target/i386/kvm/kvm_i386.h | 1 +
target/i386/kvm/trace-events | 1 +
3 files changed, 57 insertions(+)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 75e75d9772..2935e3931a 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -21,6 +21,7 @@
#include <sys/syscall.h>
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#include "standard-headers/asm-x86/kvm_para.h"
#include "hw/xen/interface/arch-x86/cpuid.h"
@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms)
return kvm_type;
}
+bool kvm_enable_hypercall(uint64_t enable_mask)
+{
+ KVMState *s = KVM_STATE(current_accel());
+
+ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask);
+}
+
bool kvm_has_smm(void)
{
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void)
return ecx & CPUID_EXT_VMX;
}
+/*
+ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE
+ * to service guest-initiated memory attribute update requests so that
+ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be
+ * backed by the private memory pool provided by guest_memfd, and as such
+ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX).
+ *
+ * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live
+ * migration, are not implemented here currently.
+ *
+ * For the guest_memfd use-case, these exits will generally be synthesized
+ * by KVM based on platform-specific hypercalls, like GHCB requests in the
+ * case of SEV-SNP, and not issued directly within the guest though the
+ * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is
+ * not actually advertised to guests via the KVM CPUID feature bit, as
+ * opposed to SEV live migration where it would be. Since it is unlikely the
+ * SEV live migration use-case would be useful for guest-memfd backed guests,
+ * because private/shared page tracking is already provided through other
+ * means, these 2 use-cases should be treated as being mutually-exclusive.
+ */
+static int kvm_handle_hc_map_gpa_range(struct kvm_run *run)
+{
+ uint64_t gpa, size, attributes;
+
+ if (!machine_require_guest_memfd(current_machine))
+ return -EINVAL;
+
+ gpa = run->hypercall.args[0];
+ size = run->hypercall.args[1] * TARGET_PAGE_SIZE;
+ attributes = run->hypercall.args[2];
+
+ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags);
+
+ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED);
+}
+
+static int kvm_handle_hypercall(struct kvm_run *run)
+{
+ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE)
+ return kvm_handle_hc_map_gpa_range(run);
+
+ return -EINVAL;
+}
+
#define VMX_INVALID_GUEST_STATE 0x80000021
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
ret = kvm_xen_handle_exit(cpu, &run->xen);
break;
#endif
+ case KVM_EXIT_HYPERCALL:
+ ret = kvm_handle_hypercall(run);
+ break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 6b44844d95..34fc60774b 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -33,6 +33,7 @@
bool kvm_has_smm(void);
bool kvm_enable_x2apic(void);
bool kvm_hv_vpindex_settable(void);
+bool kvm_enable_hypercall(uint64_t enable_mask);
bool kvm_enable_sgx_provisioning(KVMState *s);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events
index b365a8e8e2..74a6234ff7 100644
--- a/target/i386/kvm/trace-events
+++ b/target/i386/kvm/trace-events
@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %"
kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d"
kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d"
kvm_x86_update_msi_routes(int num) "Updated %d MSI routes"
+kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64
# xen-emu.c
kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
--
2.39.3

View File

@ -0,0 +1,536 @@
From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 29 Feb 2024 01:36:43 -0500
Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to
separate helper
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm)
Move the architectural (for lack of a better term) CPUID leaf generation
to a separate helper so that the generation code can be reused by TDX,
which needs to generate a canonical VM-scoped configuration.
For now this is just a cleanup, so keep the function static.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 417 +++++++++++++++++++++---------------------
1 file changed, 211 insertions(+), 206 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 739f33db47..5f30b649a0 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
-int kvm_arch_init_vcpu(CPUState *cs)
+static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
+ struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i)
{
- struct {
- struct kvm_cpuid2 cpuid;
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
- } cpuid_data;
- /*
- * The kernel defines these structs with padding fields so there
- * should be no extra padding in our cpuid_data struct.
- */
- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
- sizeof(struct kvm_cpuid2) +
- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
-
- X86CPU *cpu = X86_CPU(cs);
- CPUX86State *env = &cpu->env;
- uint32_t limit, i, j, cpuid_i;
+ uint32_t limit, i, j;
uint32_t unused;
struct kvm_cpuid_entry2 *c;
- uint32_t signature[3];
- int kvm_base = KVM_CPUID_SIGNATURE;
- int max_nested_state_len;
- int r;
- Error *local_err = NULL;
-
- memset(&cpuid_data, 0, sizeof(cpuid_data));
-
- cpuid_i = 0;
-
- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
-
- r = kvm_arch_set_tsc_khz(cs);
- if (r < 0) {
- return r;
- }
-
- /* vcpu's TSC frequency is either specified by user, or following
- * the value used by KVM if the former is not present. In the
- * latter case, we query it from KVM and record in env->tsc_khz,
- * so that vcpu's TSC frequency can be migrated later via this field.
- */
- if (!env->tsc_khz) {
- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
- -ENOTSUP;
- if (r > 0) {
- env->tsc_khz = r;
- }
- }
-
- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
-
- /*
- * kvm_hyperv_expand_features() is called here for the second time in case
- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
- * check which Hyper-V enlightenments are supported and which are not, we
- * can still proceed and check/expand Hyper-V enlightenments here so legacy
- * behavior is preserved.
- */
- if (!kvm_hyperv_expand_features(cpu, &local_err)) {
- error_report_err(local_err);
- return -ENOSYS;
- }
-
- if (hyperv_enabled(cpu)) {
- r = hyperv_init_vcpu(cpu);
- if (r) {
- return r;
- }
-
- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
- kvm_base = KVM_CPUID_SIGNATURE_NEXT;
- has_msr_hv_hypercall = true;
- }
-
- if (cs->kvm_state->xen_version) {
-#ifdef CONFIG_XEN_EMU
- struct kvm_cpuid_entry2 *xen_max_leaf;
-
- memcpy(signature, "XenVMMXenVMM", 12);
-
- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
- c->function = kvm_base + XEN_CPUID_SIGNATURE;
- c->eax = kvm_base + XEN_CPUID_TIME;
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = kvm_base + XEN_CPUID_VENDOR;
- c->eax = cs->kvm_state->xen_version;
- c->ebx = 0;
- c->ecx = 0;
- c->edx = 0;
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = kvm_base + XEN_CPUID_HVM_MSR;
- /* Number of hypercall-transfer pages */
- c->eax = 1;
- /* Hypercall MSR base address */
- if (hyperv_enabled(cpu)) {
- c->ebx = XEN_HYPERCALL_MSR_HYPERV;
- kvm_xen_init(cs->kvm_state, c->ebx);
- } else {
- c->ebx = XEN_HYPERCALL_MSR;
- }
- c->ecx = 0;
- c->edx = 0;
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = kvm_base + XEN_CPUID_TIME;
- c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
- /* default=0 (emulate if necessary) */
- c->ebx = 0;
- /* guest tsc frequency */
- c->ecx = env->user_tsc_khz;
- /* guest tsc incarnation (migration count) */
- c->edx = 0;
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = kvm_base + XEN_CPUID_HVM;
- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
- c->function = kvm_base + XEN_CPUID_HVM;
-
- if (cpu->xen_vapic) {
- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
- c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
- }
-
- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
-
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
- c->ebx = cs->cpu_index;
- }
-
- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
- }
- }
-
- r = kvm_xen_init_vcpu(cs);
- if (r) {
- return r;
- }
-
- kvm_base += 0x100;
-#else /* CONFIG_XEN_EMU */
- /* This should never happen as kvm_arch_init() would have died first. */
- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
- abort();
-#endif
- } else if (cpu->expose_kvm) {
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_SIGNATURE | kvm_base;
- c->eax = KVM_CPUID_FEATURES | kvm_base;
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_FEATURES | kvm_base;
- c->eax = env->features[FEAT_KVM];
- c->edx = env->features[FEAT_KVM_HINTS];
- }
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
- if (cpu->kvm_pv_enforce_cpuid) {
- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
- if (r < 0) {
- fprintf(stderr,
- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
- strerror(-r));
- abort();
- }
- }
-
for (i = 0; i <= limit; i++) {
+ j = 0;
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "unsupported level value: 0x%x\n", limit);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
-
+ c = &entries[cpuid_i++];
switch (i) {
case 2: {
/* Keep reading function 2 till all the input is received */
@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
for (j = 1; j < times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "cpuid_data is full, no space for "
- "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
continue;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "cpuid_data is full, no space for "
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
}
break;
case 0x12:
@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "cpuid_data is full, no space for "
- "cpuid(eax:0x12,ecx:0x%x)\n", j);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
}
break;
case 0x7:
@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
for (j = 1; j <= times; ++j) {
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "cpuid_data is full, no space for "
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
c->function = i;
c->index = j;
c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
+ j = 0;
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
switch (i) {
case 0x8000001d:
@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
break;
}
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "cpuid_data is full, no space for "
- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
}
break;
default:
@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
for (i = 0xC0000000; i <= limit; i++) {
+ j = 0;
if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
- abort();
+ goto full;
}
- c = &cpuid_data.entries[cpuid_i++];
+ c = &entries[cpuid_i++];
c->function = i;
c->flags = 0;
@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
}
+ return cpuid_i;
+
+full:
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+}
+
+int kvm_arch_init_vcpu(CPUState *cs)
+{
+ struct {
+ struct kvm_cpuid2 cpuid;
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+ } cpuid_data;
+ /*
+ * The kernel defines these structs with padding fields so there
+ * should be no extra padding in our cpuid_data struct.
+ */
+ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) !=
+ sizeof(struct kvm_cpuid2) +
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ uint32_t cpuid_i;
+ struct kvm_cpuid_entry2 *c;
+ uint32_t signature[3];
+ int kvm_base = KVM_CPUID_SIGNATURE;
+ int max_nested_state_len;
+ int r;
+ Error *local_err = NULL;
+
+ memset(&cpuid_data, 0, sizeof(cpuid_data));
+
+ cpuid_i = 0;
+
+ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
+ r = kvm_arch_set_tsc_khz(cs);
+ if (r < 0) {
+ return r;
+ }
+
+ /* vcpu's TSC frequency is either specified by user, or following
+ * the value used by KVM if the former is not present. In the
+ * latter case, we query it from KVM and record in env->tsc_khz,
+ * so that vcpu's TSC frequency can be migrated later via this field.
+ */
+ if (!env->tsc_khz) {
+ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ?
+ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) :
+ -ENOTSUP;
+ if (r > 0) {
+ env->tsc_khz = r;
+ }
+ }
+
+ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY;
+
+ /*
+ * kvm_hyperv_expand_features() is called here for the second time in case
+ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle
+ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to
+ * check which Hyper-V enlightenments are supported and which are not, we
+ * can still proceed and check/expand Hyper-V enlightenments here so legacy
+ * behavior is preserved.
+ */
+ if (!kvm_hyperv_expand_features(cpu, &local_err)) {
+ error_report_err(local_err);
+ return -ENOSYS;
+ }
+
+ if (hyperv_enabled(cpu)) {
+ r = hyperv_init_vcpu(cpu);
+ if (r) {
+ return r;
+ }
+
+ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries);
+ kvm_base = KVM_CPUID_SIGNATURE_NEXT;
+ has_msr_hv_hypercall = true;
+ }
+
+ if (cs->kvm_state->xen_version) {
+#ifdef CONFIG_XEN_EMU
+ struct kvm_cpuid_entry2 *xen_max_leaf;
+
+ memcpy(signature, "XenVMMXenVMM", 12);
+
+ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++];
+ c->function = kvm_base + XEN_CPUID_SIGNATURE;
+ c->eax = kvm_base + XEN_CPUID_TIME;
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = kvm_base + XEN_CPUID_VENDOR;
+ c->eax = cs->kvm_state->xen_version;
+ c->ebx = 0;
+ c->ecx = 0;
+ c->edx = 0;
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = kvm_base + XEN_CPUID_HVM_MSR;
+ /* Number of hypercall-transfer pages */
+ c->eax = 1;
+ /* Hypercall MSR base address */
+ if (hyperv_enabled(cpu)) {
+ c->ebx = XEN_HYPERCALL_MSR_HYPERV;
+ kvm_xen_init(cs->kvm_state, c->ebx);
+ } else {
+ c->ebx = XEN_HYPERCALL_MSR;
+ }
+ c->ecx = 0;
+ c->edx = 0;
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = kvm_base + XEN_CPUID_TIME;
+ c->eax = ((!!tsc_is_stable_and_known(env) << 1) |
+ (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2));
+ /* default=0 (emulate if necessary) */
+ c->ebx = 0;
+ /* guest tsc frequency */
+ c->ecx = env->user_tsc_khz;
+ /* guest tsc incarnation (migration count) */
+ c->edx = 0;
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = kvm_base + XEN_CPUID_HVM;
+ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM;
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) {
+ c->function = kvm_base + XEN_CPUID_HVM;
+
+ if (cpu->xen_vapic) {
+ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT;
+ c->eax |= XEN_HVM_CPUID_X2APIC_VIRT;
+ }
+
+ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS;
+
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) {
+ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT;
+ c->ebx = cs->cpu_index;
+ }
+
+ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) {
+ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR;
+ }
+ }
+
+ r = kvm_xen_init_vcpu(cs);
+ if (r) {
+ return r;
+ }
+
+ kvm_base += 0x100;
+#else /* CONFIG_XEN_EMU */
+ /* This should never happen as kvm_arch_init() would have died first. */
+ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n");
+ abort();
+#endif
+ } else if (cpu->expose_kvm) {
+ memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_SIGNATURE | kvm_base;
+ c->eax = KVM_CPUID_FEATURES | kvm_base;
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_FEATURES | kvm_base;
+ c->eax = env->features[FEAT_KVM];
+ c->edx = env->features[FEAT_KVM_HINTS];
+ }
+
+ if (cpu->kvm_pv_enforce_cpuid) {
+ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1);
+ if (r < 0) {
+ fprintf(stderr,
+ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s",
+ strerror(-r));
+ abort();
+ }
+ }
+
+ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i);
cpuid_data.cpuid.nent = cpuid_i;
if (((env->cpuid_version >> 8)&0xF) >= 6
--
2.39.3

View File

@ -0,0 +1,91 @@
From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 May 2024 19:00:40 +0200
Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm)
There is no need to override these fields of MachineClass because they are
already set to the right value in the superclass.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20240509170044.190795-10-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 3 ---
hw/i386/x86.c | 6 +++---
include/hw/i386/x86.h | 4 ----
3 files changed, 3 insertions(+), 10 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 660a59c63b..0aca0cc79e 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
mc->async_pf_vmexit_disable = false;
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->hotplug_allowed = pc_hotplug_allowed;
- mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
mc->has_hotpluggable_cpus = true;
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index c61f4ebfa6..fcef652c1e 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
numa_cpu_pre_plug(cpu_slot, dev, errp);
}
-CpuInstanceProperties
+static CpuInstanceProperties
x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index)
return possible_cpus->cpus[cpu_index].props;
}
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
+static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
{
X86CPUTopoIDs topo_ids;
X86MachineState *x86ms = X86_MACHINE(ms);
@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx)
return topo_ids.pkg_id % ms->numa_state->num_nodes;
}
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
+static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms)
{
X86MachineState *x86ms = X86_MACHINE(ms);
unsigned int max_cpus = ms->smp.max_cpus;
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index d7b7d3f3ce..c2062db13f 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms,
void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp);
void x86_cpus_init(X86MachineState *pcms, int default_cpu_version);
-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms,
- unsigned cpu_index);
-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx);
-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms);
CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx);
void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count);
void x86_cpu_pre_plug(HotplugHandler *hotplug_dev,
--
2.39.3

View File

@ -0,0 +1,116 @@
From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 31 May 2024 12:44:44 +0200
Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type
for SNP guests
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm)
SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM,
or KVM_X86_SEV_ES_VM depending on the configuration and what
the host kernel supports. SNP guests on the other hand can only
ever use KVM_X86_SNP_VM, so split determination of VM type out
into a separate class method that can be set accordingly for
sev-guest vs. sev-snp-guest objects and add handling for SNP.
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com>
[Remove unnecessary function pointer declaration. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 1 +
target/i386/sev.c | 15 ++++++++++++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 408568d053..75e75d9772 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -166,6 +166,7 @@ static const char *vm_type_name[] = {
[KVM_X86_DEFAULT_VM] = "default",
[KVM_X86_SEV_VM] = "SEV",
[KVM_X86_SEV_ES_VM] = "SEV-ES",
+ [KVM_X86_SNP_VM] = "SEV-SNP",
};
bool kvm_is_vm_type_supported(int type)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index c3daaf1ad5..072cc4f853 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -885,6 +885,11 @@ out:
return sev_common->kvm_type;
}
+static int sev_snp_kvm_type(X86ConfidentialGuest *cg)
+{
+ return KVM_X86_SNP_VM;
+}
+
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
char *devname;
@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
struct sev_user_data_status status = {};
SevCommonState *sev_common = SEV_COMMON(cgs);
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
+ X86ConfidentialGuestClass *x86_klass =
+ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs);
sev_common->state = SEV_STATE_UNINIT;
@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
trace_kvm_sev_init();
- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
+ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
@@ -1441,10 +1448,8 @@ static void
sev_common_class_init(ObjectClass *oc, void *data)
{
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
klass->kvm_init = sev_common_kvm_init;
- x86_klass->kvm_type = sev_kvm_type;
object_class_property_add_str(oc, "sev-device",
sev_common_get_sev_device,
@@ -1529,10 +1534,12 @@ static void
sev_guest_class_init(ObjectClass *oc, void *data)
{
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
klass->launch_start = sev_launch_start;
klass->launch_finish = sev_launch_finish;
klass->kvm_init = sev_kvm_init;
+ x86_klass->kvm_type = sev_kvm_type;
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
@@ -1770,8 +1777,10 @@ static void
sev_snp_guest_class_init(ObjectClass *oc, void *data)
{
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
klass->kvm_init = sev_snp_kvm_init;
+ x86_klass->kvm_type = sev_snp_kvm_type;
object_class_property_add(oc, "policy", "uint64",
sev_snp_guest_get_policy,
--
2.39.3

View File

@ -0,0 +1,84 @@
From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:20 -0500
Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm)
Add a simple helper to check if the current guest type is SNP. Also have
SNP-enabled imply that SEV-ES is enabled as well, and fix up any places
where the sev_es_enabled() check is expecting a pure/non-SNP guest.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 13 ++++++++++++-
target/i386/sev.h | 2 ++
2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index a81b3228d4..4edfedc139 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -325,12 +325,21 @@ sev_enabled(void)
return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON);
}
+bool
+sev_snp_enabled(void)
+{
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+
+ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST);
+}
+
bool
sev_es_enabled(void)
{
ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
+ return sev_snp_enabled() ||
+ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
}
uint32_t
@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
"support", __func__);
goto err;
}
+ }
+ if (sev_es_enabled() && !sev_snp_enabled()) {
if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
error_setg(errp, "%s: guest policy requires SEV-ES, but "
"host SEV-ES support unavailable",
diff --git a/target/i386/sev.h b/target/i386/sev.h
index bedc667eeb..94295ee74f 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext {
#ifdef CONFIG_SEV
bool sev_enabled(void);
bool sev_es_enabled(void);
+bool sev_snp_enabled(void);
#else
#define sev_enabled() 0
#define sev_es_enabled() 0
+#define sev_snp_enabled() 0
#endif
uint32_t sev_get_cbit_position(void);
--
2.39.3

View File

@ -0,0 +1,187 @@
From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:28 -0500
Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest
launch data
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm)
Process any queued up launch data and encrypt/measure it into the SNP
guest instance prior to initial guest launch.
This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial
update responses.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++-
target/i386/trace-events | 2 +
2 files changed, 113 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index e89b87d2f5..ef2e592ca7 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -756,6 +756,76 @@ out:
return ret;
}
+static const char *
+snp_page_type_to_str(int type)
+{
+ switch (type) {
+ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal";
+ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero";
+ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured";
+ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets";
+ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid";
+ default: return "unknown";
+ }
+}
+
+static int
+sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
+ SevLaunchUpdateData *data)
+{
+ int ret, fw_error;
+ struct kvm_sev_snp_launch_update update = {0};
+
+ if (!data->hva || !data->len) {
+ error_report("SNP_LAUNCH_UPDATE called with invalid address"
+ "/ length: %p / %lx",
+ data->hva, data->len);
+ return 1;
+ }
+
+ update.uaddr = (__u64)(unsigned long)data->hva;
+ update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
+ update.len = data->len;
+ update.type = data->type;
+
+ /*
+ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private
+ * memory attribute set in advance.
+ */
+ ret = kvm_set_memory_attributes_private(data->gpa, data->len);
+ if (ret) {
+ error_report("SEV-SNP: failed to configure initial"
+ "private guest memory");
+ goto out;
+ }
+
+ while (update.len || ret == -EAGAIN) {
+ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start <<
+ TARGET_PAGE_BITS, update.len,
+ snp_page_type_to_str(update.type));
+
+ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ &update, &fw_error);
+ if (ret && ret != -EAGAIN) {
+ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
+ ret, fw_error, fw_error_to_str(fw_error));
+ break;
+ }
+ }
+
+out:
+ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) {
+ error_report("SEV-SNP: expected update of GPA range %lx-%lx,"
+ "got GPA range %lx-%llx",
+ data->gpa, data->gpa + data->len, data->gpa,
+ update.gfn_start << TARGET_PAGE_BITS);
+ ret = -EIO;
+ }
+
+ return ret;
+}
+
static int
sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
{
@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common)
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
}
+static void
+sev_snp_launch_finish(SevCommonState *sev_common)
+{
+ int ret, error;
+ Error *local_err = NULL;
+ SevLaunchUpdateData *data;
+ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
+
+ QTAILQ_FOREACH(data, &launch_update, next) {
+ ret = sev_snp_launch_update(sev_snp, data);
+ if (ret) {
+ exit(1);
+ }
+ }
+
+ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth,
+ sev_snp->host_data);
+ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH,
+ finish, &error);
+ if (ret) {
+ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'",
+ ret, error, fw_error_to_str(error));
+ exit(1);
+ }
+
+ sev_set_guest_state(sev_common, SEV_STATE_RUNNING);
+
+ /* add migration blocker */
+ error_setg(&sev_mig_blocker,
+ "SEV-SNP: Migration is not implemented");
+ ret = migrate_add_blocker(&sev_mig_blocker, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ error_free(sev_mig_blocker);
+ exit(1);
+ }
+}
+
+
static void
sev_vm_state_change(void *opaque, bool running, RunState state)
{
@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
klass->launch_start = sev_snp_launch_start;
+ klass->launch_finish = sev_snp_launch_finish;
klass->kvm_init = sev_snp_kvm_init;
x86_klass->kvm_type = sev_snp_kvm_type;
-
object_class_property_add(oc, "policy", "uint64",
sev_snp_guest_get_policy,
sev_snp_guest_set_policy, NULL, NULL);
diff --git a/target/i386/trace-events b/target/i386/trace-events
index cb26d8a925..06b44ead2e 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) ""
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
+kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)"
+kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s"
--
2.39.3

View File

@ -0,0 +1,127 @@
From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Tue, 9 Apr 2024 18:07:41 -0500
Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV
guest objects
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm)
QEMU will currently automatically make use of the KVM_SEV_INIT2 API for
initializing SEV and SEV-ES guests verses the older
KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces.
However, the older interfaces will silently avoid sync'ing FPU/XSAVE
state to the VMSA prior to encryption, thus relying on behavior and
measurements that assume the related fields to be allow zero.
With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in
measurements changes and, theoretically, behaviorial changes, though the
latter are unlikely to be seen in practice.
To allow a smooth transition to the newer interface, while still
providing a mechanism to maintain backward compatibility with VMs
created using the older interfaces, provide a new command-line
parameter:
-object sev-guest,legacy-vm-type=true,...
and have it default to false.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240409230743.962513-2-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
qapi/qom.json | 11 ++++++++++-
target/i386/sev.c | 18 +++++++++++++++++-
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/qapi/qom.json b/qapi/qom.json
index 85e6b4f84a..38dde6d785 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -898,6 +898,14 @@
# designated guest firmware page for measured boot with -kernel
# (default: false) (since 6.2)
#
+# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
+# The newer KVM_SEV_INIT2 interface syncs additional vCPU
+# state when initializing the VMSA structures, which will
+# result in a different guest measurement. Set this to
+# maintain compatibility with older QEMU or kernel versions
+# that rely on legacy KVM_SEV_INIT behavior.
+# (default: false) (since 9.1)
+#
# Since: 2.12
##
{ 'struct': 'SevGuestProperties',
@@ -908,7 +916,8 @@
'*handle': 'uint32',
'*cbitpos': 'uint32',
'reduced-phys-bits': 'uint32',
- '*kernel-hashes': 'bool' } }
+ '*kernel-hashes': 'bool',
+ '*legacy-vm-type': 'bool' } }
##
# @ThreadContextProperties:
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 9dab4060b8..f4ee317cb0 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -67,6 +67,7 @@ struct SevGuestState {
uint32_t cbitpos;
uint32_t reduced_phys_bits;
bool kernel_hashes;
+ bool legacy_vm_type;
/* runtime state */
uint32_t handle;
@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
sev->kernel_hashes = value;
}
+static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
+{
+ return SEV_GUEST(obj)->legacy_vm_type;
+}
+
+static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
+{
+ SEV_GUEST(obj)->legacy_vm_type = value;
+}
+
bool
sev_enabled(void)
{
@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
}
kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
- if (kvm_is_vm_type_supported(kvm_type)) {
+ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) {
sev->kvm_type = kvm_type;
} else {
sev->kvm_type = KVM_X86_DEFAULT_VM;
@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data)
sev_guest_set_kernel_hashes);
object_class_property_set_description(oc, "kernel-hashes",
"add kernel hashes to guest firmware for measured Linux boot");
+ object_class_property_add_bool(oc, "legacy-vm-type",
+ sev_guest_get_legacy_vm_type,
+ sev_guest_set_legacy_vm_type);
+ object_class_property_set_description(oc, "legacy-vm-type",
+ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
}
static void
--
2.39.3

View File

@ -0,0 +1,203 @@
From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:21 -0500
Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm)
Some aspects of the init routine SEV are specific to SEV and not
applicable for SNP guests, so move the SEV-specific bits into
separate class method and retain only the common functionality.
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++--------------
1 file changed, 51 insertions(+), 21 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 4edfedc139..5519de1c6b 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -73,6 +73,7 @@ struct SevCommonStateClass {
/* public */
int (*launch_start)(SevCommonState *sev_common);
void (*launch_finish)(SevCommonState *sev_common);
+ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
};
/**
@@ -882,7 +883,7 @@ out:
return sev_common->kvm_type;
}
-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
SevCommonState *sev_common = SEV_COMMON(cgs);
char *devname;
@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
struct sev_user_data_status status = {};
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
- ret = ram_block_discard_disable(true);
- if (ret) {
- error_report("%s: cannot disable RAM discard", __func__);
- return -1;
- }
-
sev_common->state = SEV_STATE_UNINIT;
host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL);
@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
if (host_cbitpos != sev_common->cbitpos) {
error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
__func__, host_cbitpos, sev_common->cbitpos);
- goto err;
+ return -1;
}
/*
@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
error_setg(errp, "%s: reduced_phys_bits check failed,"
" it should be in the range of 1 to 63, requested '%d'",
__func__, sev_common->reduced_phys_bits);
- goto err;
+ return -1;
}
devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL);
@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
error_setg(errp, "%s: Failed to open %s '%s'", __func__,
devname, strerror(errno));
g_free(devname);
- goto err;
+ return -1;
}
g_free(devname);
@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
error_setg(errp, "%s: failed to get platform status ret=%d "
"fw_error='%d: %s'", __func__, ret, fw_error,
fw_error_to_str(fw_error));
- goto err;
+ return -1;
}
sev_common->build_id = status.build;
sev_common->api_major = status.api_major;
@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
if (!kvm_kernel_irqchip_allowed()) {
error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip"
"support", __func__);
- goto err;
+ return -1;
}
}
@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
error_setg(errp, "%s: guest policy requires SEV-ES, but "
"host SEV-ES support unavailable",
__func__);
- goto err;
+ return -1;
}
}
@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
if (ret) {
error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
__func__, ret, fw_error, fw_error_to_str(fw_error));
- goto err;
+ return -1;
}
ret = klass->launch_start(sev_common);
if (ret) {
error_setg(errp, "%s: failed to create encryption context", __func__);
- goto err;
+ return -1;
+ }
+
+ if (klass->kvm_init && klass->kvm_init(cgs, errp)) {
+ return -1;
}
- ram_block_notifier_add(&sev_ram_notifier);
- qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common);
cgs->ready = true;
return 0;
-err:
- ram_block_discard_disable(false);
- return -1;
+}
+
+static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ int ret;
+
+ /*
+ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding
+ * isn't actually possible. With SNP, only guest_memfd pages are used
+ * for private guest memory, so discarding of shared memory is still
+ * possible..
+ */
+ ret = ram_block_discard_disable(true);
+ if (ret) {
+ error_setg(errp, "%s: cannot disable RAM discard", __func__);
+ return -1;
+ }
+
+ /*
+ * SEV uses these notifiers to register/pin pages prior to guest use,
+ * but SNP relies on guest_memfd for private pages, which has its
+ * own internal mechanisms for registering/pinning private memory.
+ */
+ ram_block_notifier_add(&sev_ram_notifier);
+
+ /*
+ * The machine done notify event is used for SEV guests to get the
+ * measurement of the encrypted images. When SEV-SNP is enabled, the
+ * measurement is part of the guest attestation process where it can
+ * be collected without any reliance on the VMM. So skip registering
+ * the notifier for SNP in favor of using guest attestation instead.
+ */
+ qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
+
+ return 0;
}
int
@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data)
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
- klass->kvm_init = sev_kvm_init;
+ klass->kvm_init = sev_common_kvm_init;
x86_klass->kvm_type = sev_kvm_type;
object_class_property_add_str(oc, "sev-device",
@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
klass->launch_start = sev_launch_start;
klass->launch_finish = sev_launch_finish;
+ klass->kvm_init = sev_kvm_init;
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
--
2.39.3

View File

@ -0,0 +1,94 @@
From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:22 -0500
Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm)
SNP does not support SMM and requires guest_memfd for
private guest memory, so add SNP specific kvm_init()
functionality in snp_kvm_init() class method.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 5519de1c6b..6525b3c1a0 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -885,12 +885,12 @@ out:
static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
- SevCommonState *sev_common = SEV_COMMON(cgs);
char *devname;
int ret, fw_error, cmd;
uint32_t ebx;
uint32_t host_cbitpos;
struct sev_user_data_status status = {};
+ SevCommonState *sev_common = SEV_COMMON(cgs);
SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
sev_common->state = SEV_STATE_UNINIT;
@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return 0;
}
+static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ X86MachineState *x86ms = X86_MACHINE(ms);
+
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
+ x86ms->smm = ON_OFF_AUTO_OFF;
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
+ error_setg(errp, "SEV-SNP does not support SMM.");
+ return -1;
+ }
+
+ return 0;
+}
+
int
sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
{
@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
static void
sev_snp_guest_class_init(ObjectClass *oc, void *data)
{
+ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
+
+ klass->kvm_init = sev_snp_kvm_init;
+
object_class_property_add(oc, "policy", "uint64",
sev_snp_guest_get_policy,
sev_snp_guest_set_policy, NULL, NULL);
@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
static void
sev_snp_guest_instance_init(Object *obj)
{
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+ cgs->require_guest_memfd = true;
+
/* default init/start/finish params for kvm */
sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
}
--
2.39.3

View File

@ -0,0 +1,262 @@
From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:32 -0500
Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm)
SEV-SNP firmware allows a special guest page to be populated with a
table of guest CPUID values so that they can be validated through
firmware before being loaded into encrypted guest memory where they can
be used in place of hypervisor-provided values[1].
As part of SEV-SNP guest initialization, use this interface to validate
the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest
start and populate the CPUID page reserved by OVMF with the resulting
encrypted data.
[1] SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 162 insertions(+), 2 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index c57534fca2..06401f0526 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = {
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
+/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
+#define KVM_MAX_CPUID_ENTRIES 100
+
+typedef struct KvmCpuidInfo {
+ struct kvm_cpuid2 cpuid;
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} KvmCpuidInfo;
+
+#define SNP_CPUID_FUNCTION_MAXCOUNT 64
+#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
+
+typedef struct {
+ uint32_t eax_in;
+ uint32_t ecx_in;
+ uint64_t xcr0_in;
+ uint64_t xss_in;
+ uint32_t eax;
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+ uint64_t reserved;
+} __attribute__((packed)) SnpCpuidFunc;
+
+typedef struct {
+ uint32_t count;
+ uint32_t reserved1;
+ uint64_t reserved2;
+ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT];
+} __attribute__((packed)) SnpCpuidInfo;
+
static int
sev_ioctl(int fd, int cmd, void *data, int *error)
{
@@ -788,6 +818,35 @@ out:
return ret;
}
+static void
+sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
+ SnpCpuidInfo *new)
+{
+ size_t i;
+
+ if (old->count != new->count) {
+ error_report("SEV-SNP: CPUID validation failed due to count mismatch,"
+ "provided: %d, expected: %d", old->count, new->count);
+ return;
+ }
+
+ for (i = 0; i < old->count; i++) {
+ SnpCpuidFunc *old_func, *new_func;
+
+ old_func = &old->entries[i];
+ new_func = &new->entries[i];
+
+ if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
+ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x"
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x"
+ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x",
+ old_func->eax_in, old_func->ecx_in,
+ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx,
+ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx);
+ }
+ }
+}
+
static const char *
snp_page_type_to_str(int type)
{
@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
SevLaunchUpdateData *data)
{
int ret, fw_error;
+ SnpCpuidInfo snp_cpuid_info;
struct kvm_sev_snp_launch_update update = {0};
if (!data->hva || !data->len) {
@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
return 1;
}
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
+ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */
+ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info));
+ }
+
update.uaddr = (__u64)(unsigned long)data->hva;
update.gfn_start = data->gpa >> TARGET_PAGE_BITS;
update.len = data->len;
@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest,
if (ret && ret != -EAGAIN) {
error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
ret, fw_error, fw_error_to_str(fw_error));
+
+ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
+ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva);
+ error_report("SEV-SNP: failed update CPUID page");
+ }
break;
}
}
@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common)
}
static int
-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
+snp_launch_update_data(uint64_t gpa, void *hva,
+ uint32_t len, int type)
{
SevLaunchUpdateData *data;
@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
return 0;
}
+static int
+sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
+ const KvmCpuidInfo *kvm_cpuid_info)
+{
+ size_t i;
+
+ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) {
+ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)",
+ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT);
+ return -1;
+ }
+
+ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info));
+
+ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) {
+ const struct kvm_cpuid_entry2 *kvm_cpuid_entry;
+ SnpCpuidFunc *snp_cpuid_entry;
+
+ kvm_cpuid_entry = &kvm_cpuid_info->entries[i];
+ snp_cpuid_entry = &snp_cpuid_info->entries[i];
+
+ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function;
+ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) {
+ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index;
+ }
+ snp_cpuid_entry->eax = kvm_cpuid_entry->eax;
+ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx;
+ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx;
+ snp_cpuid_entry->edx = kvm_cpuid_entry->edx;
+
+ /*
+ * Guest kernels will calculate EBX themselves using the 0xD
+ * subfunctions corresponding to the individual XSAVE areas, so only
+ * encode the base XSAVE size in the initial leaves, corresponding
+ * to the initial XCR0=1 state.
+ */
+ if (snp_cpuid_entry->eax_in == 0xD &&
+ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) {
+ snp_cpuid_entry->ebx = 0x240;
+ snp_cpuid_entry->xcr0_in = 1;
+ snp_cpuid_entry->xss_in = 0;
+ }
+ }
+
+ snp_cpuid_info->count = i;
+
+ return 0;
+}
+
+static int
+snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
+{
+ KvmCpuidInfo kvm_cpuid_info = {0};
+ SnpCpuidInfo snp_cpuid_info;
+ CPUState *cs = first_cpu;
+ int ret;
+ uint32_t i = 0;
+
+ assert(sizeof(snp_cpuid_info) <= cpuid_len);
+
+ /* get the cpuid list from KVM */
+ do {
+ kvm_cpuid_info.cpuid.nent = ++i;
+ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info);
+ } while (ret == -E2BIG);
+
+ if (ret) {
+ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'",
+ strerror(-ret));
+ return 1;
+ }
+
+ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info);
+ if (ret) {
+ error_report("SEV-SNP: failed to generate CPUID table information");
+ return 1;
+ }
+
+ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info));
+
+ return snp_launch_update_data(cpuid_addr, hva, cpuid_len,
+ KVM_SEV_SNP_PAGE_TYPE_CPUID);
+}
+
static int
snp_metadata_desc_to_page_type(int desc_type)
{
@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
exit(1);
}
- ret = snp_launch_update_data(desc->base, hva, desc->len, type);
+ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
+ ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
+ } else {
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
+ }
+
if (ret) {
error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
__func__, desc->base, desc->len, desc->type);
--
2.39.3

View File

@ -0,0 +1,127 @@
From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:31 -0500
Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata
pages
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm)
OVMF reserves various pages so they can be pre-initialized/validated
prior to launching the guest. Add support for populating these pages
with the expected content.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 17281bb2c7..c57534fca2 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common)
migrate_add_blocker(&sev_mig_blocker, &error_fatal);
}
+static int
+snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type)
+{
+ SevLaunchUpdateData *data;
+
+ data = g_new0(SevLaunchUpdateData, 1);
+ data->gpa = gpa;
+ data->hva = hva;
+ data->len = len;
+ data->type = type;
+
+ QTAILQ_INSERT_TAIL(&launch_update, data, next);
+
+ return 0;
+}
+
+static int
+snp_metadata_desc_to_page_type(int desc_type)
+{
+ switch (desc_type) {
+ /* Add the umeasured prevalidated pages as a zero page */
+ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO;
+ case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS;
+ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID;
+ default:
+ return KVM_SEV_SNP_PAGE_TYPE_ZERO;
+ }
+}
+
+static void
+snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
+ OvmfSevMetadata *metadata)
+{
+ OvmfSevMetadataDesc *desc;
+ int type, ret, i;
+ void *hva;
+ MemoryRegion *mr = NULL;
+
+ for (i = 0; i < metadata->num_desc; i++) {
+ desc = &metadata->descs[i];
+
+ type = snp_metadata_desc_to_page_type(desc->type);
+
+ hva = gpa2hva(&mr, desc->base, desc->len, NULL);
+ if (!hva) {
+ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x",
+ __func__, desc->base, desc->len);
+ exit(1);
+ }
+
+ ret = snp_launch_update_data(desc->base, hva, desc->len, type);
+ if (ret) {
+ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d",
+ __func__, desc->base, desc->len, desc->type);
+ exit(1);
+ }
+ }
+}
+
static void
sev_snp_launch_finish(SevCommonState *sev_common)
{
int ret, error;
Error *local_err = NULL;
+ OvmfSevMetadata *metadata;
SevLaunchUpdateData *data;
SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common);
struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf;
+ /*
+ * To boot the SNP guest, the hypervisor is required to populate the CPUID
+ * and Secrets page before finalizing the launch flow. The location of
+ * the secrets and CPUID page is available through the OVMF metadata GUID.
+ */
+ metadata = pc_system_get_ovmf_sev_metadata_ptr();
+ if (metadata == NULL) {
+ error_report("%s: Failed to locate SEV metadata header", __func__);
+ exit(1);
+ }
+
+ /* Populate all the metadata pages */
+ snp_populate_metadata_pages(sev_snp, metadata);
+
QTAILQ_FOREACH(data, &launch_update, next) {
ret = sev_snp_launch_update(sev_snp, data);
if (ret) {
--
2.39.3

View File

@ -0,0 +1,122 @@
From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:27 -0500
Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm)
The SNP_LAUNCH_START is called first to create a cryptographic launch
context within the firmware.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++
target/i386/trace-events | 1 +
2 files changed, 40 insertions(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 43d1c48bd9..e89b87d2f5 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -39,6 +39,7 @@
#include "confidential-guest.h"
#include "hw/i386/pc.h"
#include "exec/address-spaces.h"
+#include "qemu/queue.h"
OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
@@ -115,6 +116,16 @@ struct SevSnpGuestState {
#define DEFAULT_SEV_DEVICE "/dev/sev"
#define DEFAULT_SEV_SNP_POLICY 0x30000
+typedef struct SevLaunchUpdateData {
+ QTAILQ_ENTRY(SevLaunchUpdateData) next;
+ hwaddr gpa;
+ void *hva;
+ uint64_t len;
+ int type;
+} SevLaunchUpdateData;
+
+static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update;
+
#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
typedef struct __attribute__((__packed__)) SevInfoBlock {
/* SEV-ES Reset Vector Address */
@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len)
return 0;
}
+static int
+sev_snp_launch_start(SevCommonState *sev_common)
+{
+ int fw_error, rc;
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
+ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf;
+
+ trace_kvm_sev_snp_launch_start(start->policy,
+ sev_snp_guest->guest_visible_workarounds);
+
+ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START,
+ start, &fw_error);
+ if (rc < 0) {
+ error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'",
+ __func__, rc, fw_error, fw_error_to_str(fw_error));
+ return 1;
+ }
+
+ QTAILQ_INIT(&launch_update);
+
+ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE);
+
+ return 0;
+}
+
static int
sev_launch_start(SevCommonState *sev_common)
{
@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
ret = klass->launch_start(sev_common);
+
if (ret) {
error_setg(errp, "%s: failed to create encryption context", __func__);
return -1;
@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+ klass->launch_start = sev_snp_launch_start;
klass->kvm_init = sev_snp_kvm_init;
x86_klass->kvm_type = sev_snp_kvm_type;
+
object_class_property_add(oc, "policy", "uint64",
sev_snp_guest_get_policy,
sev_snp_guest_set_policy, NULL, NULL);
diff --git a/target/i386/trace-events b/target/i386/trace-events
index 2cd8726eeb..cb26d8a925 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s"
kvm_sev_launch_finish(void) ""
kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s"
+kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s"
--
2.39.3

View File

@ -0,0 +1,237 @@
From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 30 May 2024 06:16:35 -0500
Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm)
In SNP, the hashes page designated with a specific metadata entry
published in AmdSev OVMF.
Therefore, if the user enabled kernel hashes (for measured direct boot),
QEMU should prepare the content of hashes table, and during the
processing of the metadata entry it copy the content into the designated
page and encrypt it.
Note that in SNP (unlike SEV and SEV-ES) the measurements is done in
whole 4KB pages. Therefore QEMU zeros the whole page that includes the
hashes table, and fills in the kernel hashes area in that page, and then
encrypts the whole page. The rest of the page is reserved for SEV
launch secrets which are not usable anyway on SNP.
If the user disabled kernel hashes, QEMU pre-validates the kernel hashes
page as a zero page.
Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/hw/i386/pc.h | 2 +
target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++-----------
2 files changed, 85 insertions(+), 28 deletions(-)
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 94b49310f5..ee3bfb7be9 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -175,6 +175,8 @@ typedef enum {
SEV_DESC_TYPE_SNP_SECRETS,
/* The section contains address that can be used as a CPUID page */
SEV_DESC_TYPE_CPUID,
+ /* The section contains the region for kernel hashes for measured direct boot */
+ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10,
} ovmf_sev_metadata_desc_type;
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 3fce4c08eb..004c667ac1 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -115,6 +115,10 @@ struct SevCommonStateClass {
X86ConfidentialGuestClass parent_class;
/* public */
+ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common,
+ SevHashTableDescriptor *area,
+ SevKernelLoaderContext *ctx,
+ Error **errp);
int (*launch_start)(SevCommonState *sev_common);
void (*launch_finish)(SevCommonState *sev_common);
int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len);
@@ -154,6 +158,9 @@ struct SevSnpGuestState {
struct kvm_sev_snp_launch_start kvm_start_conf;
struct kvm_sev_snp_launch_finish kvm_finish_conf;
+
+ uint32_t kernel_hashes_offset;
+ PaddedSevHashTable *kernel_hashes_data;
};
#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */
@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len)
KVM_SEV_SNP_PAGE_TYPE_CPUID);
}
+static int
+snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr,
+ void *hva, uint32_t len)
+{
+ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO;
+ if (sev_snp->parent_obj.kernel_hashes) {
+ assert(sev_snp->kernel_hashes_data);
+ assert((sev_snp->kernel_hashes_offset +
+ sizeof(*sev_snp->kernel_hashes_data)) <= len);
+ memset(hva, 0, len);
+ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data,
+ sizeof(*sev_snp->kernel_hashes_data));
+ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL;
+ }
+ return snp_launch_update_data(addr, hva, len, type);
+}
+
static int
snp_metadata_desc_to_page_type(int desc_type)
{
@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp,
if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) {
ret = snp_launch_update_cpuid(desc->base, hva, desc->len);
+ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) {
+ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva,
+ desc->len);
} else {
ret = snp_launch_update_data(desc->base, hva, desc->len, type);
}
@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht,
return true;
}
+static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common,
+ SevHashTableDescriptor *area,
+ SevKernelLoaderContext *ctx,
+ Error **errp)
+{
+ /*
+ * SNP: Populate the hashes table in an area that later in
+ * snp_launch_update_kernel_hashes() will be copied to the guest memory
+ * and encrypted.
+ */
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
+ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK;
+ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1);
+ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp);
+}
+
+static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common,
+ SevHashTableDescriptor *area,
+ SevKernelLoaderContext *ctx,
+ Error **errp)
+{
+ PaddedSevHashTable *padded_ht;
+ hwaddr mapped_len = sizeof(*padded_ht);
+ MemTxAttrs attrs = { 0 };
+ bool ret = true;
+
+ /*
+ * Populate the hashes table in the guest's memory at the OVMF-designated
+ * area for the SEV hashes table
+ */
+ padded_ht = address_space_map(&address_space_memory, area->base,
+ &mapped_len, true, attrs);
+ if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
+ error_setg(errp, "SEV: cannot map hashes table guest memory area");
+ return false;
+ }
+
+ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
+ sizeof(*padded_ht), errp) < 0) {
+ ret = false;
+ }
+ } else {
+ ret = false;
+ }
+
+ address_space_unmap(&address_space_memory, padded_ht,
+ mapped_len, true, mapped_len);
+
+ return ret;
+}
+
/*
* Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
* which is included in SEV's initial memory measurement.
@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
{
uint8_t *data;
SevHashTableDescriptor *area;
- PaddedSevHashTable *padded_ht;
- hwaddr mapped_len = sizeof(*padded_ht);
- MemTxAttrs attrs = { 0 };
- bool ret = true;
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
/*
* Only add the kernel hashes if the sev-guest configuration explicitly
@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
return false;
}
- /*
- * Populate the hashes table in the guest's memory at the OVMF-designated
- * area for the SEV hashes table
- */
- padded_ht = address_space_map(&address_space_memory, area->base,
- &mapped_len, true, attrs);
- if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
- error_setg(errp, "SEV: cannot map hashes table guest memory area");
- return false;
- }
-
- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
- sizeof(*padded_ht), errp) < 0) {
- ret = false;
- }
- } else {
- ret = false;
- }
-
- address_space_unmap(&address_space_memory, padded_ht,
- mapped_len, true, mapped_len);
-
- return ret;
+ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp);
}
static char *
@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes;
klass->launch_start = sev_launch_start;
klass->launch_finish = sev_launch_finish;
klass->launch_update_data = sev_launch_update_data;
@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes;
klass->launch_start = sev_snp_launch_start;
klass->launch_finish = sev_snp_launch_finish;
klass->launch_update_data = sev_snp_launch_update_data;
--
2.39.3

View File

@ -0,0 +1,268 @@
From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Tue, 9 Jul 2024 23:10:05 -0500
Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy
KVM_SEV*_INIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm)
Currently if the 'legacy-vm-type' property of the sev-guest object is
'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel
interface in conjunction with the newer KVM_X86_SEV_VM and
KVM_X86_SEV_ES_VM KVM VM types.
This can lead to measurement changes if, for instance, an SEV guest was
created on a host that originally had an older kernel that didn't
support KVM_SEV_INIT2, but is booted on the same host later on after the
host kernel was upgraded.
Instead, if legacy-vm-type is 'off', QEMU should fail if the
KVM_SEV_INIT2 interface is not provided by the current host kernel.
Modify the fallback handling accordingly.
In the future, VMSA features and other flags might be added to QEMU
which will require legacy-vm-type to be 'off' because they will rely
on the newer KVM_SEV_INIT2 interface. It may be difficult to convey to
users what values of legacy-vm-type are compatible with which
features/options, so as part of this rework, switch legacy-vm-type to a
tri-state OnOffAuto option. 'auto' in this case will automatically
switch to using the newer KVM_SEV_INIT2, but only if it is required to
make use of new VMSA features or other options only available via
KVM_SEV_INIT2.
Defining 'auto' in this way would avoid inadvertantly breaking
compatibility with older kernels since it would only be used in cases
where users opt into newer features that are only available via
KVM_SEV_INIT2 and newer kernels, and provide better default behavior
than the legacy-vm-type=off behavior that was previously in place, so
make it the default for 9.1+ machine types.
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
cc: kvm@vger.kernel.org
Signed-off-by: Michael Roth <michael.roth@amd.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd)
RHEL: adjust compatiility setting, applying it to 9.4 machine type
---
hw/i386/pc.c | 2 +-
qapi/qom.json | 18 ++++++----
target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++--------
3 files changed, 83 insertions(+), 22 deletions(-)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index b25d075b59..e9c5ea5d8f 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
GlobalProperty pc_rhel_9_5_compat[] = {
/* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
{ TYPE_X86_CPU, "guest-phys-bits", "0" },
- { "sev-guest", "legacy-vm-type", "true" },
+ { "sev-guest", "legacy-vm-type", "on" },
};
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
diff --git a/qapi/qom.json b/qapi/qom.json
index 8bd299265e..17bd5a0cf7 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -912,12 +912,16 @@
# @handle: SEV firmware handle (default: 0)
#
# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM.
-# The newer KVM_SEV_INIT2 interface syncs additional vCPU
-# state when initializing the VMSA structures, which will
-# result in a different guest measurement. Set this to
-# maintain compatibility with older QEMU or kernel versions
-# that rely on legacy KVM_SEV_INIT behavior.
-# (default: false) (since 9.1)
+# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs
+# additional vCPU state when initializing the VMSA structures,
+# which will result in a different guest measurement. Set
+# this to 'on' to force compatibility with older QEMU or kernel
+# versions that rely on legacy KVM_SEV_INIT behavior. 'auto'
+# will behave identically to 'on', but will automatically
+# switch to using KVM_SEV_INIT2 if the user specifies any
+# additional options that require it. If set to 'off', QEMU
+# will require KVM_SEV_INIT2 unconditionally.
+# (default: off) (since 9.1)
#
# Since: 2.12
##
@@ -927,7 +931,7 @@
'*session-file': 'str',
'*policy': 'uint32',
'*handle': 'uint32',
- '*legacy-vm-type': 'bool' } }
+ '*legacy-vm-type': 'OnOffAuto' } }
##
# @SevSnpGuestProperties:
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 491fab74fd..b921defb63 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -144,7 +144,7 @@ struct SevGuestState {
uint32_t policy;
char *dh_cert_file;
char *session_file;
- bool legacy_vm_type;
+ OnOffAuto legacy_vm_type;
};
struct SevSnpGuestState {
@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
}
}
+/*
+ * This helper is to examine sev-guest properties and determine if any options
+ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated
+ * KVM VM types.
+ */
+static bool sev_init2_required(SevGuestState *sev_guest)
+{
+ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */
+ return false;
+}
+
static int sev_kvm_type(X86ConfidentialGuest *cg)
{
SevCommonState *sev_common = SEV_COMMON(cg);
@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg)
goto out;
}
+ /* These are the only cases where legacy VM types can be used. */
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON ||
+ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO &&
+ !sev_init2_required(sev_guest))) {
+ sev_common->kvm_type = KVM_X86_DEFAULT_VM;
+ goto out;
+ }
+
+ /*
+ * Newer VM types are required, either explicitly via legacy-vm-type=on, or
+ * implicitly via legacy-vm-type=auto along with additional sev-guest
+ * properties that require the newer VM types.
+ */
kvm_type = (sev_guest->policy & SEV_POLICY_ES) ?
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) {
- sev_common->kvm_type = kvm_type;
- } else {
- sev_common->kvm_type = KVM_X86_DEFAULT_VM;
+ if (!kvm_is_vm_type_supported(kvm_type)) {
+ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) {
+ error_report("SEV: host kernel does not support requested %s VM type, which is required "
+ "for the set of options specified. To allow use of the legacy "
+ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not "
+ "compatible with the legacy VM type, or upgrade your kernel.",
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
+ } else {
+ error_report("SEV: host kernel does not support requested %s VM type. To allow use of "
+ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument "
+ "must be set to 'on' or 'auto' for the sev-guest object.",
+ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM");
+ }
+
+ return -1;
}
+ sev_common->kvm_type = kvm_type;
out:
return sev_common->kvm_type;
}
@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
}
trace_kvm_sev_init();
- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) {
+ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) {
+ case KVM_X86_DEFAULT_VM:
cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
- } else {
+ break;
+ case KVM_X86_SEV_VM:
+ case KVM_X86_SEV_ES_VM:
+ case KVM_X86_SNP_VM: {
struct kvm_sev_init args = { 0 };
ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
+ break;
+ }
+ default:
+ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.",
+ __func__);
+ return -1;
}
if (ret) {
@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp)
SEV_GUEST(obj)->session_file = g_strdup(value);
}
-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp)
+static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
{
- return SEV_GUEST(obj)->legacy_vm_type;
+ SevGuestState *sev_guest = SEV_GUEST(obj);
+ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type;
+
+ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp);
}
-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
+static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
{
- SEV_GUEST(obj)->legacy_vm_type = value;
+ SevGuestState *sev_guest = SEV_GUEST(obj);
+
+ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp);
}
static void
@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data)
sev_guest_set_session_file);
object_class_property_set_description(oc, "session-file",
"guest owners session parameters (encoded with base64)");
- object_class_property_add_bool(oc, "legacy-vm-type",
- sev_guest_get_legacy_vm_type,
- sev_guest_set_legacy_vm_type);
+ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto",
+ sev_guest_get_legacy_vm_type,
+ sev_guest_set_legacy_vm_type, NULL, NULL);
object_class_property_set_description(oc, "legacy-vm-type",
"use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions.");
}
@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj)
object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy,
OBJ_PROP_FLAG_READWRITE);
object_apply_compat_props(obj);
+
+ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO;
}
/* guest info specific sev/sev-es */
--
2.39.3

View File

@ -0,0 +1,46 @@
From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:24 -0500
Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for
SEV-SNP guests
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm)
For SEV-SNP guests, launch measurement is queried from within the guest
during attestation, so don't attempt to return it as part of
query-sev-launch-measure.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 6525b3c1a0..c3daaf1ad5 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
static char *sev_get_launch_measurement(void)
{
- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs);
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+ SevGuestState *sev_guest =
+ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
if (sev_guest &&
SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) {
--
2.39.3

View File

@ -0,0 +1,54 @@
From 0612c7ed587422ec7e07c27c8ca11b89c7aa8b02 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:43 -0500
Subject: [PATCH 077/100] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP
guests
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [77/91] 3c494eb54499c24121cc2c47045626478b8bb41e (bonzini/rhel-qemu-kvm)
KVM will forward GHCB page-state change requests to userspace in the
form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is
enabled for SNP guests.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index eaf5fc6c6b..abb63062ac 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include <linux/kvm.h>
+#include <linux/kvm_para.h>
#include <linux/psp-sev.h>
#include <sys/ioctl.h>
@@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common)
trace_kvm_sev_snp_launch_start(start->policy,
sev_snp_guest->guest_visible_workarounds);
+ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
+ return 1;
+ }
+
rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START,
start, &fw_error);
if (rc < 0) {
--
2.39.3

View File

@ -0,0 +1,167 @@
From eed17520567c202f53ab767bfd42cfe303838772 Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 30 May 2024 06:16:33 -0500
Subject: [PATCH 078/100] i386/sev: Extract build_kernel_loader_hashes
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [78/91] 291ea10e774178826d1afd38fc8292d67c5fd42d (bonzini/rhel-qemu-kvm)
Extract the building of the kernel hashes table out from
sev_add_kernel_loader_hashes() to allow building it in
other memory areas (for SNP support).
No functional change intended.
Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 06cbd66cecaa3230cccb330facac241a677b29d5)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 102 ++++++++++++++++++++++++++--------------------
1 file changed, 58 insertions(+), 44 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index abb63062ac..73f9406715 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = {
0x4d, 0x36, 0xab, 0x2a)
};
-/*
- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
- * which is included in SEV's initial memory measurement.
- */
-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
+static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht,
+ SevKernelLoaderContext *ctx,
+ Error **errp)
{
- uint8_t *data;
- SevHashTableDescriptor *area;
SevHashTable *ht;
- PaddedSevHashTable *padded_ht;
uint8_t cmdline_hash[HASH_SIZE];
uint8_t initrd_hash[HASH_SIZE];
uint8_t kernel_hash[HASH_SIZE];
uint8_t *hashp;
size_t hash_len = HASH_SIZE;
- hwaddr mapped_len = sizeof(*padded_ht);
- MemTxAttrs attrs = { 0 };
- bool ret = true;
- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
-
- /*
- * Only add the kernel hashes if the sev-guest configuration explicitly
- * stated kernel-hashes=on.
- */
- if (!sev_common->kernel_hashes) {
- return false;
- }
-
- if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) {
- error_setg(errp, "SEV: kernel specified but guest firmware "
- "has no hashes table GUID");
- return false;
- }
- area = (SevHashTableDescriptor *)data;
- if (!area->base || area->size < sizeof(PaddedSevHashTable)) {
- error_setg(errp, "SEV: guest firmware hashes table area is invalid "
- "(base=0x%x size=0x%x)", area->base, area->size);
- return false;
- }
/*
* Calculate hash of kernel command-line with the terminating null byte. If
@@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
}
assert(hash_len == HASH_SIZE);
- /*
- * Populate the hashes table in the guest's memory at the OVMF-designated
- * area for the SEV hashes table
- */
- padded_ht = address_space_map(&address_space_memory, area->base,
- &mapped_len, true, attrs);
- if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
- error_setg(errp, "SEV: cannot map hashes table guest memory area");
- return false;
- }
ht = &padded_ht->ht;
ht->guid = sev_hash_table_header_guid;
@@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
/* zero the excess data so the measurement can be reliably calculated */
memset(padded_ht->padding, 0, sizeof(padded_ht->padding));
- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
- sizeof(*padded_ht), errp) < 0) {
+ return true;
+}
+
+/*
+ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page
+ * which is included in SEV's initial memory measurement.
+ */
+bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
+{
+ uint8_t *data;
+ SevHashTableDescriptor *area;
+ PaddedSevHashTable *padded_ht;
+ hwaddr mapped_len = sizeof(*padded_ht);
+ MemTxAttrs attrs = { 0 };
+ bool ret = true;
+ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+
+ /*
+ * Only add the kernel hashes if the sev-guest configuration explicitly
+ * stated kernel-hashes=on.
+ */
+ if (!sev_common->kernel_hashes) {
+ return false;
+ }
+
+ if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) {
+ error_setg(errp, "SEV: kernel specified but guest firmware "
+ "has no hashes table GUID");
+ return false;
+ }
+
+ area = (SevHashTableDescriptor *)data;
+ if (!area->base || area->size < sizeof(PaddedSevHashTable)) {
+ error_setg(errp, "SEV: guest firmware hashes table area is invalid "
+ "(base=0x%x size=0x%x)", area->base, area->size);
+ return false;
+ }
+
+ /*
+ * Populate the hashes table in the guest's memory at the OVMF-designated
+ * area for the SEV hashes table
+ */
+ padded_ht = address_space_map(&address_space_memory, area->base,
+ &mapped_len, true, attrs);
+ if (!padded_ht || mapped_len != sizeof(*padded_ht)) {
+ error_setg(errp, "SEV: cannot map hashes table guest memory area");
+ return false;
+ }
+
+ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) {
+ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht,
+ sizeof(*padded_ht), errp) < 0) {
+ ret = false;
+ }
+ } else {
ret = false;
}
--
2.39.3

View File

@ -0,0 +1,65 @@
From a9530c89225fce9e381929c4cd8e372068827acf Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Mon, 24 Jun 2024 10:52:49 +0200
Subject: [PATCH 089/100] i386/sev: Fallback to the default SEV device if none
provided in sev_get_capabilities()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [89/91] 22318c20d7102815f754cec0efaf383e05ef79c1 (bonzini/rhel-qemu-kvm)
When management tools (e.g. libvirt) query QEMU capabilities,
they start QEMU with a minimalistic configuration and issue
various commands on monitor. One of the command issued is/might
be "query-sev-capabilities" to learn values like cbitpos or
reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor
command returns an error instead.
This creates a chicken-egg problem because in order to query
those aforementioned values QEMU needs to be started with a
'sev-guest' object. But to start QEMU with the values must be
known.
I think it's safe to assume that the default path ("/dev/sev")
provides the same data as user provided one. So fall back to it.
Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3fb24530b2bb1346a44e17becefc9865b40a2257)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 53b7f7315b..491fab74fd 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -585,13 +585,13 @@ static SevCapability *sev_get_capabilities(Error **errp)
}
sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
- if (!sev_common) {
- error_setg(errp, "SEV is not configured");
- return NULL;
+ if (sev_common) {
+ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
+ &error_abort);
+ } else {
+ sev_device = g_strdup(DEFAULT_SEV_DEVICE);
}
- sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
- &error_abort);
fd = open(sev_device, O_RDWR);
if (fd < 0) {
error_setg_errno(errp, errno, "SEV: Failed to open %s",
--
2.39.3

View File

@ -0,0 +1,48 @@
From b672cdf8c10a530b5bcf6dd4489632891eb2c731 Mon Sep 17 00:00:00 2001
From: Michal Privoznik <mprivozn@redhat.com>
Date: Mon, 24 Jun 2024 10:52:48 +0200
Subject: [PATCH 088/100] i386/sev: Fix error message in sev_get_capabilities()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [88/91] ff8a8b27af02e565172ffe39d0571c234317713d (bonzini/rhel-qemu-kvm)
When a custom path is provided to sev-guest object and opening
the path fails an error message is reported. But the error
message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of
the custom path.
Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e306ae87e0ef04bc7a5dec6db693f6ea09d64d45)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 37de80adc7..53b7f7315b 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -595,7 +595,7 @@ static SevCapability *sev_get_capabilities(Error **errp)
fd = open(sev_device, O_RDWR);
if (fd < 0) {
error_setg_errno(errp, errno, "SEV: Failed to open %s",
- DEFAULT_SEV_DEVICE);
+ sev_device);
g_free(sev_device);
return NULL;
}
--
2.39.3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,530 @@
From 900859fd3445b9a71f1a9a8befda17f0c33f3923 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Thu, 30 May 2024 06:16:19 -0500
Subject: [PATCH 059/100] i386/sev: Introduce 'sev-snp-guest' object
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [59/91] 3e585113d209176c2b97ad5e4fe943f19dfdcaeb (bonzini/rhel-qemu-kvm)
SEV-SNP support relies on a different set of properties/state than the
existing 'sev-guest' object. This patch introduces the 'sev-snp-guest'
object, which can be used to configure an SEV-SNP guest. For example,
a default-configured SEV-SNP guest with no additional information
passed in for use with attestation:
-object sev-snp-guest,id=sev0
or a fully-specified SEV-SNP guest where all spec-defined binary
blobs are passed in as base64-encoded strings:
-object sev-snp-guest,id=sev0, \
policy=0x30000, \
init-flags=0, \
id-block=YWFhYWFhYWFhYWFhYWFhCg==, \
id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \
author-key-enabled=on, \
host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \
guest-visible-workarounds=AA==, \
See the QAPI schema updates included in this patch for more usage
details.
In some cases these blobs may be up to 4096 characters, but this is
generally well below the default limit for linux hosts where
command-line sizes are defined by the sysconf-configurable ARG_MAX
value, which defaults to 2097152 characters for Ubuntu hosts, for
example.
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Co-developed-by: Michael Roth <michael.roth@amd.com>
Acked-by: Markus Armbruster <armbru@redhat.com> (for QAPI schema)
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7b34df44260b391e33bc3acf1ced30019d9aadf1)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
docs/system/i386/amd-memory-encryption.rst | 70 +++++-
qapi/qom.json | 58 +++++
target/i386/sev.c | 253 +++++++++++++++++++++
target/i386/sev.h | 1 +
4 files changed, 380 insertions(+), 2 deletions(-)
diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst
index e9bc142bc1..748f5094ba 100644
--- a/docs/system/i386/amd-memory-encryption.rst
+++ b/docs/system/i386/amd-memory-encryption.rst
@@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs
are about to occur. This allows the guest to selectively share information with
the hypervisor to satisfy the requested function.
-Launching
----------
+Launching (SEV and SEV-ES)
+--------------------------
Boot images (such as bios) must be encrypted before a guest can be booted. The
``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``,
@@ -161,6 +161,72 @@ The value of GCTX.LD is
If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for
``kernel_hashes_blob`` and ``vmsas_blob`` as needed.
+Launching (SEV-SNP)
+-------------------
+Boot images (such as bios) must be encrypted before a guest can be booted. The
+``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images:
+``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These
+three commands communicate with SEV-SNP firmware to generate a fresh memory
+encryption key for the VM, encrypt the boot images for a successful launch. For
+more details on the SEV-SNP firmware interfaces used by these commands please
+see the SEV-SNP Firmware ABI.
+
+``SNP_LAUNCH_START`` is called first to create a cryptographic launch context
+within the firmware. To create this context, the guest owner must provide a
+guest policy and other parameters as described in the SEV-SNP firmware
+specification. The launch parameters should be specified as described in the
+QAPI schema for the sev-snp-guest object.
+
+The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured
+by the corresponding parameters documented in the QAPI schema for the
+'sev-snp-guest' object.
+
++--------+-------+----------+-------------------------------------------------+
+| key | type | default | meaning |
++---------------------------+-------------------------------------------------+
+| policy | hex | 0x30000 | a 64-bit guest policy |
++---------------------------+-------------------------------------------------+
+| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string|
+| | | | for guest OS visible |
+| | | | workarounds. |
++---------------------------+-------------------------------------------------+
+
+``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context
+created via the ``SNP_LAUNCH_START`` command. If required, this command can be
+called multiple times to encrypt different memory regions. The command also
+calculates the measurement of the memory contents as it encrypts.
+
+``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while
+finalizing the launch the firmware can perform checks on the launch digest
+computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must
+supply the id block, authentication blob and host data that should be included
+in the attestation report. See the SEV-SNP spec for further details.
+
+The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured
+by the corresponding parameters documented in the QAPI schema for the
+'sev-snp-guest' object.
+
++--------------------+-------+----------+-------------------------------------+
+| key | type | default | meaning |
++--------------------+-------+----------+-------------------------------------+
+| id-block | string| none | base64 encoded ID block |
++--------------------+-------+----------+-------------------------------------+
+| id-auth | string| none | base64 encoded authentication |
+| | | | information |
++--------------------+-------+----------+-------------------------------------+
+| author-key-enabled | bool | 0 | auth block contains author key |
++--------------------+-------+----------+-------------------------------------+
+| host_data | string| none | host provided data |
++--------------------+-------+----------+-------------------------------------+
+
+To launch a SEV-SNP guest (additional parameters are documented in the QAPI
+schema for the 'sev-snp-guest' object)::
+
+ # ${QEMU} \
+ -machine ...,confidential-guest-support=sev0 \
+ -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1
+
+
Debugging
---------
diff --git a/qapi/qom.json b/qapi/qom.json
index 056b38f491..8bd299265e 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -929,6 +929,62 @@
'*handle': 'uint32',
'*legacy-vm-type': 'bool' } }
+##
+# @SevSnpGuestProperties:
+#
+# Properties for sev-snp-guest objects. Most of these are direct
+# arguments for the KVM_SNP_* interfaces documented in the Linux
+# kernel source under
+# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn
+# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands
+# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9).
+#
+# More usage information is also available in the QEMU source tree
+# under docs/amd-memory-encryption.
+#
+# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as
+# defined in the SEV-SNP firmware ABI (default: 0x30000)
+#
+# @guest-visible-workarounds: 16-byte, base64-encoded blob to report
+# hypervisor-defined workarounds, corresponding to the 'GOSVW'
+# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP
+# firmware ABI (default: all-zero)
+#
+# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block'
+# structure for the SNP_LAUNCH_FINISH command defined in the
+# SEV-SNP firmware ABI (default: all-zero)
+#
+# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID
+# Authentication Information Structure' for the SNP_LAUNCH_FINISH
+# command defined in the SEV-SNP firmware ABI (default: all-zero)
+#
+# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY'
+# field defined SEV-SNP firmware ABI (default: false)
+#
+# @host-data: 32-byte, base64-encoded, user-defined blob to provide to
+# the guest, as documented for the 'HOST_DATA' parameter of the
+# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default:
+# all-zero)
+#
+# @vcek-disabled: Guests are by default allowed to choose between VLEK
+# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip
+# Endorsement Key) when requesting attestation reports from
+# firmware. Set this to true to disable the use of VCEK.
+# (default: false) (since: 9.1)
+#
+# Since: 9.1
+##
+{ 'struct': 'SevSnpGuestProperties',
+ 'base': 'SevCommonProperties',
+ 'data': {
+ '*policy': 'uint64',
+ '*guest-visible-workarounds': 'str',
+ '*id-block': 'str',
+ '*id-auth': 'str',
+ '*author-key-enabled': 'bool',
+ '*host-data': 'str',
+ '*vcek-disabled': 'bool' } }
+
##
# @ThreadContextProperties:
#
@@ -1007,6 +1063,7 @@
{ 'name': 'secret_keyring',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest',
+ 'sev-snp-guest',
'thread-context',
's390-pv-guest',
'throttle-group',
@@ -1077,6 +1134,7 @@
'secret_keyring': { 'type': 'SecretKeyringProperties',
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
+ 'sev-snp-guest': 'SevSnpGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 28a018ed83..a81b3228d4 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -42,6 +42,7 @@
OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
+OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST)
struct SevCommonState {
X86ConfidentialGuest parent_obj;
@@ -96,8 +97,22 @@ struct SevGuestState {
bool legacy_vm_type;
};
+struct SevSnpGuestState {
+ SevCommonState parent_obj;
+
+ /* configuration parameters */
+ char *guest_visible_workarounds;
+ char *id_block;
+ char *id_auth;
+ char *host_data;
+
+ struct kvm_sev_snp_launch_start kvm_start_conf;
+ struct kvm_sev_snp_launch_finish kvm_finish_conf;
+};
+
#define DEFAULT_GUEST_POLICY 0x1 /* disable debug */
#define DEFAULT_SEV_DEVICE "/dev/sev"
+#define DEFAULT_SEV_SNP_POLICY 0x30000
#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
typedef struct __attribute__((__packed__)) SevInfoBlock {
@@ -1500,11 +1515,249 @@ static const TypeInfo sev_guest_info = {
.class_init = sev_guest_class_init,
};
+static void
+sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ visit_type_uint64(v, name,
+ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy,
+ errp);
+}
+
+static void
+sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ visit_type_uint64(v, name,
+ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy,
+ errp);
+}
+
+static char *
+sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp)
+{
+ return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds);
+}
+
+static void
+sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value,
+ Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf;
+ g_autofree guchar *blob;
+ gsize len;
+
+ g_free(sev_snp_guest->guest_visible_workarounds);
+
+ /* store the base64 str so we don't need to re-encode in getter */
+ sev_snp_guest->guest_visible_workarounds = g_strdup(value);
+
+ blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds,
+ -1, &len, errp);
+ if (!blob) {
+ return;
+ }
+
+ if (len != sizeof(start->gosvw)) {
+ error_setg(errp, "parameter length of %lu exceeds max of %lu",
+ len, sizeof(start->gosvw));
+ return;
+ }
+
+ memcpy(start->gosvw, blob, len);
+}
+
+static char *
+sev_snp_guest_get_id_block(Object *obj, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ return g_strdup(sev_snp_guest->id_block);
+}
+
+static void
+sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf;
+ gsize len;
+
+ g_free(sev_snp_guest->id_block);
+ g_free((guchar *)finish->id_block_uaddr);
+
+ /* store the base64 str so we don't need to re-encode in getter */
+ sev_snp_guest->id_block = g_strdup(value);
+
+ finish->id_block_uaddr =
+ (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp);
+
+ if (!finish->id_block_uaddr) {
+ return;
+ }
+
+ if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) {
+ error_setg(errp, "parameter length of %lu not equal to %u",
+ len, KVM_SEV_SNP_ID_BLOCK_SIZE);
+ return;
+ }
+
+ finish->id_block_en = (len) ? 1 : 0;
+}
+
+static char *
+sev_snp_guest_get_id_auth(Object *obj, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ return g_strdup(sev_snp_guest->id_auth);
+}
+
+static void
+sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf;
+ gsize len;
+
+ g_free(sev_snp_guest->id_auth);
+ g_free((guchar *)finish->id_auth_uaddr);
+
+ /* store the base64 str so we don't need to re-encode in getter */
+ sev_snp_guest->id_auth = g_strdup(value);
+
+ finish->id_auth_uaddr =
+ (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp);
+
+ if (!finish->id_auth_uaddr) {
+ return;
+ }
+
+ if (len > KVM_SEV_SNP_ID_AUTH_SIZE) {
+ error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u",
+ len, KVM_SEV_SNP_ID_AUTH_SIZE);
+ return;
+ }
+}
+
+static bool
+sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ return !!sev_snp_guest->kvm_finish_conf.auth_key_en;
+}
+
+static void
+sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ sev_snp_guest->kvm_finish_conf.auth_key_en = value;
+}
+
+static bool
+sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ return !!sev_snp_guest->kvm_finish_conf.vcek_disabled;
+}
+
+static void
+sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ sev_snp_guest->kvm_finish_conf.vcek_disabled = value;
+}
+
+static char *
+sev_snp_guest_get_host_data(Object *obj, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ return g_strdup(sev_snp_guest->host_data);
+}
+
+static void
+sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf;
+ g_autofree guchar *blob;
+ gsize len;
+
+ g_free(sev_snp_guest->host_data);
+
+ /* store the base64 str so we don't need to re-encode in getter */
+ sev_snp_guest->host_data = g_strdup(value);
+
+ blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp);
+
+ if (!blob) {
+ return;
+ }
+
+ if (len != sizeof(finish->host_data)) {
+ error_setg(errp, "parameter length of %lu not equal to %lu",
+ len, sizeof(finish->host_data));
+ return;
+ }
+
+ memcpy(finish->host_data, blob, len);
+}
+
+static void
+sev_snp_guest_class_init(ObjectClass *oc, void *data)
+{
+ object_class_property_add(oc, "policy", "uint64",
+ sev_snp_guest_get_policy,
+ sev_snp_guest_set_policy, NULL, NULL);
+ object_class_property_add_str(oc, "guest-visible-workarounds",
+ sev_snp_guest_get_guest_visible_workarounds,
+ sev_snp_guest_set_guest_visible_workarounds);
+ object_class_property_add_str(oc, "id-block",
+ sev_snp_guest_get_id_block,
+ sev_snp_guest_set_id_block);
+ object_class_property_add_str(oc, "id-auth",
+ sev_snp_guest_get_id_auth,
+ sev_snp_guest_set_id_auth);
+ object_class_property_add_bool(oc, "author-key-enabled",
+ sev_snp_guest_get_author_key_enabled,
+ sev_snp_guest_set_author_key_enabled);
+ object_class_property_add_bool(oc, "vcek-required",
+ sev_snp_guest_get_vcek_disabled,
+ sev_snp_guest_set_vcek_disabled);
+ object_class_property_add_str(oc, "host-data",
+ sev_snp_guest_get_host_data,
+ sev_snp_guest_set_host_data);
+}
+
+static void
+sev_snp_guest_instance_init(Object *obj)
+{
+ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
+
+ /* default init/start/finish params for kvm */
+ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
+}
+
+/* guest info specific to sev-snp */
+static const TypeInfo sev_snp_guest_info = {
+ .parent = TYPE_SEV_COMMON,
+ .name = TYPE_SEV_SNP_GUEST,
+ .instance_size = sizeof(SevSnpGuestState),
+ .class_init = sev_snp_guest_class_init,
+ .instance_init = sev_snp_guest_instance_init,
+};
+
static void
sev_register_types(void)
{
type_register_static(&sev_common_info);
type_register_static(&sev_guest_info);
+ type_register_static(&sev_snp_guest_info);
}
type_init(sev_register_types);
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 668374eef3..bedc667eeb 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -22,6 +22,7 @@
#define TYPE_SEV_COMMON "sev-common"
#define TYPE_SEV_GUEST "sev-guest"
+#define TYPE_SEV_SNP_GUEST "sev-snp-guest"
#define SEV_POLICY_NODBG 0x1
#define SEV_POLICY_NOKS 0x2
--
2.39.3

View File

@ -0,0 +1,85 @@
From be37914ae54c8aebc218cf41b37bc0ea1563daae Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 31 May 2024 12:51:44 +0200
Subject: [PATCH 074/100] i386/sev: Invoke launch_updata_data() for SEV class
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [74/91] f1b588a9ffecd6944a78186d88a6be3849698710 (bonzini/rhel-qemu-kvm)
Add launch_update_data() in SevCommonStateClass and
invoke as sev_launch_update_data() for SEV object.
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9861405a8f845133b7984322c2df0c43a45553c3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 7b5c4b4874..8834cf9441 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -74,6 +74,7 @@ struct SevCommonStateClass {
/* public */
int (*launch_start)(SevCommonState *sev_common);
void (*launch_finish)(SevCommonState *sev_common);
+ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len);
int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp);
};
@@ -929,7 +930,7 @@ out:
}
static int
-sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
+sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len)
{
int ret, fw_error;
struct kvm_sev_launch_update_data update;
@@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
update.uaddr = (uintptr_t)addr;
update.len = len;
trace_kvm_sev_launch_update_data(addr, len);
- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA,
+ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA,
&update, &fw_error);
if (ret) {
error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
@@ -1487,6 +1488,7 @@ int
sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
if (!sev_common) {
return 0;
@@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
/* if SEV is in update state then encrypt the data else do nothing */
if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
- int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len);
+ int ret;
+
+ ret = klass->launch_update_data(sev_common, gpa, ptr, len);
if (ret < 0) {
error_setg(errp, "SEV: Failed to encrypt pflash rom");
return ret;
@@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
klass->launch_start = sev_launch_start;
klass->launch_finish = sev_launch_finish;
+ klass->launch_update_data = sev_launch_update_data;
klass->kvm_init = sev_kvm_init;
x86_klass->kvm_type = sev_kvm_type;
--
2.39.3

View File

@ -0,0 +1,55 @@
From 32899eb4fa5143b795b107de4857adce2cf1d434 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:38 -0500
Subject: [PATCH 075/100] i386/sev: Invoke launch_updata_data() for SNP class
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [75/91] 3520af5847f8dddb6d7fe7ad5feb308230f387b9 (bonzini/rhel-qemu-kvm)
Invoke as sev_snp_launch_update_data() for SNP object.
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0765d136eba400ad1cb7cae18438bb10eace64dc)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 8834cf9441..eaf5fc6c6b 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva,
return 0;
}
+static int
+sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa,
+ uint8_t *ptr, uint64_t len)
+{
+ int ret = snp_launch_update_data(gpa, ptr, len,
+ KVM_SEV_SNP_PAGE_TYPE_NORMAL);
+ return ret;
+}
+
static int
sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
const KvmCpuidInfo *kvm_cpuid_info)
@@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
klass->launch_start = sev_snp_launch_start;
klass->launch_finish = sev_snp_launch_finish;
+ klass->launch_update_data = sev_snp_launch_update_data;
klass->kvm_init = sev_snp_kvm_init;
x86_klass->kvm_type = sev_snp_kvm_type;
--
2.39.3

View File

@ -0,0 +1,47 @@
From fa6076291eb45255bc2fe523399d7d0647fc5570 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Fri, 7 Jun 2024 13:36:10 -0500
Subject: [PATCH 085/100] i386/sev: Move SEV_COMMON null check before
dereferencing
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [85/91] e8d2bfd077766a5e7777b9337d0e77146f883224 (bonzini/rhel-qemu-kvm)
Fixes Coverity CID 1546886.
Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class")
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 48779faef3c8e2fe70bd8285bffa731bd76dc844)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 7c9df621de..f18432f58e 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -1529,11 +1529,12 @@ int
sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
{
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
- SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
+ SevCommonStateClass *klass;
if (!sev_common) {
return 0;
}
+ klass = SEV_COMMON_GET_CLASS(sev_common);
/* if SEV is in update state then encrypt the data else do nothing */
if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
--
2.39.3

View File

@ -0,0 +1,88 @@
From 4d96ca893126d4c17c9fe03c76973b1d4a414f21 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:18 -0500
Subject: [PATCH 058/100] i386/sev: Move sev_launch_finish to separate class
method
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [58/91] 7865710d320a6df7038ef7016d350aa9cdcea326 (bonzini/rhel-qemu-kvm)
When sev-snp-guest objects are introduced there will be a number of
differences in how the launch finish is handled compared to the existing
sev-guest object. Move sev_launch_finish() to a class method to make it
easier to implement SNP-specific launch update functionality later.
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit bce615a14aec07cab0488e5a242f6a91e641efcb)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index b2aa0d6f99..28a018ed83 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -71,6 +71,7 @@ struct SevCommonStateClass {
/* public */
int (*launch_start)(SevCommonState *sev_common);
+ void (*launch_finish)(SevCommonState *sev_common);
};
/**
@@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = {
};
static void
-sev_launch_finish(SevGuestState *sev_guest)
+sev_launch_finish(SevCommonState *sev_common)
{
int ret, error;
trace_kvm_sev_launch_finish();
- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0,
+ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0,
&error);
if (ret) {
error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'",
@@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest)
exit(1);
}
- sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING);
+ sev_set_guest_state(sev_common, SEV_STATE_RUNNING);
/* add migration blocker */
error_setg(&sev_mig_blocker,
@@ -826,10 +827,11 @@ static void
sev_vm_state_change(void *opaque, bool running, RunState state)
{
SevCommonState *sev_common = opaque;
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque);
if (running) {
if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) {
- sev_launch_finish(SEV_GUEST(sev_common));
+ klass->launch_finish(sev_common);
}
}
}
@@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data)
SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
klass->launch_start = sev_launch_start;
+ klass->launch_finish = sev_launch_finish;
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
--
2.39.3

View File

@ -0,0 +1,91 @@
From a170ba2c7dbf2775eb9047779d3643a2a81bb372 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:17 -0500
Subject: [PATCH 057/100] i386/sev: Move sev_launch_update to separate class
method
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [57/91] 4f31e7afaec6f2c2a7c06cda4d7d27d4037e53e0 (bonzini/rhel-qemu-kvm)
When sev-snp-guest objects are introduced there will be a number of
differences in how the launch data is handled compared to the existing
sev-guest object. Move sev_launch_start() to a class method to make it
easier to implement SNP-specific launch update functionality later.
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6600f1ac0c81cbe67faf048ea07f78542dea925f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 33e606eea0..b2aa0d6f99 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -69,6 +69,8 @@ struct SevCommonState {
struct SevCommonStateClass {
X86ConfidentialGuestClass parent_class;
+ /* public */
+ int (*launch_start)(SevCommonState *sev_common);
};
/**
@@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len)
}
static int
-sev_launch_start(SevGuestState *sev_guest)
+sev_launch_start(SevCommonState *sev_common)
{
gsize sz;
int ret = 1;
int fw_error, rc;
+ SevGuestState *sev_guest = SEV_GUEST(sev_common);
struct kvm_sev_launch_start start = {
.handle = sev_guest->handle, .policy = sev_guest->policy
};
guchar *session = NULL, *dh_cert = NULL;
- SevCommonState *sev_common = SEV_COMMON(sev_guest);
if (sev_guest->session_file) {
if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) {
@@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
uint32_t ebx;
uint32_t host_cbitpos;
struct sev_user_data_status status = {};
+ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs);
ret = ram_block_discard_disable(true);
if (ret) {
@@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
goto err;
}
- sev_launch_start(SEV_GUEST(sev_common));
+ ret = klass->launch_start(sev_common);
if (ret) {
error_setg(errp, "%s: failed to create encryption context", __func__);
goto err;
@@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp)
static void
sev_guest_class_init(ObjectClass *oc, void *data)
{
+ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc);
+
+ klass->launch_start = sev_launch_start;
+
object_class_property_add_str(oc, "dh-cert-file",
sev_guest_get_dh_cert_file,
sev_guest_set_dh_cert_file);
--
2.39.3

View File

@ -0,0 +1,134 @@
From d009fa2cebebd1da80f4f2f5d0c4fffb87e02afc Mon Sep 17 00:00:00 2001
From: Dov Murik <dovmurik@linux.ibm.com>
Date: Thu, 30 May 2024 06:16:34 -0500
Subject: [PATCH 079/100] i386/sev: Reorder struct declarations
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [79/91] 1274d4620e88dda99ec10173ca5e3cd4184c8fb6 (bonzini/rhel-qemu-kvm)
Move the declaration of PaddedSevHashTable before SevSnpGuest so
we can add a new such field to the latter.
No functional change intended.
Signed-off-by: Dov Murik <dovmurik@linux.ibm.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit cc483bf911931f405dea682c74a3d8b9b6c54369)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 84 +++++++++++++++++++++++------------------------
1 file changed, 42 insertions(+), 42 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 73f9406715..3fce4c08eb 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON)
OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST)
OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST)
+/* hard code sha256 digest size */
+#define HASH_SIZE 32
+
+typedef struct QEMU_PACKED SevHashTableEntry {
+ QemuUUID guid;
+ uint16_t len;
+ uint8_t hash[HASH_SIZE];
+} SevHashTableEntry;
+
+typedef struct QEMU_PACKED SevHashTable {
+ QemuUUID guid;
+ uint16_t len;
+ SevHashTableEntry cmdline;
+ SevHashTableEntry initrd;
+ SevHashTableEntry kernel;
+} SevHashTable;
+
+/*
+ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of
+ * 16 bytes.
+ */
+typedef struct QEMU_PACKED PaddedSevHashTable {
+ SevHashTable ht;
+ uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)];
+} PaddedSevHashTable;
+
+QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
+
+#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
+typedef struct __attribute__((__packed__)) SevInfoBlock {
+ /* SEV-ES Reset Vector Address */
+ uint32_t reset_addr;
+} SevInfoBlock;
+
+#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454"
+typedef struct QEMU_PACKED SevHashTableDescriptor {
+ /* SEV hash table area guest address */
+ uint32_t base;
+ /* SEV hash table area size (in bytes) */
+ uint32_t size;
+} SevHashTableDescriptor;
+
struct SevCommonState {
X86ConfidentialGuest parent_obj;
@@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData {
static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update;
-#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e"
-typedef struct __attribute__((__packed__)) SevInfoBlock {
- /* SEV-ES Reset Vector Address */
- uint32_t reset_addr;
-} SevInfoBlock;
-
-#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454"
-typedef struct QEMU_PACKED SevHashTableDescriptor {
- /* SEV hash table area guest address */
- uint32_t base;
- /* SEV hash table area size (in bytes) */
- uint32_t size;
-} SevHashTableDescriptor;
-
-/* hard code sha256 digest size */
-#define HASH_SIZE 32
-
-typedef struct QEMU_PACKED SevHashTableEntry {
- QemuUUID guid;
- uint16_t len;
- uint8_t hash[HASH_SIZE];
-} SevHashTableEntry;
-
-typedef struct QEMU_PACKED SevHashTable {
- QemuUUID guid;
- uint16_t len;
- SevHashTableEntry cmdline;
- SevHashTableEntry initrd;
- SevHashTableEntry kernel;
-} SevHashTable;
-
-/*
- * Data encrypted by sev_encrypt_flash() must be padded to a multiple of
- * 16 bytes.
- */
-typedef struct QEMU_PACKED PaddedSevHashTable {
- SevHashTable ht;
- uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)];
-} PaddedSevHashTable;
-
-QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0);
-
static Error *sev_mig_blocker;
static const char *const sev_fw_errlist[] = {
--
2.39.3

View File

@ -0,0 +1,46 @@
From 80c1d78e31b2567d1c610c8939b75d159ff6ea27 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:13 -0500
Subject: [PATCH 055/100] i386/sev: Replace error_report with error_setg
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [55/91] 1e15fc2458687e564af9fa5022c29e79ddc8edfd (bonzini/rhel-qemu-kvm)
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index d30b68c11e..67ed32e5ea 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
if (sev_es_enabled()) {
if (!kvm_kernel_irqchip_allowed()) {
- error_report("%s: SEV-ES guests require in-kernel irqchip support",
- __func__);
+ error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip"
+ "support", __func__);
goto err;
}
if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) {
- error_report("%s: guest policy requires SEV-ES, but "
+ error_setg(errp, "%s: guest policy requires SEV-ES, but "
"host SEV-ES support unavailable",
__func__);
goto err;
--
2.39.3

View File

@ -0,0 +1,40 @@
From 88da6d01b1de2b92adb5c47c6d482876a054705f Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Fri, 7 Jun 2024 13:36:11 -0500
Subject: [PATCH 086/100] i386/sev: Return when sev_common is null
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [86/91] 02ce4a6a51ce9fd961f417c13db0a760673591ba (bonzini/rhel-qemu-kvm)
Fixes Coverity CID 1546885.
Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state")
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit cd7093a7a168a823d07671348996f049d45e8f67)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index f18432f58e..c40562dce3 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp)
sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
if (!sev_common) {
error_setg(errp, "SEV is not configured");
+ return NULL;
}
sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
--
2.39.3

View File

@ -0,0 +1,47 @@
From c7649ac1b958dc48de50f32b1ad80d84b17945a8 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:29 -0500
Subject: [PATCH 069/100] i386/sev: Set CPU state to protected once SNP guest
payload is finalized
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [69/91] 09280f987a186511ec7d62c3f340b2148e8556d7 (bonzini/rhel-qemu-kvm)
Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the
vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU
state afterward will only be acting on the initial data and so are
effectively no-ops.
Set the vCPU state to protected at this point so that QEMU don't
continue trying to re-sync vCPU data during guest runtime.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index ef2e592ca7..e84e4395a5 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common)
exit(1);
}
+ kvm_mark_guest_state_protected();
sev_set_guest_state(sev_common, SEV_STATE_RUNNING);
/* add migration blocker */
--
2.39.3

View File

@ -0,0 +1,268 @@
From 5540bb5ca052531563df1ade68995e268ae65224 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 29 Feb 2024 01:00:36 -0500
Subject: [PATCH 012/100] i386/sev: Switch to use confidential_guest_kvm_init()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [12/91] 6f5f8d1b818826f7ee4b6ae527963ef23c97f531 (bonzini/rhel-qemu-kvm)
Use confidential_guest_kvm_init() instead of calling SEV
specific sev_kvm_init(). This allows the introduction of multiple
confidential-guest-support subclasses for different x86 vendors.
As a bonus, stubs are not needed anymore since there is no
direct call from target/i386/kvm/kvm.c to SEV code.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 637c95b37b106c2eeba313e0abb38ec12e918a59)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 10 +--
target/i386/kvm/meson.build | 2 -
target/i386/kvm/sev-stub.c | 21 ------
target/i386/sev.c | 127 ++++++++++++++++++------------------
target/i386/sev.h | 2 -
5 files changed, 69 insertions(+), 93 deletions(-)
delete mode 100644 target/i386/kvm/sev-stub.c
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 5f30b649a0..e271652620 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
* mechanisms are supported in future (e.g. TDX), they'll need
* their own initialization either here or elsewhere.
*/
- ret = sev_kvm_init(ms->cgs, &local_err);
- if (ret < 0) {
- error_report_err(local_err);
- return ret;
+ if (ms->cgs) {
+ ret = confidential_guest_kvm_init(ms->cgs, &local_err);
+ if (ret < 0) {
+ error_report_err(local_err);
+ return ret;
+ }
}
has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 84d9143e60..e7850981e6 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -7,8 +7,6 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
-i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c'))
-
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c
deleted file mode 100644
index 1be5341e8a..0000000000
--- a/target/i386/kvm/sev-stub.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * QEMU SEV stub
- *
- * Copyright Advanced Micro Devices 2018
- *
- * Authors:
- * Brijesh Singh <brijesh.singh@amd.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "sev.h"
-
-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
-{
- /* If we get here, cgs must be some non-SEV thing */
- return 0;
-}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 72930ff0dc..b8f79d34d1 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp)
sev->kernel_hashes = value;
}
-static void
-sev_guest_class_init(ObjectClass *oc, void *data)
-{
- object_class_property_add_str(oc, "sev-device",
- sev_guest_get_sev_device,
- sev_guest_set_sev_device);
- object_class_property_set_description(oc, "sev-device",
- "SEV device to use");
- object_class_property_add_str(oc, "dh-cert-file",
- sev_guest_get_dh_cert_file,
- sev_guest_set_dh_cert_file);
- object_class_property_set_description(oc, "dh-cert-file",
- "guest owners DH certificate (encoded with base64)");
- object_class_property_add_str(oc, "session-file",
- sev_guest_get_session_file,
- sev_guest_set_session_file);
- object_class_property_set_description(oc, "session-file",
- "guest owners session parameters (encoded with base64)");
- object_class_property_add_bool(oc, "kernel-hashes",
- sev_guest_get_kernel_hashes,
- sev_guest_set_kernel_hashes);
- object_class_property_set_description(oc, "kernel-hashes",
- "add kernel hashes to guest firmware for measured Linux boot");
-}
-
-static void
-sev_guest_instance_init(Object *obj)
-{
- SevGuestState *sev = SEV_GUEST(obj);
-
- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
- sev->policy = DEFAULT_GUEST_POLICY;
- object_property_add_uint32_ptr(obj, "policy", &sev->policy,
- OBJ_PROP_FLAG_READWRITE);
- object_property_add_uint32_ptr(obj, "handle", &sev->handle,
- OBJ_PROP_FLAG_READWRITE);
- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
- OBJ_PROP_FLAG_READWRITE);
- object_property_add_uint32_ptr(obj, "reduced-phys-bits",
- &sev->reduced_phys_bits,
- OBJ_PROP_FLAG_READWRITE);
-}
-
-/* sev guest info */
-static const TypeInfo sev_guest_info = {
- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
- .name = TYPE_SEV_GUEST,
- .instance_size = sizeof(SevGuestState),
- .instance_finalize = sev_guest_finalize,
- .class_init = sev_guest_class_init,
- .instance_init = sev_guest_instance_init,
- .interfaces = (InterfaceInfo[]) {
- { TYPE_USER_CREATABLE },
- { }
- }
-};
-
bool
sev_enabled(void)
{
@@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
}
}
-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
- SevGuestState *sev
- = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST);
+ SevGuestState *sev = SEV_GUEST(cgs);
char *devname;
int ret, fw_error, cmd;
uint32_t ebx;
uint32_t host_cbitpos;
struct sev_user_data_status status = {};
- if (!sev) {
- return 0;
- }
-
ret = ram_block_discard_disable(true);
if (ret) {
error_report("%s: cannot disable RAM discard", __func__);
@@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp)
return ret;
}
+static void
+sev_guest_class_init(ObjectClass *oc, void *data)
+{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+
+ klass->kvm_init = sev_kvm_init;
+
+ object_class_property_add_str(oc, "sev-device",
+ sev_guest_get_sev_device,
+ sev_guest_set_sev_device);
+ object_class_property_set_description(oc, "sev-device",
+ "SEV device to use");
+ object_class_property_add_str(oc, "dh-cert-file",
+ sev_guest_get_dh_cert_file,
+ sev_guest_set_dh_cert_file);
+ object_class_property_set_description(oc, "dh-cert-file",
+ "guest owners DH certificate (encoded with base64)");
+ object_class_property_add_str(oc, "session-file",
+ sev_guest_get_session_file,
+ sev_guest_set_session_file);
+ object_class_property_set_description(oc, "session-file",
+ "guest owners session parameters (encoded with base64)");
+ object_class_property_add_bool(oc, "kernel-hashes",
+ sev_guest_get_kernel_hashes,
+ sev_guest_set_kernel_hashes);
+ object_class_property_set_description(oc, "kernel-hashes",
+ "add kernel hashes to guest firmware for measured Linux boot");
+}
+
+static void
+sev_guest_instance_init(Object *obj)
+{
+ SevGuestState *sev = SEV_GUEST(obj);
+
+ sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
+ sev->policy = DEFAULT_GUEST_POLICY;
+ object_property_add_uint32_ptr(obj, "policy", &sev->policy,
+ OBJ_PROP_FLAG_READWRITE);
+ object_property_add_uint32_ptr(obj, "handle", &sev->handle,
+ OBJ_PROP_FLAG_READWRITE);
+ object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos,
+ OBJ_PROP_FLAG_READWRITE);
+ object_property_add_uint32_ptr(obj, "reduced-phys-bits",
+ &sev->reduced_phys_bits,
+ OBJ_PROP_FLAG_READWRITE);
+}
+
+/* sev guest info */
+static const TypeInfo sev_guest_info = {
+ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
+ .name = TYPE_SEV_GUEST,
+ .instance_size = sizeof(SevGuestState),
+ .instance_finalize = sev_guest_finalize,
+ .class_init = sev_guest_class_init,
+ .instance_init = sev_guest_instance_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_USER_CREATABLE },
+ { }
+ }
+};
+
static void
sev_register_types(void)
{
diff --git a/target/i386/sev.h b/target/i386/sev.h
index e7499c95b1..9e10d09539 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret,
int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size);
void sev_es_set_reset_vector(CPUState *cpu);
-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-
#endif
--
2.39.3

View File

@ -0,0 +1,240 @@
From a870e7c31d9605baea4741d82521612b6164c99b Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Thu, 30 May 2024 06:16:26 -0500
Subject: [PATCH 066/100] i386/sev: Update query-sev QAPI format to handle
SEV-SNP
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [66/91] a19b3e226e857f3995176e7d2ef1ce2e4329a885 (bonzini/rhel-qemu-kvm)
Most of the current 'query-sev' command is relevant to both legacy
SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions:
- 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and
the meaning of the bit positions has changed
- 'handle' is not relevant to SEV-SNP
To address this, this patch adds a new 'sev-type' field that can be
used as a discriminator to select between SEV and SEV-SNP-specific
fields/formats without breaking compatibility for existing management
tools (so long as management tools that add support for launching
SEV-SNP guest update their handling of query-sev appropriately).
The corresponding HMP command has also been fixed up similarly.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Co-developed-by:Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 59d3740cb4ac0f010ce35877572904f6297284b4)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++---------
target/i386/sev.c | 55 +++++++++++++++++++++------------
target/i386/sev.h | 3 ++
3 files changed, 96 insertions(+), 34 deletions(-)
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 4e0a6492a9..2d7d4d89bd 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -47,6 +47,50 @@
'send-update', 'receive-update' ],
'if': 'TARGET_I386' }
+##
+# @SevGuestType:
+#
+# An enumeration indicating the type of SEV guest being run.
+#
+# @sev: The guest is a legacy SEV or SEV-ES guest.
+#
+# @sev-snp: The guest is an SEV-SNP guest.
+#
+# Since: 6.2
+##
+{ 'enum': 'SevGuestType',
+ 'data': [ 'sev', 'sev-snp' ],
+ 'if': 'TARGET_I386' }
+
+##
+# @SevGuestInfo:
+#
+# Information specific to legacy SEV/SEV-ES guests.
+#
+# @policy: SEV policy value
+#
+# @handle: SEV firmware handle
+#
+# Since: 2.12
+##
+{ 'struct': 'SevGuestInfo',
+ 'data': { 'policy': 'uint32',
+ 'handle': 'uint32' },
+ 'if': 'TARGET_I386' }
+
+##
+# @SevSnpGuestInfo:
+#
+# Information specific to SEV-SNP guests.
+#
+# @snp-policy: SEV-SNP policy value
+#
+# Since: 9.1
+##
+{ 'struct': 'SevSnpGuestInfo',
+ 'data': { 'snp-policy': 'uint64' },
+ 'if': 'TARGET_I386' }
+
##
# @SevInfo:
#
@@ -60,25 +104,25 @@
#
# @build-id: SEV FW build id
#
-# @policy: SEV policy value
-#
# @state: SEV guest state
#
-# @handle: SEV firmware handle
+# @sev-type: Type of SEV guest being run
#
# Since: 2.12
##
-{ 'struct': 'SevInfo',
- 'data': { 'enabled': 'bool',
- 'api-major': 'uint8',
- 'api-minor' : 'uint8',
- 'build-id' : 'uint8',
- 'policy' : 'uint32',
- 'state' : 'SevState',
- 'handle' : 'uint32'
- },
- 'if': 'TARGET_I386'
-}
+{ 'union': 'SevInfo',
+ 'base': { 'enabled': 'bool',
+ 'api-major': 'uint8',
+ 'api-minor' : 'uint8',
+ 'build-id' : 'uint8',
+ 'state' : 'SevState',
+ 'sev-type' : 'SevGuestType' },
+ 'discriminator': 'sev-type',
+ 'data': {
+ 'sev': 'SevGuestInfo',
+ 'sev-snp': 'SevSnpGuestInfo' },
+ 'if': 'TARGET_I386' }
+
##
# @query-sev:
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 072cc4f853..43d1c48bd9 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void)
{
SevInfo *info;
SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
- SevGuestState *sev_guest =
- (SevGuestState *)object_dynamic_cast(OBJECT(sev_common),
- TYPE_SEV_GUEST);
info = g_new0(SevInfo, 1);
info->enabled = sev_enabled();
if (info->enabled) {
- if (sev_guest) {
- info->handle = sev_guest->handle;
- }
info->api_major = sev_common->api_major;
info->api_minor = sev_common->api_minor;
info->build_id = sev_common->build_id;
info->state = sev_common->state;
- /* we only report the lower 32-bits of policy for SNP, ok for now... */
- info->policy =
- (uint32_t)object_property_get_uint(OBJECT(sev_common),
- "policy", NULL);
+
+ if (sev_snp_enabled()) {
+ info->sev_type = SEV_GUEST_TYPE_SEV_SNP;
+ info->u.sev_snp.snp_policy =
+ object_property_get_uint(OBJECT(sev_common), "policy", NULL);
+ } else {
+ info->sev_type = SEV_GUEST_TYPE_SEV;
+ info->u.sev.handle = SEV_GUEST(sev_common)->handle;
+ info->u.sev.policy =
+ (uint32_t)object_property_get_uint(OBJECT(sev_common),
+ "policy", NULL);
+ }
}
return info;
@@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
{
SevInfo *info = sev_get_info();
- if (info && info->enabled) {
- monitor_printf(mon, "handle: %d\n", info->handle);
- monitor_printf(mon, "state: %s\n", SevState_str(info->state));
- monitor_printf(mon, "build: %d\n", info->build_id);
- monitor_printf(mon, "api version: %d.%d\n",
- info->api_major, info->api_minor);
+ if (!info || !info->enabled) {
+ monitor_printf(mon, "SEV is not enabled\n");
+ goto out;
+ }
+
+ monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type));
+ monitor_printf(mon, "state: %s\n", SevState_str(info->state));
+ monitor_printf(mon, "build: %d\n", info->build_id);
+ monitor_printf(mon, "api version: %d.%d\n", info->api_major,
+ info->api_minor);
+
+ if (sev_snp_enabled()) {
monitor_printf(mon, "debug: %s\n",
- info->policy & SEV_POLICY_NODBG ? "off" : "on");
- monitor_printf(mon, "key-sharing: %s\n",
- info->policy & SEV_POLICY_NOKS ? "off" : "on");
+ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on"
+ : "off");
+ monitor_printf(mon, "SMT allowed: %s\n",
+ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on"
+ : "off");
} else {
- monitor_printf(mon, "SEV is not enabled\n");
+ monitor_printf(mon, "handle: %d\n", info->u.sev.handle);
+ monitor_printf(mon, "debug: %s\n",
+ info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on");
+ monitor_printf(mon, "key-sharing: %s\n",
+ info->u.sev.policy & SEV_POLICY_NOKS ? "off" : "on");
}
+out:
qapi_free_SevInfo(info);
}
diff --git a/target/i386/sev.h b/target/i386/sev.h
index 94295ee74f..5dc4767b1e 100644
--- a/target/i386/sev.h
+++ b/target/i386/sev.h
@@ -31,6 +31,9 @@
#define SEV_POLICY_DOMAIN 0x10
#define SEV_POLICY_SEV 0x20
+#define SEV_SNP_POLICY_SMT 0x10000
+#define SEV_SNP_POLICY_DBG 0x80000
+
typedef struct SevKernelLoaderContext {
char *setup_data;
size_t setup_size;
--
2.39.3

View File

@ -0,0 +1,51 @@
From 98057e3adafa052b21a4fe5ef22835d30df3e644 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Fri, 7 Jun 2024 13:36:09 -0500
Subject: [PATCH 084/100] i386/sev: fix unreachable code coverity issue
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [84/91] dc7bf28f491bf675b22a98ea593fba72d8bc415a (bonzini/rhel-qemu-kvm)
Set 'finish->id_block_en' early, so that it is properly reset.
Fixes coverity CID 1546887.
Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object")
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 004c667ac1..7c9df621de 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -2165,6 +2165,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp)
struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf;
gsize len;
+ finish->id_block_en = 0;
g_free(sev_snp_guest->id_block);
g_free((guchar *)finish->id_block_uaddr);
@@ -2184,7 +2185,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp)
return;
}
- finish->id_block_en = (len) ? 1 : 0;
+ finish->id_block_en = 1;
}
static char *
--
2.39.3

View File

@ -0,0 +1,81 @@
From deae6c3b57c3919946a5ce1613e667a3240cf158 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 15 Apr 2024 12:45:09 +0200
Subject: [PATCH 001/100] introduce pc_rhel_9_5_compat
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [1/91] cfd402fa5080eddba7c954e81ed79f9a1dd654cf (bonzini/rhel-qemu-kvm)
Allow undoing backported changes that impact guest ABI.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 4 ++++
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
include/hw/i386/pc.h | 3 +++
4 files changed, 11 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 4a154c1a9a..648762d908 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -348,6 +348,10 @@ GlobalProperty pc_rhel_compat[] = {
};
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
+GlobalProperty pc_rhel_9_5_compat[] = {
+};
+const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
+
GlobalProperty pc_rhel_9_3_compat[] = {
/* pc_rhel_9_3_compat from pc_compat_8_0 */
{ "virtio-mem", "unplugged-inaccessible", "auto" },
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6b260682eb..bef3e8b73e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -1015,6 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m)
object_class_property_set_description(oc, "x-south-bridge",
"Use a different south bridge than PIIX3");
+ compat_props_add(m->compat_props, pc_rhel_9_5_compat,
+ pc_rhel_9_5_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
hw_compat_rhel_9_5_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_4,
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 2b54944c0f..9adcdadce8 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -734,6 +734,8 @@ static void pc_q35_machine_rhel940_options(MachineClass *m)
pcmc->smbios_stream_product = "RHEL";
pcmc->smbios_stream_version = "9.4.0";
+ compat_props_add(m->compat_props, pc_rhel_9_5_compat,
+ pc_rhel_9_5_compat_len);
compat_props_add(m->compat_props, hw_compat_rhel_9_5,
hw_compat_rhel_9_5_len);
}
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index a984c951ad..87420783ab 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -294,6 +294,9 @@ extern const size_t pc_compat_2_0_len;
extern GlobalProperty pc_rhel_compat[];
extern const size_t pc_rhel_compat_len;
+extern GlobalProperty pc_rhel_9_5_compat[];
+extern const size_t pc_rhel_9_5_compat_len;
+
extern GlobalProperty pc_rhel_9_3_compat[];
extern const size_t pc_rhel_9_3_compat_len;
--
2.39.3

View File

@ -0,0 +1,153 @@
From 120157257ac239050779fdddc9abb56bd39958b3 Mon Sep 17 00:00:00 2001
From: Chao Peng <chao.p.peng@linux.intel.com>
Date: Wed, 20 Mar 2024 03:39:05 -0500
Subject: [PATCH 029/100] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [29/91] 9a08c8699f632cd046a6307e33bd053a7cc7db46 (bonzini/rhel-qemu-kvm)
Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM.
With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that
backend'ed both by hva-based shared memory and guest memfd based private
memory.
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240320083945.991426-10-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ce5a983233b4ca94ced88c9581014346509b5c71)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++-------
accel/kvm/trace-events | 2 +-
include/sysemu/kvm_int.h | 2 ++
3 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a7b9a127dd..5ef55e4dd7 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new)
{
KVMState *s = kvm_state;
- struct kvm_userspace_memory_region mem;
+ struct kvm_userspace_memory_region2 mem;
int ret;
mem.slot = slot->slot | (kml->as_id << 16);
mem.guest_phys_addr = slot->start_addr;
mem.userspace_addr = (unsigned long)slot->ram;
mem.flags = slot->flags;
+ mem.guest_memfd = slot->guest_memfd;
+ mem.guest_memfd_offset = slot->guest_memfd_offset;
if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) {
/* Set the slot size to 0 before setting the slot to the desired
* value. This is needed based on KVM commit 75d61fbc. */
mem.memory_size = 0;
- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+
+ if (kvm_guest_memfd_supported) {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+ } else {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ }
if (ret < 0) {
goto err;
}
}
mem.memory_size = slot->memory_size;
- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ if (kvm_guest_memfd_supported) {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem);
+ } else {
+ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
+ }
slot->old_flags = mem.flags;
err:
trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
mem.guest_phys_addr, mem.memory_size,
- mem.userspace_addr, ret);
+ mem.userspace_addr, mem.guest_memfd,
+ mem.guest_memfd_offset, ret);
if (ret < 0) {
- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
- __func__, mem.slot, slot->start_addr,
- (uint64_t)mem.memory_size, strerror(errno));
+ if (kvm_guest_memfd_supported) {
+ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d,"
+ " start=0x%" PRIx64 ", size=0x%" PRIx64 ","
+ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 ","
+ " guest_memfd_offset=0x%" PRIx64 ": %s",
+ __func__, mem.slot, slot->start_addr,
+ (uint64_t)mem.memory_size, mem.flags,
+ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset,
+ strerror(errno));
+ } else {
+ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
+ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
+ __func__, mem.slot, slot->start_addr,
+ (uint64_t)mem.memory_size, strerror(errno));
+ }
}
return ret;
}
@@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr)
if (readonly && kvm_readonly_mem_allowed) {
flags |= KVM_MEM_READONLY;
}
+ if (memory_region_has_guest_memfd(mr)) {
+ assert(kvm_guest_memfd_supported);
+ flags |= KVM_MEM_GUEST_MEMFD;
+ }
return flags;
}
@@ -1394,6 +1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
mem->ram_start_offset = ram_start_offset;
mem->ram = ram;
mem->flags = kvm_mem_flags(mr);
+ mem->guest_memfd = mr->ram_block->guest_memfd;
+ mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host;
+
kvm_slot_init_dirty_bitmap(mem);
err = kvm_set_user_memory_region(kml, mem, true);
if (err) {
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index 9f599abc17..e8c52cb9e7 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
+kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
kvm_resample_fd_notify(int gsi) "gsi %d"
kvm_dirty_ring_full(int id) "vcpu %d"
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 3496be7997..a5a3fee411 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -30,6 +30,8 @@ typedef struct KVMSlot
int as_id;
/* Cache of the offset in ram address space */
ram_addr_t ram_start_offset;
+ int guest_memfd;
+ hwaddr guest_memfd_offset;
} KVMSlot;
typedef struct KVMMemoryUpdate {
--
2.39.3

View File

@ -0,0 +1,103 @@
From 37e6c98987bb2d4be7ce1fdda4475cd0266271c3 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 20 Mar 2024 03:39:06 -0500
Subject: [PATCH 027/100] kvm: Introduce support for memory_attributes
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [27/91] 1b4428289949478f7390196ae4b098c5e6f36bb0 (bonzini/rhel-qemu-kvm)
Introduce the helper functions to set the attributes of a range of
memory to private or shared.
This is necessary to notify KVM the private/shared attribute of each gpa
range. KVM needs the information to decide the GPA needs to be mapped at
hva-based shared memory or guest_memfd based private memory.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240320083945.991426-11-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0811baed49010a9b651b8029ab6b9828b09a884f)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++
include/sysemu/kvm.h | 4 ++++
2 files changed, 36 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 9bd235c969..272e945f52 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -91,6 +91,7 @@ bool kvm_msi_use_devid;
static bool kvm_has_guest_debug;
static int kvm_sstep_flags;
static bool kvm_immediate_exit;
+static uint64_t kvm_supported_memory_attributes;
static hwaddr kvm_max_slot_size = ~0;
static const KVMCapabilityInfo kvm_required_capabilites[] = {
@@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
kvm_max_slot_size = max_slot_size;
}
+static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr)
+{
+ struct kvm_memory_attributes attrs;
+ int r;
+
+ assert((attr & kvm_supported_memory_attributes) == attr);
+ attrs.attributes = attr;
+ attrs.address = start;
+ attrs.size = size;
+ attrs.flags = 0;
+
+ r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
+ if (r) {
+ error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") "
+ "with attr 0x%" PRIx64 " error '%s'",
+ start, size, attr, strerror(errno));
+ }
+ return r;
+}
+
+int kvm_set_memory_attributes_private(hwaddr start, uint64_t size)
+{
+ return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size)
+{
+ return kvm_set_memory_attributes(start, size, 0);
+}
+
/* Called with KVMMemoryListener.slots_lock held */
static void kvm_set_phys_mem(KVMMemoryListener *kml,
MemoryRegionSection *section, bool add)
@@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms)
goto err;
}
+ kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 54f4d83a37..f114ff6986 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void);
* reported for the VM.
*/
bool kvm_hwpoisoned_mem(void);
+
+int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
+int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
+
#endif
--
2.39.3

View File

@ -0,0 +1,116 @@
From 31cc494d69449811f4d995326479372da7c1241e Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 18 Mar 2024 16:53:35 +0100
Subject: [PATCH 003/100] kvm: add support for guest physical bits
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [3/91] abb1ba3a584152d8efabd8255b86afe609f8ffbd (bonzini/rhel-qemu-kvm)
Query kvm for supported guest physical address bits, in cpuid
function 80000008, eax[23:16]. Usually this is identical to host
physical address bits. With NPT or EPT being used this might be
restricted to 48 (max 4-level paging address space size) even if
the host cpu supports more physical address bits.
When set pass this to the guest, using cpuid too. Guest firmware
can use this to figure how big the usable guest physical address
space is, so PCI bar mapping are actually reachable.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <20240318155336.156197-2-kraxel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0d08c423688edcca857f88dab20f1fc56de2b281)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 8 deletions(-)
diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
index b91af5051f..7ef94c681f 100644
--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
@@ -18,10 +18,32 @@
#include "kvm_i386.h"
#include "hw/core/accel-cpu.h"
+static void kvm_set_guest_phys_bits(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ uint32_t eax, guest_phys_bits;
+
+ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX);
+ guest_phys_bits = (eax >> 16) & 0xff;
+ if (!guest_phys_bits) {
+ return;
+ }
+ cpu->guest_phys_bits = guest_phys_bits;
+ if (cpu->guest_phys_bits > cpu->phys_bits) {
+ cpu->guest_phys_bits = cpu->phys_bits;
+ }
+
+ if (cpu->host_phys_bits && cpu->host_phys_bits_limit &&
+ cpu->guest_phys_bits > cpu->host_phys_bits_limit) {
+ cpu->guest_phys_bits = cpu->host_phys_bits_limit;
+ }
+}
+
static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
+ bool ret;
/*
* The realize order is important, since x86_cpu_realize() checks if
@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
*
* realize order:
*
- * x86_cpu_realize():
- * -> x86_cpu_expand_features()
- * -> cpu_exec_realizefn():
- * -> accel_cpu_common_realize()
- * kvm_cpu_realizefn() -> host_cpu_realizefn()
- * -> cpu_common_realizefn()
- * -> check/update ucode_rev, phys_bits, mwait
+ * x86_cpu_realizefn():
+ * x86_cpu_expand_features()
+ * cpu_exec_realizefn():
+ * accel_cpu_common_realize()
+ * kvm_cpu_realizefn()
+ * host_cpu_realizefn()
+ * kvm_set_guest_phys_bits()
+ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait
+ * cpu_common_realizefn() (via xcc->parent_realize)
*/
if (cpu->max_features) {
if (enable_cpu_pm && kvm_has_waitpkg()) {
@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp)
MSR_IA32_UCODE_REV);
}
}
- return host_cpu_realizefn(cs, errp);
+ ret = host_cpu_realizefn(cs, errp);
+ if (!ret) {
+ return ret;
+ }
+
+ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) &&
+ cpu->guest_phys_bits == -1) {
+ kvm_set_guest_phys_bits(cs);
+ }
+
+ return true;
}
static bool lmce_supported(void)
--
2.39.3

View File

@ -0,0 +1,194 @@
From 33cc1b469689ee2bb7c4f745189472c74a0a98ab Mon Sep 17 00:00:00 2001
From: Chao Peng <chao.p.peng@linux.intel.com>
Date: Wed, 20 Mar 2024 03:39:08 -0500
Subject: [PATCH 034/100] kvm: handle KVM_EXIT_MEMORY_FAULT
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [34/91] 59c672f6b19a3afcb61878775eb6425c6fdea6d5 (bonzini/rhel-qemu-kvm)
Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory
conversion on the RAMBlock to turn the memory into desired attribute,
switching between private and shared.
Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when
KVM_EXIT_MEMORY_FAULT happens.
Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has
guest_memfd memory backend.
Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is
added.
When page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range(). Note, shared
memory can be discarded only when it's not back'ed by hugetlb because
hugetlb is supposed to be pre-allocated and no need for discarding.
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240320083945.991426-13-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c15e5684071d93174e446be318f49d8d59b15d6d)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++-----
accel/kvm/trace-events | 2 +
include/sysemu/kvm.h | 2 +
3 files changed, 92 insertions(+), 10 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 3f99efc8cc..09164e346c 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu)
} while (sigismember(&chkset, SIG_IPI));
}
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
+{
+ MemoryRegionSection section;
+ ram_addr_t offset;
+ MemoryRegion *mr;
+ RAMBlock *rb;
+ void *addr;
+ int ret = -1;
+
+ trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared");
+
+ if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) ||
+ !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) {
+ return -1;
+ }
+
+ if (!size) {
+ return -1;
+ }
+
+ section = memory_region_find(get_system_memory(), start, size);
+ mr = section.mr;
+ if (!mr) {
+ return -1;
+ }
+
+ if (!memory_region_has_guest_memfd(mr)) {
+ error_report("Converting non guest_memfd backed memory region "
+ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ goto out_unref;
+ }
+
+ if (to_private) {
+ ret = kvm_set_memory_attributes_private(start, size);
+ } else {
+ ret = kvm_set_memory_attributes_shared(start, size);
+ }
+ if (ret) {
+ goto out_unref;
+ }
+
+ addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
+ rb = qemu_ram_block_from_host(addr, false, &offset);
+
+ if (to_private) {
+ if (rb->page_size != qemu_real_host_page_size()) {
+ /*
+ * shared memory is backed by hugetlb, which is supposed to be
+ * pre-allocated and doesn't need to be discarded
+ */
+ goto out_unref;
+ }
+ ret = ram_block_discard_range(rb, offset, size);
+ } else {
+ ret = ram_block_discard_guest_memfd_range(rb, offset, size);
+ }
+
+out_unref:
+ memory_region_unref(mr);
+ return ret;
+}
+
int kvm_cpu_exec(CPUState *cpu)
{
struct kvm_run *run = cpu->kvm_run;
@@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu)
ret = EXCP_INTERRUPT;
break;
}
- fprintf(stderr, "error: kvm run failed %s\n",
- strerror(-run_ret));
+ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) {
+ fprintf(stderr, "error: kvm run failed %s\n",
+ strerror(-run_ret));
#ifdef TARGET_PPC
- if (run_ret == -EBUSY) {
- fprintf(stderr,
- "This is probably because your SMT is enabled.\n"
- "VCPU can only run on primary threads with all "
- "secondary threads offline.\n");
- }
+ if (run_ret == -EBUSY) {
+ fprintf(stderr,
+ "This is probably because your SMT is enabled.\n"
+ "VCPU can only run on primary threads with all "
+ "secondary threads offline.\n");
+ }
#endif
- ret = -1;
- break;
+ ret = -1;
+ break;
+ }
}
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
@@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu)
break;
}
break;
+ case KVM_EXIT_MEMORY_FAULT:
+ trace_kvm_memory_fault(run->memory_fault.gpa,
+ run->memory_fault.size,
+ run->memory_fault.flags);
+ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) {
+ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64,
+ (uint64_t)run->memory_fault.flags);
+ ret = -1;
+ break;
+ }
+ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size,
+ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ break;
default:
ret = kvm_arch_handle_exit(cpu, run);
break;
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index e8c52cb9e7..681ccb667d 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -31,3 +31,5 @@ kvm_cpu_exec(void) ""
kvm_interrupt_exit_request(void) ""
kvm_io_window_exit(void) ""
kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
+kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s"
+kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 9e4ab7ae89..74f23dff9c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
+int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private);
+
#endif
--
2.39.3

View File

@ -0,0 +1,56 @@
From f9dc55dd179bb534d589af371c5c2a7886bd461e Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 20 Mar 2024 03:39:11 -0500
Subject: [PATCH 030/100] kvm/memory: Make memory type private by default if it
has guest memfd backend
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [30/91] 5e21edf844b5629ee32c4075843b028561b97ae2 (bonzini/rhel-qemu-kvm)
KVM side leaves the memory to shared by default, which may incur the
overhead of paging conversion on the first visit of each page. Because
the expectation is that page is likely to private for the VMs that
require private memory (has guest memfd).
Explicitly set the memory to private when memory region has valid
guest memfd backend.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240320083945.991426-16-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 5ef55e4dd7..3f99efc8cc 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
strerror(-err));
abort();
}
+
+ if (memory_region_has_guest_memfd(mr)) {
+ err = kvm_set_memory_attributes_private(start_addr, slot_size);
+ if (err) {
+ error_report("%s: failed to set memory attribute private: %s",
+ __func__, strerror(-err));
+ exit(1);
+ }
+ }
+
start_addr += slot_size;
ram_start_offset += slot_size;
ram += slot_size;
--
2.39.3

View File

@ -0,0 +1,61 @@
From aeaa7061139202448d466b7e18682081f9cd2097 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Thu, 29 Feb 2024 01:36:54 -0500
Subject: [PATCH 035/100] kvm/tdx: Don't complain when converting vMMIO region
to shared
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [35/91] c42870771d7af5badc2e10d42be9b5620d72f95d (bonzini/rhel-qemu-kvm)
Because vMMIO region needs to be shared region, guest TD may explicitly
convert such region from private to shared. Don't complain such
conversion.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c5d9425ef4da9f43fc0903905ad415456d1ab843)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 09164e346c..6efaff90a7 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
}
if (!memory_region_has_guest_memfd(mr)) {
- error_report("Converting non guest_memfd backed memory region "
- "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
- start, size, to_private ? "private" : "shared");
+ /*
+ * Because vMMIO region must be shared, guest TD may convert vMMIO
+ * region to shared explicitly. Don't complain such case. See
+ * memory_region_type() for checking if the region is MMIO region.
+ */
+ if (!to_private &&
+ !memory_region_is_ram(mr) &&
+ !memory_region_is_ram_device(mr) &&
+ !memory_region_is_rom(mr) &&
+ !memory_region_is_romd(mr)) {
+ ret = 0;
+ } else {
+ error_report("Convert non guest_memfd backed memory region "
+ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s",
+ start, size, to_private ? "private" : "shared");
+ }
goto out_unref;
}
--
2.39.3

View File

@ -0,0 +1,62 @@
From 2b2dfff3e383c99d0f759a8c12659d1a0ce50e8e Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Thu, 29 Feb 2024 01:36:55 -0500
Subject: [PATCH 036/100] kvm/tdx: Ignore memory conversion to shared of
unassigned region
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [36/91] 84515b9dcfc2e07b272bb2477acf6430e9d33f28 (bonzini/rhel-qemu-kvm)
TDX requires vMMIO region to be shared. For KVM, MMIO region is the region
which kvm memslot isn't assigned to (except in-kernel emulation).
qemu has the memory region for vMMIO at each device level.
While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO
region, qemu doesn't find corresponding vMMIO region because it's before
PCI device allocation and memory_region_find() finds the device region, not
PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest
accesses those region they use GPA with shared bit set for vMMIO. Ignore
memory conversion request of non-assigned region to shared and return
success. Otherwise OVMF is confused and panics there.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 565f4768bb9cf840b2f8cca41483bb91aa3196a3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 6efaff90a7..f6268855b4 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
section = memory_region_find(get_system_memory(), start, size);
mr = section.mr;
if (!mr) {
+ /*
+ * Ignore converting non-assigned region to shared.
+ *
+ * TDX requires vMMIO region to be shared to inject #VE to guest.
+ * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region,
+ * and vIO-APIC 0xFEC00000 4K page.
+ * OVMF assigns 32bit PCI MMIO region to
+ * [top of low memory: typically 2GB=0xC000000, 0xFC00000)
+ */
+ if (!to_private) {
+ return 0;
+ }
return -1;
}
--
2.39.3

View File

@ -0,0 +1,189 @@
From c3e2bc3319882c16fa36eafc7a613073746cfc8b Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pankaj.gupta@amd.com>
Date: Thu, 30 May 2024 06:16:14 -0500
Subject: [PATCH 052/100] linux-headers: Update to current kvm/next
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [52/91] df77e867072f60110b8387a54ba2db6226b35007 (bonzini/rhel-qemu-kvm)
This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module
parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will
be included in Linux 6.11, to be released next July.
Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4.
Co-developed-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 5f69e42da5b40a2213f4db70ca461f554abca686)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
linux-headers/asm-loongarch/kvm.h | 4 +++
linux-headers/asm-riscv/kvm.h | 1 +
linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++-
linux-headers/linux/vhost.h | 15 ++++-----
4 files changed, 64 insertions(+), 8 deletions(-)
diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h
index 109785922c..f9abef3823 100644
--- a/linux-headers/asm-loongarch/kvm.h
+++ b/linux-headers/asm-loongarch/kvm.h
@@ -17,6 +17,8 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+
/*
* for KVM_GET_REGS and KVM_SET_REGS
*/
@@ -72,6 +74,8 @@ struct kvm_fpu {
#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1)
#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2)
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3)
#define LOONGARCH_REG_SHIFT 3
#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index b1c503c295..e878e7cc39 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_ZTSO,
KVM_RISCV_ISA_EXT_ZACAS,
+ KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 31c95c2dfe..1c8f918234 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -695,6 +695,11 @@ enum sev_cmd_id {
/* Second time is the charm; improved versions of the above ioctls. */
KVM_SEV_INIT2,
+ /* SNP-specific commands */
+ KVM_SEV_SNP_LAUNCH_START = 100,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ KVM_SEV_SNP_LAUNCH_FINISH,
+
KVM_SEV_NR_MAX,
};
@@ -709,7 +714,9 @@ struct kvm_sev_cmd {
struct kvm_sev_init {
__u64 vmsa_features;
__u32 flags;
- __u32 pad[9];
+ __u16 ghcb_version;
+ __u16 pad1;
+ __u32 pad2[8];
};
struct kvm_sev_launch_start {
@@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data {
__u32 pad2;
};
+struct kvm_sev_snp_launch_start {
+ __u64 policy;
+ __u8 gosvw[16];
+ __u16 flags;
+ __u8 pad0[6];
+ __u64 pad1[4];
+};
+
+/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start;
+ __u64 uaddr;
+ __u64 len;
+ __u8 type;
+ __u8 pad0;
+ __u16 flags;
+ __u32 pad1;
+ __u64 pad2[4];
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+
+struct kvm_sev_snp_launch_finish {
+ __u64 id_block_uaddr;
+ __u64 id_auth_uaddr;
+ __u8 id_block_en;
+ __u8 auth_key_en;
+ __u8 vcek_disabled;
+ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+ __u8 pad0[3];
+ __u16 flags;
+ __u64 pad1[4];
+};
+
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
@@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
#endif /* _ASM_X86_KVM_H */
diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index bea6973906..b95dd84eef 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -179,12 +179,6 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
-
/* Get the number of address spaces. */
#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
@@ -228,10 +222,17 @@
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
/* Get the queue size of a specific virtqueue.
* userspace set the vring index in vhost_vring_state.index
* kernel set the queue size in vhost_vring_state.num
*/
-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \
+#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \
struct vhost_vring_state)
#endif
--
2.39.3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,71 @@
From 9f485c8df885bcd1ff6c5692463c6168bfec07fb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 31 May 2024 13:29:53 +0200
Subject: [PATCH 054/100] machine: allow early use of
machine_require_guest_memfd
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [54/91] fd8c1a6624d5f27268215c8aa70dfc9d37bdb981 (bonzini/rhel-qemu-kvm)
Ask the ConfidentialGuestSupport object whether to use guest_memfd
for KVM-backend private memory. This bool can be set in instance_init
(or user_complete) so that it is available when the machine is created.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/core/machine.c | 2 +-
include/exec/confidential-guest-support.h | 5 +++++
include/hw/boards.h | 1 -
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 07b994e136..2055e0d312 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1482,7 +1482,7 @@ bool machine_mem_merge(MachineState *machine)
bool machine_require_guest_memfd(MachineState *machine)
{
- return machine->require_guest_memfd;
+ return machine->cgs && machine->cgs->require_guest_memfd;
}
static char *cpu_slot_to_string(const CPUArchId *cpu)
diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h
index e5b188cffb..02dc4e518f 100644
--- a/include/exec/confidential-guest-support.h
+++ b/include/exec/confidential-guest-support.h
@@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
struct ConfidentialGuestSupport {
Object parent;
+ /*
+ * True if the machine should use guest_memfd for RAM.
+ */
+ bool require_guest_memfd;
+
/*
* ready: flag set by CGS initialization code once it's ready to
* start executing instructions in a potentially-secure
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 815a1c4b26..0d1f9533ef 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -373,7 +373,6 @@ struct MachineState {
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
- bool require_guest_memfd;
bool usb;
bool usb_disabled;
char *firmware;
--
2.39.3

View File

@ -0,0 +1,85 @@
From 331c58d87dde8b4757e1d1e09d9b16bac2952d22 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 30 May 2024 06:16:15 -0500
Subject: [PATCH 081/100] memory: Introduce
memory_region_init_ram_guest_memfd()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [81/91] d5b0898d791f3f90d1acda0230f96ca9bf5be5e4 (bonzini/rhel-qemu-kvm)
Introduce memory_region_init_ram_guest_memfd() to allocate private
guset memfd on the MemoryRegion initialization. It's for the use case of
TDVF, which must be private on TDX case.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Pankaj Gupta <pankaj.gupta@amd.com>
Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a0aa6db7ce72a08703774107185e639e73e7754c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/exec/memory.h | 6 ++++++
system/memory.c | 24 ++++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 679a847685..1e351f6fc8 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1603,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr,
uint64_t size,
Error **errp);
+bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp);
+
/**
* memory_region_init_rom: Initialize a ROM memory region.
*
diff --git a/system/memory.c b/system/memory.c
index c756950c0c..b09065eef3 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -3606,6 +3606,30 @@ bool memory_region_init_ram(MemoryRegion *mr,
return true;
}
+bool memory_region_init_ram_guest_memfd(MemoryRegion *mr,
+ Object *owner,
+ const char *name,
+ uint64_t size,
+ Error **errp)
+{
+ DeviceState *owner_dev;
+
+ if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size,
+ RAM_GUEST_MEMFD, errp)) {
+ return false;
+ }
+ /* This will assert if owner is neither NULL nor a DeviceState.
+ * We only want the owner here for the purposes of defining a
+ * unique name for migration. TODO: Ideally we should implement
+ * a naming scheme for Objects which are not DeviceStates, in
+ * which case we can relax this restriction.
+ */
+ owner_dev = DEVICE(owner);
+ vmstate_register_ram(mr, owner_dev);
+
+ return true;
+}
+
bool memory_region_init_rom(MemoryRegion *mr,
Object *owner,
const char *name,
--
2.39.3

View File

@ -0,0 +1,184 @@
From f76d73f62555ad73081558c1f56bcb832fbb8c35 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Tue, 6 Aug 2024 13:53:00 -0500
Subject: [PATCH 098/100] nbd/server: CVE-2024-7409: Cap default
max-connections to 100
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop)
RH-Jira: RHEL-52617
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/4] 1fb3b8cd9781a66bba2f4a6bee2b320e96de86aa (redhat/centos-stream/src/qemu-kvm)
Allowing an unlimited number of clients to any web service is a recipe
for a rudimentary denial of service attack: the client merely needs to
open lots of sockets without closing them, until qemu no longer has
any more fds available to allocate.
For qemu-nbd, we default to allowing only 1 connection unless more are
explicitly asked for (-e or --shared); this was historically picked as
a nice default (without an explicit -t, a non-persistent qemu-nbd goes
away after a client disconnects, without needing any additional
follow-up commands), and we are not going to change that interface now
(besides, someday we want to point people towards qemu-storage-daemon
instead of qemu-nbd).
But for qemu proper, and the newer qemu-storage-daemon, the QMP
nbd-server-start command has historically had a default of unlimited
number of connections, in part because unlike qemu-nbd it is
inherently persistent until nbd-server-stop. Allowing multiple client
sockets is particularly useful for clients that can take advantage of
MULTI_CONN (creating parallel sockets to increase throughput),
although known clients that do so (such as libnbd's nbdcopy) typically
use only 8 or 16 connections (the benefits of scaling diminish once
more sockets are competing for kernel attention). Picking a number
large enough for typical use cases, but not unlimited, makes it
slightly harder for a malicious client to perform a denial of service
merely by opening lots of connections withot progressing through the
handshake.
This change does not eliminate CVE-2024-7409 on its own, but reduces
the chance for fd exhaustion or unlimited memory usage as an attack
surface. On the other hand, by itself, it makes it more obvious that
with a finite limit, we have the problem of an unauthenticated client
holding 100 fds opened as a way to block out a legitimate client from
being able to connect; thus, later patches will further add timeouts
to reject clients that are not making progress.
This is an INTENTIONAL change in behavior, and will break any client
of nbd-server-start that was not passing an explicit max-connections
parameter, yet expects more than 100 simultaneous connections. We are
not aware of any such client (as stated above, most clients aware of
MULTI_CONN get by just fine on 8 or 16 connections, and probably cope
with later connections failing by relying on the earlier connections;
libvirt has not yet been passing max-connections, but generally
creates NBD servers with the intent for a single client for the sake
of live storage migration; meanwhile, the KubeSAN project anticipates
a large cluster sharing multiple clients [up to 8 per node, and up to
100 nodes in a cluster], but it currently uses qemu-nbd with an
explicit --shared=0 rather than qemu-storage-daemon with
nbd-server-start).
We considered using a deprecation period (declare that omitting
max-parameters is deprecated, and make it mandatory in 3 releases -
then we don't need to pick an arbitrary default); that has zero risk
of breaking any apps that accidentally depended on more than 100
connections, and where such breakage might not be noticed under unit
testing but only under the larger loads of production usage. But it
does not close the denial-of-service hole until far into the future,
and requires all apps to change to add the parameter even if 100 was
good enough. It also has a drawback that any app (like libvirt) that
is accidentally relying on an unlimited default should seriously
consider their own CVE now, at which point they are going to change to
pass explicit max-connections sooner than waiting for 3 qemu releases.
Finally, if our changed default breaks an app, that app can always
pass in an explicit max-parameters with a larger value.
It is also intentional that the HMP interface to nbd-server-start is
not changed to expose max-connections (any client needing to fine-tune
things should be using QMP).
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-12-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[ericb: Expand commit message to summarize Dan's argument for why we
break corner-case back-compat behavior without a deprecation period]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623)
Jira: https://issues.redhat.com/browse/RHEL-52617
Signed-off-by: Eric Blake <eblake@redhat.com>
---
block/monitor/block-hmp-cmds.c | 3 ++-
blockdev-nbd.c | 8 ++++++++
include/block/nbd.h | 7 +++++++
qapi/block-export.json | 4 ++--
4 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d954bec6f1..bdf2eb50b6 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict)
goto exit;
}
- nbd_server_start(addr, NULL, NULL, 0, &local_err);
+ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS,
+ &local_err);
qapi_free_SocketAddress(addr);
if (local_err != NULL) {
goto exit;
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 267a1de903..24ba5382db 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds,
void nbd_server_start_options(NbdServerOptions *arg, Error **errp)
{
+ if (!arg->has_max_connections) {
+ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+ }
+
nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz,
arg->max_connections, errp);
}
@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
{
SocketAddress *addr_flat = socket_address_flatten(addr);
+ if (!has_max_connections) {
+ max_connections = NBD_DEFAULT_MAX_CONNECTIONS;
+ }
+
nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp);
qapi_free_SocketAddress(addr_flat);
}
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 1d4d65922d..d4f8b21aec 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd;
*/
#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+/*
+ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at
+ * once; must be large enough to allow a MULTI_CONN-aware client like
+ * nbdcopy to create its typical number of 8-16 sockets.
+ */
+#define NBD_DEFAULT_MAX_CONNECTIONS 100
+
/* Handshake phase structs - this struct is passed on the wire */
typedef struct NBDOption {
diff --git a/qapi/block-export.json b/qapi/block-export.json
index 3919a2d5b9..f45e4fd481 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -28,7 +28,7 @@
# @max-connections: The maximum number of connections to allow at the
# same time, 0 for unlimited. Setting this to 1 also stops the
# server from advertising multiple client support (since 5.2;
-# default: 0)
+# default: 100)
#
# Since: 4.2
##
@@ -63,7 +63,7 @@
# @max-connections: The maximum number of connections to allow at the
# same time, 0 for unlimited. Setting this to 1 also stops the
# server from advertising multiple client support (since 5.2;
-# default: 0).
+# default: 100).
#
# Errors:
# - if the server is already running
--
2.39.3

View File

@ -0,0 +1,173 @@
From 6522c68268f00c9c5665f8f98cf6ed1984124cf3 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 7 Aug 2024 12:23:13 -0500
Subject: [PATCH 100/100] nbd/server: CVE-2024-7409: Close stray clients at
server-stop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop)
RH-Jira: RHEL-52617
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [4/4] c00bb5a7e73446e9f071ef83e4f1576f73a17059 (redhat/centos-stream/src/qemu-kvm)
A malicious client can attempt to connect to an NBD server, and then
intentionally delay progress in the handshake, including if it does
not know the TLS secrets. Although the previous two patches reduce
this behavior by capping the default max-connections parameter and
killing slow clients, they did not eliminate the possibility of a
client waiting to close the socket until after the QMP nbd-server-stop
command is executed, at which point qemu would SEGV when trying to
dereference the NULL nbd_server global which is no longer present.
This amounts to a denial of service attack. Worse, if another NBD
server is started before the malicious client disconnects, I cannot
rule out additional adverse effects when the old client interferes
with the connection count of the new server (although the most likely
is a crash due to an assertion failure when checking
nbd_server->connections > 0).
For environments without this patch, the CVE can be mitigated by
ensuring (such as via a firewall) that only trusted clients can
connect to an NBD server. Note that using frameworks like libvirt
that ensure that TLS is used and that nbd-server-stop is not executed
while any trusted clients are still connected will only help if there
is also no possibility for an untrusted client to open a connection
but then stall on the NBD handshake.
Given the previous patches, it would be possible to guarantee that no
clients remain connected by having nbd-server-stop sleep for longer
than the default handshake deadline before finally freeing the global
nbd_server object, but that could make QMP non-responsive for a long
time. So intead, this patch fixes the problem by tracking all client
sockets opened while the server is running, and forcefully closing any
such sockets remaining without a completed handshake at the time of
nbd-server-stop, then waiting until the coroutines servicing those
sockets notice the state change. nbd-server-stop now has a second
AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the
blk_exp_close_all_type() that disconnects all clients that completed
handshakes), but forced socket shutdown is enough to progress the
coroutines and quickly tear down all clients before the server is
freed, thus finally fixing the CVE.
This patch relies heavily on the fact that nbd/server.c guarantees
that it only calls nbd_blockdev_client_closed() from the main loop
(see the assertion in nbd_client_put() and the hoops used in
nbd_client_put_nonzero() to achieve that); if we did not have that
guarantee, we would also need a mutex protecting our accesses of the
list of connections to survive re-entrancy from independent iothreads.
Although I did not actually try to test old builds, it looks like this
problem has existed since at least commit 862172f45c (v2.12.0, 2017) -
even back when that patch started using a QIONetListener to handle
listening on multiple sockets, nbd_server_free() was already unaware
that the nbd_blockdev_client_closed callback can be reached later by a
client thread that has not completed handshakes (and therefore the
client's socket never got added to the list closed in
nbd_export_close_all), despite that patch intentionally tearing down
the QIONetListener to prevent new clients.
Reported-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
Fixes: CVE-2024-7409
CC: qemu-stable@nongnu.org
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-14-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3)
Jira: https://issues.redhat.com/browse/RHEL-52617
Signed-off-by: Eric Blake <eblake@redhat.com>
---
blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 24ba5382db..f73409ae49 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -21,12 +21,18 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
+typedef struct NBDConn {
+ QIOChannelSocket *cioc;
+ QLIST_ENTRY(NBDConn) next;
+} NBDConn;
+
typedef struct NBDServerData {
QIONetListener *listener;
QCryptoTLSCreds *tlscreds;
char *tlsauthz;
uint32_t max_connections;
uint32_t connections;
+ QLIST_HEAD(, NBDConn) conns;
} NBDServerData;
static NBDServerData *nbd_server;
@@ -51,6 +57,14 @@ int nbd_server_max_connections(void)
static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
{
+ NBDConn *conn = nbd_client_owner(client);
+
+ assert(qemu_in_main_thread() && nbd_server);
+
+ object_unref(OBJECT(conn->cioc));
+ QLIST_REMOVE(conn, next);
+ g_free(conn);
+
nbd_client_put(client);
assert(nbd_server->connections > 0);
nbd_server->connections--;
@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
gpointer opaque)
{
+ NBDConn *conn = g_new0(NBDConn, 1);
+
+ assert(qemu_in_main_thread() && nbd_server);
nbd_server->connections++;
+ object_ref(OBJECT(cioc));
+ conn->cioc = cioc;
+ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next);
nbd_update_server_watch(nbd_server);
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
/* TODO - expose handshake timeout as QMP option */
nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
nbd_server->tlscreds, nbd_server->tlsauthz,
- nbd_blockdev_client_closed, NULL);
+ nbd_blockdev_client_closed, conn);
}
static void nbd_update_server_watch(NBDServerData *s)
@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s)
static void nbd_server_free(NBDServerData *server)
{
+ NBDConn *conn, *tmp;
+
if (!server) {
return;
}
+ /*
+ * Forcefully close the listener socket, and any clients that have
+ * not yet disconnected on their own.
+ */
qio_net_listener_disconnect(server->listener);
object_unref(OBJECT(server->listener));
+ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) {
+ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH,
+ NULL);
+ }
+
+ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0);
+
if (server->tlscreds) {
object_unref(OBJECT(server->tlscreds));
}
--
2.39.3

View File

@ -0,0 +1,135 @@
From ca30846351f1136d15f55717a5534ad927f7cf52 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Thu, 8 Aug 2024 16:05:08 -0500
Subject: [PATCH 099/100] nbd/server: CVE-2024-7409: Drop non-negotiating
clients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop)
RH-Jira: RHEL-52617
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/4] 8008a1067766951d9752bcc41c2127a07fce934d (redhat/centos-stream/src/qemu-kvm)
A client that opens a socket but does not negotiate is merely hogging
qemu's resources (an open fd and a small amount of memory); and a
malicious client that can access the port where NBD is listening can
attempt a denial of service attack by intentionally opening and
abandoning lots of unfinished connections. The previous patch put a
default bound on the number of such ongoing connections, but once that
limit is hit, no more clients can connect (including legitimate ones).
The solution is to insist that clients complete handshake within a
reasonable time limit, defaulting to 10 seconds. A client that has
not successfully completed NBD_OPT_GO by then (including the case of
where the client didn't know TLS credentials to even reach the point
of NBD_OPT_GO) is wasting our time and does not deserve to stay
connected. Later patches will allow fine-tuning the limit away from
the default value (including disabling it for doing integration
testing of the handshake process itself).
Note that this patch in isolation actually makes it more likely to see
qemu SEGV after nbd-server-stop, as any client socket still connected
when the server shuts down will now be closed after 10 seconds rather
than at the client's whims. That will be addressed in the next patch.
For a demo of this patch in action:
$ qemu-nbd -f raw -r -t -e 10 file &
$ nbdsh --opt-mode -c '
H = list()
for i in range(20):
print(i)
H.insert(i, nbd.NBD())
H[i].set_opt_mode(True)
H[i].connect_uri("nbd://localhost")
'
$ kill $!
where later connections get to start progressing once earlier ones are
forcefully dropped for taking too long, rather than hanging.
Suggested-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-13-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[eblake: rebase to changes earlier in series, reduce scope of timer]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0)
Jira: https://issues.redhat.com/browse/RHEL-52617
Signed-off-by: Eric Blake <eblake@redhat.com>
---
nbd/server.c | 28 +++++++++++++++++++++++++++-
nbd/trace-events | 1 +
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/nbd/server.c b/nbd/server.c
index e50012499f..39285cc971 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client)
}
}
+static void nbd_handshake_timer_cb(void *opaque)
+{
+ QIOChannel *ioc = opaque;
+
+ trace_nbd_handshake_timer_cb();
+ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+}
+
static coroutine_fn void nbd_co_client_start(void *opaque)
{
NBDClient *client = opaque;
Error *local_err = NULL;
+ QEMUTimer *handshake_timer = NULL;
qemu_co_mutex_init(&client->send_lock);
- /* TODO - utilize client->handshake_max_secs */
+ /*
+ * Create a timer to bound the time spent in negotiation. If the
+ * timer expires, it is likely nbd_negotiate will fail because the
+ * socket was shutdown.
+ */
+ if (client->handshake_max_secs > 0) {
+ handshake_timer = aio_timer_new(qemu_get_aio_context(),
+ QEMU_CLOCK_REALTIME,
+ SCALE_NS,
+ nbd_handshake_timer_cb,
+ client->sioc);
+ timer_mod(handshake_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+ client->handshake_max_secs * NANOSECONDS_PER_SECOND);
+ }
+
if (nbd_negotiate(client, &local_err)) {
if (local_err) {
error_report_err(local_err);
}
+ timer_free(handshake_timer);
client_close(client, false);
return;
}
+ timer_free(handshake_timer);
WITH_QEMU_LOCK_GUARD(&client->lock) {
nbd_client_receive_next_request(client);
}
diff --git a/nbd/trace-events b/nbd/trace-events
index 00ae3216a1..cbd0a4ab7e 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload
nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64
nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32
nbd_trip(void) "Reading request"
+nbd_handshake_timer_cb(void) "client took too long to negotiate"
# client-connection.c
nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
--
2.39.3

View File

@ -0,0 +1,175 @@
From 70acef52a99e5114699f5fa58de5f0b5c031b880 Mon Sep 17 00:00:00 2001
From: Eric Blake <eblake@redhat.com>
Date: Wed, 7 Aug 2024 08:50:01 -0500
Subject: [PATCH 097/100] nbd/server: Plumb in new args to nbd_client_add()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Blake <eblake@redhat.com>
RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop)
RH-Jira: RHEL-52617
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/4] 7614e294e1f5b7861386950ae994bea166d19950 (redhat/centos-stream/src/qemu-kvm)
Upcoming patches to fix a CVE need to track an opaque pointer passed
in by the owner of a client object, as well as request for a time
limit on how fast negotiation must complete. Prepare for that by
changing the signature of nbd_client_new() and adding an accessor to
get at the opaque pointer, although for now the two servers
(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though
they pass in a new default timeout value.
Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-ID: <20240807174943.771624-11-eblake@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan]
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac)
Jira: https://issues.redhat.com/browse/RHEL-52617
Signed-off-by: Eric Blake <eblake@redhat.com>
---
blockdev-nbd.c | 6 ++++--
include/block/nbd.h | 11 ++++++++++-
nbd/server.c | 20 +++++++++++++++++---
qemu-nbd.c | 4 +++-
4 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 213012435f..267a1de903 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
nbd_update_server_watch(nbd_server);
qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz,
- nbd_blockdev_client_closed);
+ /* TODO - expose handshake timeout as QMP option */
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+ nbd_server->tlscreds, nbd_server->tlsauthz,
+ nbd_blockdev_client_closed, NULL);
}
static void nbd_update_server_watch(NBDServerData *s)
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4e7bd6342f..1d4d65922d 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts;
extern const BlockExportDriver blk_exp_nbd;
+/*
+ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must
+ * succeed at NBD_OPT_GO before being forcefully dropped as too slow.
+ */
+#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10
+
/* Handshake phase structs - this struct is passed on the wire */
typedef struct NBDOption {
@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp);
NBDExport *nbd_export_find(const char *name);
void nbd_client_new(QIOChannelSocket *sioc,
+ uint32_t handshake_max_secs,
QCryptoTLSCreds *tlscreds,
const char *tlsauthz,
- void (*close_fn)(NBDClient *, bool));
+ void (*close_fn)(NBDClient *, bool),
+ void *owner);
+void *nbd_client_owner(NBDClient *client);
void nbd_client_get(NBDClient *client);
void nbd_client_put(NBDClient *client);
diff --git a/nbd/server.c b/nbd/server.c
index 892797bb11..e50012499f 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -124,12 +124,14 @@ struct NBDMetaContexts {
struct NBDClient {
int refcount; /* atomic */
void (*close_fn)(NBDClient *client, bool negotiated);
+ void *owner;
QemuMutex lock;
NBDExport *exp;
QCryptoTLSCreds *tlscreds;
char *tlsauthz;
+ uint32_t handshake_max_secs;
QIOChannelSocket *sioc; /* The underlying data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
qemu_co_mutex_init(&client->send_lock);
+ /* TODO - utilize client->handshake_max_secs */
if (nbd_negotiate(client, &local_err)) {
if (local_err) {
error_report_err(local_err);
@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
}
/*
- * Create a new client listener using the given channel @sioc.
+ * Create a new client listener using the given channel @sioc and @owner.
* Begin servicing it in a coroutine. When the connection closes, call
- * @close_fn with an indication of whether the client completed negotiation.
+ * @close_fn with an indication of whether the client completed negotiation
+ * within @handshake_max_secs seconds (0 for unbounded).
*/
void nbd_client_new(QIOChannelSocket *sioc,
+ uint32_t handshake_max_secs,
QCryptoTLSCreds *tlscreds,
const char *tlsauthz,
- void (*close_fn)(NBDClient *, bool))
+ void (*close_fn)(NBDClient *, bool),
+ void *owner)
{
NBDClient *client;
Coroutine *co;
@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc,
object_ref(OBJECT(client->tlscreds));
}
client->tlsauthz = g_strdup(tlsauthz);
+ client->handshake_max_secs = handshake_max_secs;
client->sioc = sioc;
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
object_ref(OBJECT(client->sioc));
client->ioc = QIO_CHANNEL(sioc);
object_ref(OBJECT(client->ioc));
client->close_fn = close_fn;
+ client->owner = owner;
co = qemu_coroutine_create(nbd_co_client_start, client);
qemu_coroutine_enter(co);
}
+
+void *
+nbd_client_owner(NBDClient *client)
+{
+ return client->owner;
+}
diff --git a/qemu-nbd.c b/qemu-nbd.c
index d7b3ccab21..48e2fa5858 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc,
nb_fds++;
nbd_update_server_watch();
- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed);
+ /* TODO - expose handshake timeout as command line option */
+ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS,
+ tlscreds, tlsauthz, nbd_client_closed, NULL);
}
static void nbd_update_server_watch(void)
--
2.39.3

View File

@ -0,0 +1,68 @@
From c0a65c752cd83dea27cbeb34074d65fb2c5a6b59 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Wed, 20 Mar 2024 03:39:13 -0500
Subject: [PATCH 008/100] pci-host/q35: Move PAM initialization above SMRAM
initialization
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [8/91] 22a9221d4726e872aa0f0dc25ae9d823c0611547 (bonzini/rhel-qemu-kvm)
In mch_realize(), process PAM initialization before SMRAM initialization so
that later patch can skill all the SMRAM related with a single check.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240320083945.991426-18-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/pci-host/q35.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 0d7d4e3f08..98d4a7c253 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp)
/* setup pci memory mapping */
pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space);
+ /* PAM */
+ init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
+ mch->system_memory, mch->pci_address_space,
+ PAM_BIOS_BASE, PAM_BIOS_SIZE);
+ for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
+ init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
+ mch->system_memory, mch->pci_address_space,
+ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
+ }
+
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
@@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp)
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&mch->smram));
-
- init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory,
- mch->system_memory, mch->pci_address_space,
- PAM_BIOS_BASE, PAM_BIOS_SIZE);
- for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) {
- init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory,
- mch->system_memory, mch->pci_address_space,
- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
- }
}
uint64_t mch_mcfg_base(void)
--
2.39.3

View File

@ -0,0 +1,83 @@
From c70f6e7e3461e6562c0591079cc71068bf0f2ed8 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Wed, 20 Mar 2024 03:39:07 -0500
Subject: [PATCH 033/100] physmem: Introduce
ram_block_discard_guest_memfd_range()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [33/91] b6169fa8d752d83977b18897be24f6ab9f3d3472 (bonzini/rhel-qemu-kvm)
When memory page is converted from private to shared, the original
private memory is back'ed by guest_memfd. Introduce
ram_block_discard_guest_memfd_range() for discarding memory in
guest_memfd.
Based on a patch by Isaku Yamahata <isaku.yamahata@intel.com>.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240320083945.991426-12-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/exec/cpu-common.h | 2 ++
system/physmem.c | 23 +++++++++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 6346df17ce..6d5318895a 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
+int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
+ size_t length);
#endif
diff --git a/system/physmem.c b/system/physmem.c
index 5ebcf5be11..c3d04ca921 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3721,6 +3721,29 @@ err:
return ret;
}
+int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
+ size_t length)
+{
+ int ret = -1;
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+
+ if (ret) {
+ ret = -errno;
+ error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
+ __func__, rb->idstr, start, length, ret);
+ }
+#else
+ ret = -ENOSYS;
+ error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
+ __func__, rb->idstr, start, length, ret);
+#endif
+
+ return ret;
+}
+
bool ramblock_is_pmem(RAMBlock *rb)
{
return rb->flags & RAM_PMEM;
--
2.39.3

View File

@ -0,0 +1,140 @@
From cfb109b393e019398a52f66a5ff0e9581c841335 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 29 Feb 2024 01:00:37 -0500
Subject: [PATCH 013/100] ppc/pef: switch to use
confidential_guest_kvm_init/reset()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [13/91] e25c498fc79a4c8e22ca41d9cbd06e40b4cf1f11 (bonzini/rhel-qemu-kvm)
Use the unified interface to call confidential guest related kvm_init()
and kvm_reset(), to avoid exposing pef specific functions.
As a bonus, pef.h goes away since there is no direct call from sPAPR
board code to PEF code anymore.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/ppc/pef.c | 9 ++++++---
hw/ppc/spapr.c | 10 +++++++---
include/hw/ppc/pef.h | 17 -----------------
3 files changed, 13 insertions(+), 23 deletions(-)
delete mode 100644 include/hw/ppc/pef.h
diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c
index d28ed3ba73..47553348b1 100644
--- a/hw/ppc/pef.c
+++ b/hw/ppc/pef.c
@@ -15,7 +15,6 @@
#include "sysemu/kvm.h"
#include "migration/blocker.h"
#include "exec/confidential-guest-support.h"
-#include "hw/ppc/pef.h"
#define TYPE_PEF_GUEST "pef-guest"
OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST)
@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp)
#endif
}
-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
return 0;
@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
return kvmppc_svm_init(cgs, errp);
}
-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
+static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
{
if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) {
return 0;
@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest,
static void pef_guest_class_init(ObjectClass *oc, void *data)
{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+
+ klass->kvm_init = pef_kvm_init;
+ klass->kvm_reset = pef_kvm_reset;
}
static void pef_guest_init(Object *obj)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a258d81846..6f6f0fd790 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -75,6 +75,7 @@
#include "hw/virtio/vhost-scsi-common.h"
#include "exec/ram_addr.h"
+#include "exec/confidential-guest-support.h"
#include "hw/usb.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
@@ -87,7 +88,6 @@
#include "hw/ppc/spapr_tpm_proxy.h"
#include "hw/ppc/spapr_nvdimm.h"
#include "hw/ppc/spapr_numa.h"
-#include "hw/ppc/pef.h"
#include "monitor/monitor.h"
@@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason)
qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32);
}
- pef_kvm_reset(machine->cgs, &error_fatal);
+ if (machine->cgs) {
+ confidential_guest_kvm_reset(machine->cgs, &error_fatal);
+ }
spapr_caps_apply(spapr);
spapr_nested_reset(spapr);
if (spapr->svm_allowed) {
@@ -2844,7 +2846,9 @@ static void spapr_machine_init(MachineState *machine)
/*
* if Secure VM (PEF) support is configured, then initialize it
*/
- pef_kvm_init(machine->cgs, &error_fatal);
+ if (machine->cgs) {
+ confidential_guest_kvm_init(machine->cgs, &error_fatal);
+ }
msi_nonbroken = true;
diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h
deleted file mode 100644
index 707dbe524c..0000000000
--- a/include/hw/ppc/pef.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * PEF (Protected Execution Facility) for POWER support
- *
- * Copyright Red Hat.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#ifndef HW_PPC_PEF_H
-#define HW_PPC_PEF_H
-
-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp);
-
-#endif /* HW_PPC_PEF_H */
--
2.39.3

View File

@ -0,0 +1,164 @@
From 83bb32c25472b500738a54ac8f2ad0f5c496acf1 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@linux.intel.com>
Date: Wed, 20 Mar 2024 03:39:14 -0500
Subject: [PATCH 009/100] q35: Introduce smm_ranges property for q35-pci-host
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [9/91] 931156772bfc2085e7241eecc56cf6eca3dac1fd (bonzini/rhel-qemu-kvm)
Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG,
etc... exist for the target platform. TDX doesn't support SMM and doesn't
play nice with QEMU modifying related guest memory ranges.
Signed-off-by: Isaku Yamahata <isaku.yamahata@linux.intel.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Michael Roth <michael.roth@amd.com>
Message-ID: <20240320083945.991426-19-michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc_q35.c | 2 ++
hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------
include/hw/i386/pc.h | 1 +
include/hw/pci-host/q35.h | 1 +
4 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 9adcdadce8..dedc86eec9 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine)
x86ms->above_4g_mem_size, NULL);
object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU,
pcms->default_bus_bypass_iommu, NULL);
+ object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES,
+ x86_machine_is_smm_enabled(x86ms), NULL);
sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
/* pci */
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 98d4a7c253..0b6cbaed7e 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -179,6 +179,8 @@ static Property q35_host_props[] = {
mch.below_4g_mem_size, 0),
DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost,
mch.above_4g_mem_size, 0),
+ DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost,
+ mch.has_smm_ranges, true),
DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj)
/* mch's object_initialize resets the default value, set it again */
qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE,
Q35_PCI_HOST_HOLE64_SIZE_DEFAULT);
+
object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32",
q35_host_get_pci_hole_start,
NULL, NULL, NULL);
@@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d,
mch_update_pciexbar(mch);
}
+ if (!mch->has_smm_ranges) {
+ return;
+ }
+
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM,
MCH_HOST_BRIDGE_SMRAM_SIZE)) {
mch_update_smram(mch);
@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d,
static void mch_update(MCHPCIState *mch)
{
mch_update_pciexbar(mch);
+
mch_update_pam(mch);
- mch_update_smram(mch);
- mch_update_ext_tseg_mbytes(mch);
- mch_update_smbase_smram(mch);
+ if (mch->has_smm_ranges) {
+ mch_update_smram(mch);
+ mch_update_ext_tseg_mbytes(mch);
+ mch_update_smbase_smram(mch);
+ }
/*
* pci hole goes from end-of-low-ram to io-apic.
@@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev)
pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR,
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
- d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
- d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
- d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
- d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
+ if (mch->has_smm_ranges) {
+ d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
+ d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
+ d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
+ d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
- if (mch->ext_tseg_mbytes > 0) {
- pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
- }
+ if (mch->ext_tseg_mbytes > 0) {
+ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES,
+ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY);
+ }
- d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
- d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
+ d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0;
+ d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff;
+ }
mch_update(mch);
}
@@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp)
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE);
}
+ if (!mch->has_smm_ranges) {
+ return;
+ }
+
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 87420783ab..467e7fb52f 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -164,6 +164,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level);
#define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size"
#define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size"
#define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size"
+#define PCI_HOST_PROP_SMM_RANGES "smm-ranges"
void pc_pci_as_mapping_init(MemoryRegion *system_memory,
diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h
index bafcbe6752..22fadfa3ed 100644
--- a/include/hw/pci-host/q35.h
+++ b/include/hw/pci-host/q35.h
@@ -50,6 +50,7 @@ struct MCHPCIState {
MemoryRegion tseg_blackhole, tseg_window;
MemoryRegion smbase_blackhole, smbase_window;
bool has_smram_at_smbase;
+ bool has_smm_ranges;
Range pci_hole;
uint64_t below_4g_mem_size;
uint64_t above_4g_mem_size;
--
2.39.3

View File

@ -0,0 +1,67 @@
From 4c93bec108f7e3918a2ef91b51cec477ade38cc3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Mar 2024 17:45:56 -0400
Subject: [PATCH 018/100] runstate: skip initial CPU reset if reset is not
actually possible
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [18/91] ced267fdaadbf2072c1897223522457a006e6c81 (bonzini/rhel-qemu-kvm)
Right now, the system reset is concluded by a call to
cpu_synchronize_all_post_reset() in order to sync any changes
that the machine reset callback applied to the CPU state.
However, for VMs with encrypted state such as SEV-ES guests (currently
the only case of guests with non-resettable CPUs) this cannot be done,
because guest state has already been finalized by machine-init-done notifiers.
cpu_synchronize_all_post_reset() does nothing on these guests, and actually
we would like to make it fail if called once guest has been encrypted.
So, assume that boards that support non-resettable CPUs do not touch
CPU state and that all such setup is done before, at the time of
cpu_synchronize_all_post_init().
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
system/runstate.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/system/runstate.c b/system/runstate.c
index d6ab860eca..cb4905a40f 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason)
default:
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
- cpu_synchronize_all_post_reset();
+
+ /*
+ * Some boards use the machine reset callback to point CPUs to the firmware
+ * entry point. Assume that this is not the case for boards that support
+ * non-resettable CPUs (currently used only for confidential guests), in
+ * which case cpu_synchronize_all_post_init() is enough because
+ * it does _more_ than cpu_synchronize_all_post_reset().
+ */
+ if (cpus_are_resettable()) {
+ cpu_synchronize_all_post_reset();
+ } else {
+ assert(runstate_check(RUN_STATE_PRELAUNCH));
+ }
+
vm_set_suspended(false);
}
--
2.39.3

View File

@ -0,0 +1,109 @@
From 4ebc58d4a7a3d4a20f20f1cd3f21082b80097fe2 Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 29 Feb 2024 01:00:38 -0500
Subject: [PATCH 014/100] s390: Switch to use confidential_guest_kvm_init()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [14/91] 8c9e09ec9976c00b41c02868fff034286a341468 (bonzini/rhel-qemu-kvm)
Use unified confidential_guest_kvm_init() for consistency with
other architectures.
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a14a2b0148e657cc526b7a75f2a1937628764e7a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/s390x/s390-virtio-ccw.c | 5 ++++-
target/s390x/kvm/pv.c | 10 +++++++++-
target/s390x/kvm/pv.h | 14 --------------
3 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 9ad54682c6..828ce6e87e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -14,6 +14,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "exec/ram_addr.h"
+#include "exec/confidential-guest-support.h"
#include "hw/s390x/s390-virtio-hcall.h"
#include "hw/s390x/sclp.h"
#include "hw/s390x/s390_flic.h"
@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine)
s390_init_cpus(machine);
/* Need CPU model to be determined before we can set up PV */
- s390_pv_init(machine->cgs, &error_fatal);
+ if (machine->cgs) {
+ confidential_guest_kvm_init(machine->cgs, &error_fatal);
+ }
s390_flic_init();
diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c
index 7ca7faec73..dde836d21a 100644
--- a/target/s390x/kvm/pv.c
+++ b/target/s390x/kvm/pv.c
@@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp)
return s390_pv_check_cpus(errp);
}
-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) {
return 0;
}
+ if (!kvm_enabled()) {
+ error_setg(errp, "Protected Virtualization requires KVM");
+ return -1;
+ }
+
if (!s390_has_feat(S390_FEAT_UNPACK)) {
error_setg(errp,
"CPU model does not support Protected Virtualization");
@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest,
static void s390_pv_guest_class_init(ObjectClass *oc, void *data)
{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+
+ klass->kvm_init = s390_pv_kvm_init;
}
static void s390_pv_guest_init(Object *obj)
diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h
index 5877d28ff1..4b40817439 100644
--- a/target/s390x/kvm/pv.h
+++ b/target/s390x/kvm/pv.h
@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len,
static inline int kvm_s390_dump_completion_data(void *buff) { return 0; }
#endif /* CONFIG_KVM */
-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp)
-{
- if (!cgs) {
- return 0;
- }
- if (kvm_enabled()) {
- return s390_pv_kvm_init(cgs, errp);
- }
-
- error_setg(errp, "Protected Virtualization requires KVM");
- return -1;
-}
-
#endif /* HW_S390_PV_H */
--
2.39.3

View File

@ -0,0 +1,186 @@
From 3d197f42afea6d0b176c2b26b772965692ffeab3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Tue, 14 May 2024 18:42:44 +0100
Subject: [PATCH 047/100] scripts/update-linux-header.sh: be more src tree
friendly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [47/91] c4165cc8bf79c3f96912e8210b3bb3565add288f (bonzini/rhel-qemu-kvm)
Running "install_headers" in the Linux source tree is fairly
unfriendly as out-of-tree builds will start complaining about the
kernel source being non-pristine. As we have a temporary directory for
the install we should also do the build step here. So now we have:
$tmpdir/
$blddir/
$hdrdir/
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org>
(cherry picked from commit b51ddd937f11f76614d4b36d14d8778df242661c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 80 +++++++++++++++++----------------
1 file changed, 41 insertions(+), 39 deletions(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 5f20434d5c..4431ba4d54 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -27,6 +27,8 @@
# types like "__u64". This work is done in the cp_portable function.
tmpdir=$(mktemp -d)
+hdrdir="$tmpdir/headers"
+blddir="$tmpdir/build"
linux="$1"
output="$2"
@@ -111,56 +113,56 @@ for arch in $ARCHLIST; do
arch_var=ARCH
fi
- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install
+ make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install
rm -rf "$output/linux-headers/asm-$arch"
mkdir -p "$output/linux-headers/asm-$arch"
for header in kvm.h unistd.h bitsperlong.h mman.h; do
- cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
done
if [ $arch = mips ]; then
- cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
- cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
- cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
- cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
+ cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/"
+ cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/"
+ cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/"
+ cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/"
fi
if [ $arch = powerpc ]; then
- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/"
- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/"
+ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/"
+ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/"
fi
rm -rf "$output/include/standard-headers/asm-$arch"
mkdir -p "$output/include/standard-headers/asm-$arch"
if [ $arch = s390 ]; then
- cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
+ cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/"
+ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/"
+ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/"
fi
if [ $arch = arm ]; then
- cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
- cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
- cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
+ cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/"
+ cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/"
+ cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/"
fi
if [ $arch = arm64 ]; then
- cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/"
+ cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/"
fi
if [ $arch = x86 ]; then
- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
- cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
- cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
+ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
+ cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
+ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
+ cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
# Remove everything except the macros from bootparam.h avoiding the
# unnecessary import of several video/ist/etc headers
sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
- "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h"
- cp_portable "$tmpdir/bootparam.h" \
+ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h"
+ cp_portable "$hdrdir/bootparam.h" \
"$output/include/standard-headers/asm-$arch"
- cp_portable "$tmpdir/include/asm/setup_data.h" \
+ cp_portable "$hdrdir/include/asm/setup_data.h" \
"$output/standard-headers/asm-x86"
fi
if [ $arch = riscv ]; then
- cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/"
+ cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/"
fi
done
arch=
@@ -170,13 +172,13 @@ mkdir -p "$output/linux-headers/linux"
for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \
vduse.h iommufd.h bits.h; do
- cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
+ cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux"
done
rm -rf "$output/linux-headers/asm-generic"
mkdir -p "$output/linux-headers/asm-generic"
for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do
- cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic"
+ cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic"
done
if [ -L "$linux/source" ]; then
@@ -211,23 +213,23 @@ EOF
rm -rf "$output/include/standard-headers/linux"
mkdir -p "$output/include/standard-headers/linux"
-for i in "$tmpdir"/include/linux/*virtio*.h \
- "$tmpdir/include/linux/qemu_fw_cfg.h" \
- "$tmpdir/include/linux/fuse.h" \
- "$tmpdir/include/linux/input.h" \
- "$tmpdir/include/linux/input-event-codes.h" \
- "$tmpdir/include/linux/udmabuf.h" \
- "$tmpdir/include/linux/pci_regs.h" \
- "$tmpdir/include/linux/ethtool.h" \
- "$tmpdir/include/linux/const.h" \
- "$tmpdir/include/linux/kernel.h" \
- "$tmpdir/include/linux/vhost_types.h" \
- "$tmpdir/include/linux/sysinfo.h" \
- "$tmpdir/include/misc/pvpanic.h"; do
+for i in "$hdrdir"/include/linux/*virtio*.h \
+ "$hdrdir/include/linux/qemu_fw_cfg.h" \
+ "$hdrdir/include/linux/fuse.h" \
+ "$hdrdir/include/linux/input.h" \
+ "$hdrdir/include/linux/input-event-codes.h" \
+ "$hdrdir/include/linux/udmabuf.h" \
+ "$hdrdir/include/linux/pci_regs.h" \
+ "$hdrdir/include/linux/ethtool.h" \
+ "$hdrdir/include/linux/const.h" \
+ "$hdrdir/include/linux/kernel.h" \
+ "$hdrdir/include/linux/vhost_types.h" \
+ "$hdrdir/include/linux/sysinfo.h" \
+ "$hdrdir/include/misc/pvpanic.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
done
mkdir -p "$output/include/standard-headers/drm"
-cp_portable "$tmpdir/include/drm/drm_fourcc.h" \
+cp_portable "$hdrdir/include/drm/drm_fourcc.h" \
"$output/include/standard-headers/drm"
rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma"
--
2.39.3

View File

@ -0,0 +1,38 @@
From 5db9faee4d6efc9dbe010d2b745aba59d943d2ac Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Wed, 21 Feb 2024 10:51:38 -0600
Subject: [PATCH 016/100] scripts/update-linux-headers: Add bits.h to file
imports
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [16/91] 150ee6376982bd5f471cb561f6760bf80d1211db (bonzini/rhel-qemu-kvm)
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b40b8eb609d3549ac14aab43849b20f5cba951c9)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index d48856f9e2..5f20434d5c 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -169,7 +169,7 @@ rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux"
for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \
psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \
- vduse.h iommufd.h; do
+ vduse.h iommufd.h bits.h; do
cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
done
--
2.39.3

View File

@ -0,0 +1,83 @@
From 2d0989fe09703ef46ba9c5d14770dbf8a6fd2f80 Mon Sep 17 00:00:00 2001
From: Michael Roth <michael.roth@amd.com>
Date: Sun, 18 Feb 2024 23:35:02 -0600
Subject: [PATCH 015/100] scripts/update-linux-headers: Add setup_data.h to
import list
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [15/91] 9d46c8787259317710a84e7a6aa36731e9f55a17 (bonzini/rhel-qemu-kvm)
Data structures like struct setup_data have been moved to a separate
setup_data.h header which bootparam.h relies on. Add setup_data.h to
the cp_portable() list and sync it along with the other header files.
Note that currently struct setup_data is stripped away as part of
generating bootparam.h, but that handling is no currently needed for
setup_data.h since it doesn't pull in many external
headers/dependencies. However, QEMU currently redefines struct
setup_data in hw/i386/x86.c, so that will need to be removed as part of
any header update that pulls in the new setup_data.h to avoid build
bisect breakage.
Because <asm/setup_data.h> is the first architecture specific #include
in include/standard-headers/, add a new sed substitution to rewrite
asm/ include to the standard-headers/asm-* subdirectory for the current
architecture.
And while at it, remove asm-generic/kvm_para.h from the list of
allowed includes: it does not have a matching substitution, and therefore
it would not be possible to use it on non-Linux systems where there is
no /usr/include/asm-generic/ directory.
Signed-off-by: Michael Roth <michael.roth@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index a0006eec6f..d48856f9e2 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -61,7 +61,7 @@ cp_portable() {
-e 'linux/const' \
-e 'linux/kernel' \
-e 'linux/sysinfo' \
- -e 'asm-generic/kvm_para' \
+ -e 'asm/setup_data.h' \
> /dev/null
then
echo "Unexpected #include in input file $f".
@@ -77,6 +77,7 @@ cp_portable() {
-e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \
-e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \
-e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \
+ -e 's/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \
-e 's/__bitwise//' \
-e 's/__attribute__((packed))/QEMU_PACKED/' \
-e 's/__inline__/inline/' \
@@ -155,11 +156,14 @@ for arch in $ARCHLIST; do
"$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h"
cp_portable "$tmpdir/bootparam.h" \
"$output/include/standard-headers/asm-$arch"
+ cp_portable "$tmpdir/include/asm/setup_data.h" \
+ "$output/standard-headers/asm-x86"
fi
if [ $arch = riscv ]; then
cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/"
fi
done
+arch=
rm -rf "$output/linux-headers/linux"
mkdir -p "$output/linux-headers/linux"
--
2.39.3

View File

@ -0,0 +1,47 @@
From 09acdbc49a4dd85d82ad30ec2859edfcdba8431e Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 27 May 2024 08:01:26 +0200
Subject: [PATCH 049/100] scripts/update-linux-headers.sh: Fix the path of
setup_data.h
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [49/91] f3008bc07796687c9440f5720fbc72a12d0a1278 (bonzini/rhel-qemu-kvm)
When running the update-linx-headers.sh script, it currently fails with:
scripts/update-linux-headers.sh: line 73: .../qemu/standard-headers/asm-x86/setup_data.h: No such file or directory
The "include" folder is obviously missing here - no clue how this could
have worked before?
Fixes: 66210a1a30 ("scripts/update-linux-headers: Add setup_data.h to import list")
Message-ID: <20240527060126.12578-1-thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit bde26d90ae9f7551cac90e117fc7216c807a3bfe)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 0f404d5317..f084bee72e 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -160,7 +160,7 @@ for arch in $ARCHLIST; do
cp_portable "$hdrdir/bootparam.h" \
"$output/include/standard-headers/asm-$arch"
cp_portable "$hdrdir/include/asm/setup_data.h" \
- "$output/standard-headers/asm-x86"
+ "$output/include/standard-headers/asm-x86"
fi
if [ $arch = riscv ]; then
cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/"
--
2.39.3

View File

@ -0,0 +1,44 @@
From 8e63d742015bf69a00fd44e88eb1198f594b2de2 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 27 May 2024 08:02:43 +0200
Subject: [PATCH 048/100] scripts/update-linux-headers.sh: Remove temporary
directory inbetween
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [48/91] 879554dc7e722c4e20b302a00ca745ddeefdc0fb (bonzini/rhel-qemu-kvm)
We are reusing the same temporary directory for installing the headers
of all targets, so there could be stale files here when switching from
one target to another. Make sure to delete the folder before installing
a new set of target headers into it.
Message-ID: <20240527060243.12647-1-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 3efc75ad9d9317e5709861bbebb2c29390f8e7a2)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 4431ba4d54..0f404d5317 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -113,6 +113,7 @@ for arch in $ARCHLIST; do
arch_var=ARCH
fi
+ rm -rf "$hdrdir"
make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install
rm -rf "$output/linux-headers/asm-$arch"
--
2.39.3

View File

@ -0,0 +1,60 @@
From d580b83d9eda7802ffa3890ea8641793fe78937c Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 31 Jul 2024 14:32:05 +0200
Subject: [PATCH 094/100] scsi-block: Don't skip callback for sgio error
status/driver_status
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop
RH-Jira: RHEL-50000
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [2/4] 1fee1b21dae314f4f34c88f2d2fabd7af011404a (kmwolf/centos-qemu-kvm)
Instead of calling into scsi_handle_rw_error() directly from
scsi_block_sgio_complete() and skipping the normal callback, go through
the normal cleanup path by calling the callback with a positive error
value.
The important difference here is not only that the code path is cleaner,
but that the callbacks set r->req.aiocb = NULL. If we skip setting this
and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs
into an assertion failure in scsi_read_data() or scsi_write_data()
because the dangling aiocb pointer is unexpected.
Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.")
Buglink: https://issues.redhat.com/browse/RHEL-50000
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20240731123207.27636-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 622a70161ac258e4a166a7dca4b5be267e0652d9)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-disk.c | 10 ----------
1 file changed, 10 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index bed2c8746c..e7f57f3230 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2804,16 +2804,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
} else {
ret = io_hdr->status;
}
-
- if (ret > 0) {
- if (scsi_handle_rw_error(r, ret, true)) {
- scsi_req_unref(&r->req);
- return;
- }
-
- /* Ignore error. */
- ret = 0;
- }
}
req->cb(req->cb_opaque, ret);
--
2.39.3

View File

@ -0,0 +1,79 @@
From eebe5fe8cbc854a6365e7c1adbb701079b137bcb Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 31 Jul 2024 14:32:06 +0200
Subject: [PATCH 095/100] scsi-disk: Add warning comments that host_status
errors take a shortcut
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop
RH-Jira: RHEL-50000
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [3/4] 6fcd603fc78fda65a425a1acd9a8710d81c6ed7f (kmwolf/centos-qemu-kvm)
scsi_block_sgio_complete() has surprising behaviour in that there are
error cases in which it directly completes the request and never calls
the passed callback. In the current state of the code, this doesn't seem
to result in bugs, but with future code changes, we must be careful to
never rely on the callback doing some cleanup until this code smell is
fixed. For now, just add warnings to make people aware of the trap.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20240731123207.27636-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 8a0495624f23f8f01dfb1484f367174f3b3572e8)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-disk.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e7f57f3230..b4062ac2ff 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -65,6 +65,9 @@ struct SCSIDiskClass {
/*
* Callbacks receive ret == 0 for success. Errors are represented either as
* negative errno values, or as positive SAM status codes.
+ *
+ * Beware: For errors returned in host_status, the function may directly
+ * complete the request and never call the callback.
*/
DMAIOFunc *dma_readv;
DMAIOFunc *dma_writev;
@@ -359,6 +362,7 @@ done:
scsi_req_unref(&r->req);
}
+/* May not be called in all error cases, don't rely on cleanup here */
static void scsi_dma_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -399,6 +403,7 @@ done:
scsi_req_unref(&r->req);
}
+/* May not be called in all error cases, don't rely on cleanup here */
static void scsi_read_complete(void *opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -538,6 +543,7 @@ done:
scsi_req_unref(&r->req);
}
+/* May not be called in all error cases, don't rely on cleanup here */
static void scsi_write_complete(void * opaque, int ret)
{
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
@@ -2793,6 +2799,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret)
sg_io_hdr_t *io_hdr = &req->io_header;
if (ret == 0) {
+ /* FIXME This skips calling req->cb() and any cleanup in it */
if (io_hdr->host_status != SCSI_HOST_OK) {
scsi_req_complete_failed(&r->req, io_hdr->host_status);
scsi_req_unref(&r->req);
--
2.39.3

View File

@ -0,0 +1,106 @@
From bd5cace452183053e356a27317c759ecfe0391aa Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 31 Jul 2024 14:32:07 +0200
Subject: [PATCH 096/100] scsi-disk: Always report RESERVATION_CONFLICT to
guest
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop
RH-Jira: RHEL-50000
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [4/4] eb4142071e5cbe385a949a6c48b0c8f8c6086918 (kmwolf/centos-qemu-kvm)
In the case of scsi-block, RESERVATION_CONFLICT is not a backend error,
but indicates that the guest tried to make a request that it isn't
allowed to execute. Pass the error to the guest so that it can decide
what to do with it.
Without this, if we stop the VM in response to a RESERVATION_CONFLICT
(as is the default policy in management software such as oVirt or
KubeVirt), it can happen that the VM cannot be resumed any more because
every attempt to resume it immediately runs into the same error and
stops the VM again.
One case that expects RESERVATION_CONFLICT errors to be visible in the
guest is running the validation tests in Windows 2019's Failover Cluster
Manager, which intentionally tries to execute invalid requests to see if
they are properly rejected.
Buglink: https://issues.redhat.com/browse/RHEL-50000
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20240731123207.27636-5-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 9da6bd39f92434f55573acd017841b195c60188f)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index b4062ac2ff..91ccf37fef 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -202,7 +202,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
SCSISense sense = SENSE_CODE(NO_SENSE);
- int error = 0;
+ int error;
bool req_has_sense = false;
BlockErrorAction action;
int status;
@@ -213,11 +213,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
} else {
/* A passthrough command has completed with nonzero status. */
status = ret;
- if (status == CHECK_CONDITION) {
+ switch (status) {
+ case CHECK_CONDITION:
req_has_sense = true;
error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense));
- } else {
+ break;
+ case RESERVATION_CONFLICT:
+ /*
+ * Don't apply the error policy, always report to the guest.
+ *
+ * This is a passthrough code path, so it's not a backend error, but
+ * a response to an invalid guest request.
+ *
+ * Windows Failover Cluster validation intentionally sends invalid
+ * requests to verify that reservations work as intended. It is
+ * crucial that it sees the resulting errors.
+ *
+ * Treating a reservation conflict as a guest-side error is obvious
+ * when a pr-manager is in use. Without one, the situation is less
+ * clear, but there might be nothing that can be fixed on the host
+ * (like in the above example), and we don't want to be stuck in a
+ * loop where resuming the VM and retrying the request immediately
+ * stops it again. So always reporting is still the safer option in
+ * this case, too.
+ */
+ error = 0;
+ break;
+ default:
error = EINVAL;
+ break;
}
}
@@ -227,8 +251,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed)
* are usually retried immediately, so do not post them to QMP and
* do not account them as failed I/O.
*/
- if (req_has_sense &&
- scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) {
+ if (!error || (req_has_sense &&
+ scsi_sense_buf_is_guest_recoverable(r->req.sense,
+ sizeof(r->req.sense)))) {
action = BLOCK_ERROR_ACTION_REPORT;
acct_failed = false;
} else {
--
2.39.3

View File

@ -0,0 +1,125 @@
From 1a0aa9bbdad63d72628002740410b8a28282a96e Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Wed, 31 Jul 2024 14:32:04 +0200
Subject: [PATCH 093/100] scsi-disk: Use positive return value for status in
dma_readv/writev
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop
RH-Jira: RHEL-50000
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Commit: [1/4] a0b3e7bfd7b7059c0ec3706f2eb1698c1d430b08 (kmwolf/centos-qemu-kvm)
In some error cases, scsi_block_sgio_complete() never calls the passed
callback, but directly completes the request. This leads to bugs because
its error paths are not exact copies of what the callback would normally
do.
In preparation to fix this, allow passing positive return values to the
callbacks that represent the status code that should be used to complete
the request.
scsi_handle_rw_error() already handles positive values for its ret
parameter because scsi_block_sgio_complete() calls directly into it.
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20240731123207.27636-2-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit cfe0880835cd364b590ffd27ef8dbd2ad8838bc5)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-disk.c | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 4bd7af9d0c..bed2c8746c 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -62,6 +62,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE)
struct SCSIDiskClass {
SCSIDeviceClass parent_class;
+ /*
+ * Callbacks receive ret == 0 for success. Errors are represented either as
+ * negative errno values, or as positive SAM status codes.
+ */
DMAIOFunc *dma_readv;
DMAIOFunc *dma_writev;
bool (*need_fua_emulation)(SCSICommand *cmd);
@@ -261,7 +265,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed)
return true;
}
- if (ret < 0) {
+ if (ret != 0) {
return scsi_handle_rw_error(r, ret, acct_failed);
}
@@ -338,7 +342,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r)
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
assert(r->req.aiocb == NULL);
- if (scsi_disk_req_check_error(r, ret, false)) {
+ if (scsi_disk_req_check_error(r, ret, ret > 0)) {
goto done;
}
@@ -363,9 +367,10 @@ static void scsi_dma_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
+ /* ret > 0 is accounted for in scsi_disk_req_check_error() */
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
- } else {
+ } else if (ret == 0) {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_dma_complete_noio(r, ret);
@@ -381,7 +386,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
qemu_get_current_aio_context());
assert(r->req.aiocb == NULL);
- if (scsi_disk_req_check_error(r, ret, false)) {
+ if (scsi_disk_req_check_error(r, ret, ret > 0)) {
goto done;
}
@@ -402,9 +407,10 @@ static void scsi_read_complete(void *opaque, int ret)
assert(r->req.aiocb != NULL);
r->req.aiocb = NULL;
+ /* ret > 0 is accounted for in scsi_disk_req_check_error() */
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
- } else {
+ } else if (ret == 0) {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
trace_scsi_disk_read_complete(r->req.tag, r->qiov.size);
}
@@ -512,7 +518,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
qemu_get_current_aio_context());
assert (r->req.aiocb == NULL);
- if (scsi_disk_req_check_error(r, ret, false)) {
+ if (scsi_disk_req_check_error(r, ret, ret > 0)) {
goto done;
}
@@ -540,9 +546,10 @@ static void scsi_write_complete(void * opaque, int ret)
assert (r->req.aiocb != NULL);
r->req.aiocb = NULL;
+ /* ret > 0 is accounted for in scsi_disk_req_check_error() */
if (ret < 0) {
block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct);
- } else {
+ } else if (ret == 0) {
block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct);
}
scsi_write_complete_noio(r, ret);
--
2.39.3

View File

@ -0,0 +1,215 @@
From d9595fecd03c9a69ac562e3f240d50b2fa8d14a4 Mon Sep 17 00:00:00 2001
From: Tao Su <tao1.su@linux.intel.com>
Date: Wed, 20 Mar 2024 10:10:44 +0800
Subject: [PATCH 006/100] target/i386: Add new CPU model SierraForest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [6/91] 4bc71f82c258db46569a7e08965d1d358b19416c (bonzini/rhel-qemu-kvm)
According to table 1-2 in Intel Architecture Instruction Set Extensions and
Future Features (rev 051) [1], SierraForest has the following new features
which have already been virtualized:
- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7]
- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23]
- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4]
- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5]
Add above features to new CPU model SierraForest. Comparing with GraniteRapids
CPU model, SierraForest bare-metal removes the following features:
- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4]
- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11]
- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16]
- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17]
- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21]
- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28]
- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30]
- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31]
- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1]
- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6]
- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11]
- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12]
- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14]
- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16]
- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16]
- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22]
- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23]
- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24]
- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25]
- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5]
- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10]
- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12]
- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21]
- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14]
- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4]
- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7]
Add all features of GraniteRapids CPU model except above features to
SierraForest CPU model.
SierraForest doesnt support TSX and RTM but supports TAA_NO. When RTM is
not enabled in host, KVM will not report TAA_NO. So, just don't include
TAA_NO in SierraForest CPU model.
[1] https://cdrdv2.intel.com/v1/dl/getContent/671368
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Tao Su <tao1.su@linux.intel.com>
Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6e82d3b6220777667968a04c87e1667f164ebe88)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 126 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 0aa88d9b48..efbadc3ed7 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -4127,6 +4127,132 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ },
},
},
+ {
+ .name = "SierraForest",
+ .level = 0x23,
+ .vendor = CPUID_VENDOR_INTEL,
+ .family = 6,
+ .model = 175,
+ .stepping = 0,
+ /*
+ * please keep the ascending order so that we can have a clear view of
+ * bit position of each feature.
+ */
+ .features[FEAT_1_EDX] =
+ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
+ CPUID_SSE | CPUID_SSE2,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
+ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
+ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+ .features[FEAT_8000_0008_EBX] =
+ CPUID_8000_0008_EBX_WBNOINVD,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS |
+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB |
+ CPUID_7_0_EBX_SHA_NI,
+ .features[FEAT_7_0_ECX] =
+ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI |
+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
+ .features[FEAT_7_0_EDX] =
+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
+ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ .features[FEAT_ARCH_CAPABILITIES] =
+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO |
+ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO |
+ MSR_ARCH_CAP_PBRSB_NO,
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .features[FEAT_7_1_EAX] =
+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD |
+ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA,
+ .features[FEAT_7_1_EDX] =
+ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT,
+ .features[FEAT_7_2_EDX] =
+ CPUID_7_2_EDX_MCDT_NO,
+ .features[FEAT_VMX_BASIC] =
+ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
+ .features[FEAT_VMX_ENTRY_CTLS] =
+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
+ .features[FEAT_VMX_EPT_VPID_CAPS] =
+ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 |
+ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
+ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
+ MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
+ .features[FEAT_VMX_EXIT_CTLS] =
+ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
+ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
+ .features[FEAT_VMX_MISC] =
+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
+ MSR_VMX_MISC_VMWRITE_VMEXIT,
+ .features[FEAT_VMX_PINBASED_CTLS] =
+ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
+ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
+ VMX_PIN_BASED_POSTED_INTR,
+ .features[FEAT_VMX_PROCBASED_CTLS] =
+ VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
+ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
+ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
+ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
+ VMX_CPU_BASED_PAUSE_EXITING |
+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
+ .features[FEAT_VMX_SECONDARY_CTLS] =
+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
+ VMX_SECONDARY_EXEC_RDTSCP |
+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
+ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
+ VMX_SECONDARY_EXEC_RDRAND_EXITING |
+ VMX_SECONDARY_EXEC_ENABLE_INVPCID |
+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
+ VMX_SECONDARY_EXEC_XSAVES,
+ .features[FEAT_VMX_VMFUNC] =
+ MSR_VMX_VMFUNC_EPT_SWITCHING,
+ .xlevel = 0x80000008,
+ .model_id = "Intel Xeon Processor (SierraForest)",
+ .versions = (X86CPUVersionDefinition[]) {
+ { .version = 1 },
+ { /* end of list */ },
+ },
+ },
{
.name = "Denverton",
.level = 21,
--
2.39.3

View File

@ -0,0 +1,50 @@
From ae6229a3e45318b1101291b99a0e894399dcb1db Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 13 Mar 2024 07:53:23 -0700
Subject: [PATCH 007/100] target/i386: Export RFDS bit to guests
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [7/91] 7eb6cae8821a2e953d3ff2033fa2e973011ad771 (bonzini/rhel-qemu-kvm)
Register File Data Sampling (RFDS) is a CPU side-channel vulnerability
that may expose stale register value. CPUs that set RFDS_NO bit in MSR
IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS.
Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has
the microcode to help mitigate RFDS.
Make RFDS_CLEAR and RFDS_NO bits available to guests.
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41bdd9812863c150284a9339a048ed88c40f4df7)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index efbadc3ed7..489c853b42 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no",
NULL, "fb-clear", NULL, NULL,
NULL, NULL, NULL, NULL,
- "pbrsb-no", NULL, "gds-no", NULL,
- NULL, NULL, NULL, NULL,
+ "pbrsb-no", NULL, "gds-no", "rfds-no",
+ "rfds-clear", NULL, NULL, NULL,
},
.msr = {
.index = MSR_IA32_ARCH_CAPABILITIES,
--
2.39.3

View File

@ -0,0 +1,192 @@
From 4a811f54cdb3c9329f193ea43c76ed4eb1b14c19 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 Mar 2024 15:29:33 +0100
Subject: [PATCH 022/100] target/i386: Implement mc->kvm_type() to get VM type
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [22/91] d58cf6ead2de37852adc15c7642166904403453f (bonzini/rhel-qemu-kvm)
KVM is introducing a new API to create confidential guests, which
will be used by TDX and SEV-SNP but is also available for SEV and
SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to
identify which confidential computing technology to use.
Since there are no other expected uses of VM types, delegate
mc->kvm_type() for x86 boards to the confidential-guest-support
object pointed to by ms->cgs.
For example, if a sev-guest object is specified to confidential-guest-support,
like,
qemu -machine ...,confidential-guest-support=sev0 \
-object sev-guest,id=sev0,...
it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM
is supported, and if so use them together with the KVM_SEV_INIT2
function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to
KVM_SEV_INIT and KVM_SEV_ES_INIT.
This is a preparatory work towards TDX and SEV-SNP support, but it
will also enable support for VMSA features such as DebugSwap, which
are only available via KVM_SEV_INIT2.
Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/x86.c | 11 ++++++++
target/i386/confidential-guest.h | 19 ++++++++++++++
target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++
target/i386/kvm/kvm_i386.h | 2 ++
4 files changed, 76 insertions(+)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 84a4801977..3d5b51e92d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name,
qapi_free_SgxEPCList(list);
}
+static int x86_kvm_type(MachineState *ms, const char *vm_type)
+{
+ /*
+ * No x86 machine has a kvm-type property. If one is added that has
+ * it, it should call kvm_get_vm_type() directly or not use it at all.
+ */
+ assert(vm_type == NULL);
+ return kvm_enabled() ? kvm_get_vm_type(ms) : 0;
+}
+
static void x86_machine_initfn(Object *obj)
{
X86MachineState *x86ms = X86_MACHINE(obj);
@@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data)
mc->cpu_index_to_instance_props = x86_cpu_index_to_props;
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
+ mc->kvm_type = x86_kvm_type;
x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index ca12d5a8fb..532e172a60 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -36,5 +36,24 @@ struct X86ConfidentialGuest {
struct X86ConfidentialGuestClass {
/* <private> */
ConfidentialGuestSupportClass parent;
+
+ /* <public> */
+ int (*kvm_type)(X86ConfidentialGuest *cg);
};
+
+/**
+ * x86_confidential_guest_kvm_type:
+ *
+ * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler.
+ */
+static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->kvm_type) {
+ return klass->kvm_type(cg);
+ } else {
+ return 0;
+ }
+}
#endif
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a12207a8ee..1f0ab12c2e 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -31,6 +31,7 @@
#include "sysemu/kvm_int.h"
#include "sysemu/runstate.h"
#include "kvm_i386.h"
+#include "../confidential-guest.h"
#include "sev.h"
#include "xen-emu.h"
#include "hyperv.h"
@@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES];
static RateLimit bus_lock_ratelimit_ctrl;
static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
+static const char *vm_type_name[] = {
+ [KVM_X86_DEFAULT_VM] = "default",
+};
+
+bool kvm_is_vm_type_supported(int type)
+{
+ uint32_t machine_types;
+
+ /*
+ * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM
+ * is always supported
+ */
+ if (type == KVM_X86_DEFAULT_VM) {
+ return true;
+ }
+
+ machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator),
+ KVM_CAP_VM_TYPES);
+ return !!(machine_types & BIT(type));
+}
+
+int kvm_get_vm_type(MachineState *ms)
+{
+ int kvm_type = KVM_X86_DEFAULT_VM;
+
+ if (ms->cgs) {
+ if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) {
+ error_report("configuration type %s not supported for x86 guests",
+ object_get_typename(OBJECT(ms->cgs)));
+ exit(1);
+ }
+ kvm_type = x86_confidential_guest_kvm_type(
+ X86_CONFIDENTIAL_GUEST(ms->cgs));
+ }
+
+ if (!kvm_is_vm_type_supported(kvm_type)) {
+ error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]);
+ exit(1);
+ }
+
+ return kvm_type;
+}
+
bool kvm_has_smm(void)
{
return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM);
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 30fedcffea..6b44844d95 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void);
bool kvm_enable_sgx_provisioning(KVMState *s);
bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
+int kvm_get_vm_type(MachineState *ms);
void kvm_arch_reset_vcpu(X86CPU *cs);
void kvm_arch_after_reset_vcpu(X86CPU *cpu);
void kvm_arch_do_init_vcpu(X86CPU *cs);
@@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#ifdef CONFIG_KVM
+bool kvm_is_vm_type_supported(int type);
bool kvm_has_adjust_clock_stable(void);
bool kvm_has_exception_payload(void);
void kvm_synchronize_all_tsc(void);
--
2.39.3

View File

@ -0,0 +1,68 @@
From fe60f8d47b6e14f17dd6c06b03bd00e6bcdbeefb Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 20 Mar 2024 17:31:38 +0800
Subject: [PATCH 005/100] target/i386: Introduce Icelake-Server-v7 to enable
TSX
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [5/91] 66d865899e0d510b6c86763422d6b28b904b208a (bonzini/rhel-qemu-kvm)
When start L2 guest with both L1/L2 using Icelake-Server-v3 or above,
QEMU reports below warning:
"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]"
Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no
bit. It's meaningless that TSX isn't supported but still claim TSX is secure.
So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2
triggers the warning.
Fix it by introducing a new version Icelake-Server-v7 which has both TSX
and taa-no features. Then guest can use TSX securely when it see taa-no.
This matches the production Icelake which supports TSX and isn't susceptible
to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no.
Ideally, TSX should have being enabled together with taa-no since v3, but for
compatibility, we'd better to add v7 to enable it.
Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model")
Tested-by: Xiangfei Ma <xiangfeix.ma@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c895fa54e3060c5ac6f3888dce96c9b78626072b)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/cpu.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index a7f71422ea..0aa88d9b48 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3840,6 +3840,16 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
+ {
+ .version = 7,
+ .note = "TSX, taa-no",
+ .props = (PropValue[]) {
+ /* Restore TSX features removed by -v2 above */
+ { "hle", "on" },
+ { "rtm", "on" },
+ { /* end of list */ }
+ },
+ },
{ /* end of list */ }
}
},
--
2.39.3

View File

@ -0,0 +1,49 @@
From 070dda07559a7488c62fc80a8c79e8baaee125eb Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 3 Jul 2024 10:37:23 +0200
Subject: [PATCH 087/100] target/i386: SEV: fix formatting of CPUID mismatch
message
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [87/91] 36bc2cc80d5ffc1ceeb1836540660ff45885a818 (bonzini/rhel-qemu-kvm)
Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f45ef010e19fe86314bffd5d5c9d5d77f4ce8103)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/sev.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index c40562dce3..37de80adc7 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -839,7 +839,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
size_t i;
if (old->count != new->count) {
- error_report("SEV-SNP: CPUID validation failed due to count mismatch,"
+ error_report("SEV-SNP: CPUID validation failed due to count mismatch, "
"provided: %d, expected: %d", old->count, new->count);
return;
}
@@ -851,8 +851,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
new_func = &new->entries[i];
if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
- error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x"
- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x"
+ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, "
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, "
"expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x",
old_func->eax_in, old_func->ecx_in,
old_func->eax, old_func->ebx, old_func->ecx, old_func->edx,
--
2.39.3

View File

@ -0,0 +1,42 @@
From 37b7e2185f1d23dd5f5a95b545b8d760492915ed Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 2 Aug 2024 01:43:37 +0200
Subject: [PATCH 091/100] target/i386: SEV: fix mismatch in vcek-disabled
property name
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [91/91] 3a8abc4a0547b985cb79cef29bd3e8350d3d4b48 (bonzini/rhel-qemu-kvm)
The vcek-disabled property of the sev-snp-guest object is misspelled
vcek-required (which I suppose would use the opposite polarity) in
the call to object_class_property_add_bool(). Fix it.
Reported-by: Zixi Chen <zixchen@redhat.com>
Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d4392415c328f83b2e30517a3561be523874f441)
---
target/i386/sev.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index b921defb63..aed565dbe8 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -2378,7 +2378,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, "author-key-enabled",
sev_snp_guest_get_author_key_enabled,
sev_snp_guest_set_author_key_enabled);
- object_class_property_add_bool(oc, "vcek-required",
+ object_class_property_add_bool(oc, "vcek-disabled",
sev_snp_guest_get_vcek_disabled,
sev_snp_guest_set_vcek_disabled);
object_class_property_add_str(oc, "host-data",
--
2.39.3

View File

@ -0,0 +1,146 @@
From 6bb738fb90a3a1221ae35596b3d03a17e0b1c34d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 Mar 2024 15:30:25 +0100
Subject: [PATCH 023/100] target/i386: SEV: use KVM_SEV_INIT2 if possible
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [23/91] 9579d772ae5124a94c6b1e3a4566bf3470d2bc8f (bonzini/rhel-qemu-kvm)
Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual
machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP.
These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have
several advantages:
- sharing the initialization sequence with SEV-SNP and TDX
- allowing arguments including the set of desired VMSA features
- protection against invalid use of KVM_GET/SET_* ioctls for guests
with encrypted state
If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported,
fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the
default x86 VM type).
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 663e2f443e5722370708ce2f4c27d94a2087d2d3)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/kvm/kvm.c | 2 ++
target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++----
2 files changed, 39 insertions(+), 4 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 1f0ab12c2e..408568d053 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value);
static const char *vm_type_name[] = {
[KVM_X86_DEFAULT_VM] = "default",
+ [KVM_X86_SEV_VM] = "SEV",
+ [KVM_X86_SEV_ES_VM] = "SEV-ES",
};
bool kvm_is_vm_type_supported(int type)
diff --git a/target/i386/sev.c b/target/i386/sev.c
index ebe36d4c10..9dab4060b8 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -26,6 +26,7 @@
#include "qemu/error-report.h"
#include "crypto/hash.h"
#include "sysemu/kvm.h"
+#include "kvm/kvm_i386.h"
#include "sev.h"
#include "sysemu/sysemu.h"
#include "sysemu/runstate.h"
@@ -56,6 +57,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
struct SevGuestState {
X86ConfidentialGuest parent_obj;
+ int kvm_type;
+
/* configuration parameters */
char *sev_device;
uint32_t policy;
@@ -850,6 +853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state)
}
}
+static int sev_kvm_type(X86ConfidentialGuest *cg)
+{
+ SevGuestState *sev = SEV_GUEST(cg);
+ int kvm_type;
+
+ if (sev->kvm_type != -1) {
+ goto out;
+ }
+
+ kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+ if (kvm_is_vm_type_supported(kvm_type)) {
+ sev->kvm_type = kvm_type;
+ } else {
+ sev->kvm_type = KVM_X86_DEFAULT_VM;
+ }
+
+out:
+ return sev->kvm_type;
+}
+
static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
SevGuestState *sev = SEV_GUEST(cgs);
@@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
__func__);
goto err;
}
- cmd = KVM_SEV_ES_INIT;
- } else {
- cmd = KVM_SEV_INIT;
}
trace_kvm_sev_init();
- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error);
+ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) {
+ cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT;
+
+ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error);
+ } else {
+ struct kvm_sev_init args = { 0 };
+
+ ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error);
+ }
+
if (ret) {
error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
__func__, ret, fw_error, fw_error_to_str(fw_error));
@@ -1327,8 +1356,10 @@ static void
sev_guest_class_init(ObjectClass *oc, void *data)
{
ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
klass->kvm_init = sev_kvm_init;
+ x86_klass->kvm_type = sev_kvm_type;
object_class_property_add_str(oc, "sev-device",
sev_guest_get_sev_device,
@@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj)
{
SevGuestState *sev = SEV_GUEST(obj);
+ sev->kvm_type = -1;
+
sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE);
sev->policy = DEFAULT_GUEST_POLICY;
object_property_add_uint32_ptr(obj, "policy", &sev->policy,
--
2.39.3

View File

@ -0,0 +1,124 @@
From 090c64ea622534ff2ae6c9b66cdf0b1ddb58bf26 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 18 Mar 2024 16:53:36 +0100
Subject: [PATCH 002/100] target/i386: add guest-phys-bits cpu property
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [2/91] 6603e842012dc484e1f571ea0a77b59095f37003 (bonzini/rhel-qemu-kvm)
Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16])
via -cpu $model,guest-phys-bits=$nr.
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Message-ID: <20240318155336.156197-3-kraxel@redhat.com>
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 513ba32dccc659c80722b3a43233b26eaa50309a)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/i386/pc.c | 2 ++
target/i386/cpu.c | 22 ++++++++++++++++++++++
target/i386/cpu.h | 8 ++++++++
3 files changed, 32 insertions(+)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 648762d908..b9fde3cec1 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -349,6 +349,8 @@ GlobalProperty pc_rhel_compat[] = {
const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat);
GlobalProperty pc_rhel_9_5_compat[] = {
+ /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */
+ { TYPE_X86_CPU, "guest-phys-bits", "0" },
};
const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index be7b0663cd..a7f71422ea 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6591,6 +6591,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) {
/* 64 bit processor */
*eax |= (cpu_x86_virtual_addr_width(env) << 8);
+ *eax |= (cpu->guest_phys_bits << 16);
}
*ebx = env->features[FEAT_8000_0008_EBX];
if (cs->nr_cores * cs->nr_threads > 1) {
@@ -7350,6 +7351,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
goto out;
}
+ if (cpu->guest_phys_bits == -1) {
+ /*
+ * If it was not set by the user, or by the accelerator via
+ * cpu_exec_realizefn, clear.
+ */
+ cpu->guest_phys_bits = 0;
+ }
+
if (cpu->ucode_rev == 0) {
/*
* The default is the same as KVM's. Note that this check
@@ -7400,6 +7409,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
if (cpu->phys_bits == 0) {
cpu->phys_bits = TCG_PHYS_ADDR_BITS;
}
+ if (cpu->guest_phys_bits &&
+ (cpu->guest_phys_bits > cpu->phys_bits ||
+ cpu->guest_phys_bits < 32)) {
+ error_setg(errp, "guest-phys-bits should be between 32 and %u "
+ " (but is %u)",
+ cpu->phys_bits, cpu->guest_phys_bits);
+ return;
+ }
} else {
/* For 32 bit systems don't use the user set value, but keep
* phys_bits consistent with what we tell the guest.
@@ -7408,6 +7425,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
error_setg(errp, "phys-bits is not user-configurable in 32 bit");
return;
}
+ if (cpu->guest_phys_bits != 0) {
+ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit");
+ return;
+ }
if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) {
cpu->phys_bits = 36;
@@ -7908,6 +7929,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false),
DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0),
+ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1),
DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false),
DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0),
DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6b05738079..6112e27bfd 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2027,6 +2027,14 @@ struct ArchCPU {
/* Number of physical address bits supported */
uint32_t phys_bits;
+ /*
+ * Number of guest physical address bits available. Usually this is
+ * identical to host physical address bits. With NPT or EPT 4-level
+ * paging, guest physical address space might be restricted to 48 bits
+ * even if the host cpu supports more physical address bits.
+ */
+ uint32_t guest_phys_bits;
+
/* in order to simplify APIC support, we leave this pointer to the
user */
struct DeviceState *apic_state;
--
2.39.3

View File

@ -0,0 +1,161 @@
From 0573fcd1775b6613127b1906d59d02e65f7519f3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Mar 2024 11:07:43 -0400
Subject: [PATCH 021/100] target/i386: introduce x86-confidential-guest
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [21/91] e86d3bcde7e1c2fa1ba8c9bc83e02033644f1ac0 (bonzini/rhel-qemu-kvm)
Introduce a common superclass for x86 confidential guest implementations.
It will extend ConfidentialGuestSupportClass with a method that provides
the VM type to be passed to KVM_CREATE_VM.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d82e9c843d662f13821026618aba936eda31a6c0)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++
target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++
target/i386/meson.build | 2 +-
target/i386/sev.c | 6 ++---
4 files changed, 77 insertions(+), 4 deletions(-)
create mode 100644 target/i386/confidential-guest.c
create mode 100644 target/i386/confidential-guest.h
diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c
new file mode 100644
index 0000000000..b3727845ad
--- /dev/null
+++ b/target/i386/confidential-guest.c
@@ -0,0 +1,33 @@
+/*
+ * QEMU Confidential Guest support
+ *
+ * Copyright (C) 2024 Red Hat, Inc.
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "confidential-guest.h"
+
+OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest,
+ x86_confidential_guest,
+ X86_CONFIDENTIAL_GUEST,
+ CONFIDENTIAL_GUEST_SUPPORT)
+
+static void x86_confidential_guest_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void x86_confidential_guest_init(Object *obj)
+{
+}
+
+static void x86_confidential_guest_finalize(Object *obj)
+{
+}
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
new file mode 100644
index 0000000000..ca12d5a8fb
--- /dev/null
+++ b/target/i386/confidential-guest.h
@@ -0,0 +1,40 @@
+/*
+ * x86-specific confidential guest methods.
+ *
+ * Copyright (c) 2024 Red Hat Inc.
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef TARGET_I386_CG_H
+#define TARGET_I386_CG_H
+
+#include "qom/object.h"
+
+#include "exec/confidential-guest-support.h"
+
+#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest"
+
+OBJECT_DECLARE_TYPE(X86ConfidentialGuest,
+ X86ConfidentialGuestClass,
+ X86_CONFIDENTIAL_GUEST)
+
+struct X86ConfidentialGuest {
+ /* <private> */
+ ConfidentialGuestSupport parent_obj;
+};
+
+/**
+ * X86ConfidentialGuestClass:
+ *
+ * Class to be implemented by confidential-guest-support concrete objects
+ * for the x86 target.
+ */
+struct X86ConfidentialGuestClass {
+ /* <private> */
+ ConfidentialGuestSupportClass parent;
+};
+#endif
diff --git a/target/i386/meson.build b/target/i386/meson.build
index 7c74bfa859..8abce725f8 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -6,7 +6,7 @@ i386_ss.add(files(
'xsave_helper.c',
'cpu-dump.c',
))
-i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c'))
# x86 cpu type
i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
diff --git a/target/i386/sev.c b/target/i386/sev.c
index c49a8fd55e..ebe36d4c10 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -35,7 +35,7 @@
#include "monitor/monitor.h"
#include "monitor/hmp-target.h"
#include "qapi/qapi-commands-misc-target.h"
-#include "exec/confidential-guest-support.h"
+#include "confidential-guest.h"
#include "hw/i386/pc.h"
#include "exec/address-spaces.h"
@@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
* -machine ...,memory-encryption=sev0
*/
struct SevGuestState {
- ConfidentialGuestSupport parent_obj;
+ X86ConfidentialGuest parent_obj;
/* configuration parameters */
char *sev_device;
@@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj)
/* sev guest info */
static const TypeInfo sev_guest_info = {
- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT,
+ .parent = TYPE_X86_CONFIDENTIAL_GUEST,
.name = TYPE_SEV_GUEST,
.instance_size = sizeof(SevGuestState),
.instance_finalize = sev_guest_finalize,
--
2.39.3

View File

@ -0,0 +1,59 @@
From b02dc1e5c0f01228053e784f9ec7ac3a47e91d7c Mon Sep 17 00:00:00 2001
From: Xiaoyao Li <xiaoyao.li@intel.com>
Date: Thu, 29 Feb 2024 01:36:25 -0500
Subject: [PATCH 026/100] trace/kvm: Split address space and slot id in
trace_kvm_set_user_memory()
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [26/91] 640511c4ab0ba76bb4483f6c3fb73e060d914f0a (bonzini/rhel-qemu-kvm)
The upper 16 bits of kvm_userspace_memory_region::slot are
address space id. Parse it separately in trace_kvm_set_user_memory().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 72853afc638b3e28779c86dd05da2f3bb149fe2c)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 5 +++--
accel/kvm/trace-events | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index b51e09a583..9bd235c969 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo
ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
slot->old_flags = mem.flags;
err:
- trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr,
- mem.memory_size, mem.userspace_addr, ret);
+ trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags,
+ mem.guest_phys_addr, mem.memory_size,
+ mem.userspace_addr, ret);
if (ret < 0) {
error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d,"
" start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s",
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index a25902597b..9f599abc17 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d"
kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d"
-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
+kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d"
kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32
kvm_resample_fd_notify(int gsi) "gsi %d"
kvm_dirty_ring_full(int id) "vcpu %d"
--
2.39.3

View File

@ -0,0 +1,62 @@
From e185104a10a37174d13d981fa1febafbb7e651aa Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Jun 2024 13:49:49 +0200
Subject: [PATCH 050/100] update-linux-headers: fix forwarding to asm-generic
headers
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [50/91] 3c98a7fe790d943bb5ff8dca1da83f5944ec3e2e (bonzini/rhel-qemu-kvm)
Afer commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove
temporary directory inbetween", 2024-05-29), updating linux-headers/
results in errors such as
cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory
because Loongarch does not have an asm/bitsperlong.h file and uses the
generic version. Before commit 3efc75ad9d9, the missing file would
incorrectly cause stale files to be included in linux-headers/. The files
were never committed to qemu.git, but were wrong nevertheless. The build
would just use the system version of the files, which is opposite to
the idea of importing Linux header files into QEMU's tree.
Create forwarding headers, resembling the ones that are generated during a
kernel build by scripts/Makefile.asm-generic, if a file is only installed
under include/asm-generic/.
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
scripts/update-linux-headers.sh | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index f084bee72e..78c0f2c43e 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -119,7 +119,14 @@ for arch in $ARCHLIST; do
rm -rf "$output/linux-headers/asm-$arch"
mkdir -p "$output/linux-headers/asm-$arch"
for header in kvm.h unistd.h bitsperlong.h mman.h; do
- cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+ if test -f "$hdrdir/include/asm/$header"; then
+ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch"
+ elif test -f "$hdrdir/include/asm-generic/$header"; then
+ # not installed as <asm/$header>, but used as such in kernel sources
+ cat <<EOF >$output/linux-headers/asm-$arch/$header
+#include <asm-generic/$header>
+EOF
+ fi
done
if [ $arch = mips ]; then
--
2.39.3

View File

@ -0,0 +1,175 @@
From 8d6c37ddc253f63202cc9519670c258e9d81b98e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Jun 2024 14:25:06 +0200
Subject: [PATCH 053/100] update-linux-headers: import linux/kvm_para.h header
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [53/91] 27d4db0ecec7d0b8adeba1ec85fca32eacee1009 (bonzini/rhel-qemu-kvm)
Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h
because it includes definitions for kvmclock and for KVM CPUID
bits. However, other definitions for KVM hypercall values and return
codes are included in include/uapi/linux/kvm_para.h and they will be
used by SEV-SNP.
To ensure that it is possible to include both <linux/kvm_para.h> and
"standard-headers/asm-x86/kvm_para.h" without conflicts, provide
linux/kvm_para.h as a portable header too, and forward linux-headers/
files to those in include/standard-headers. Note that <linux/kvm_para.h>
will include architecture-specific definitions as well, but
"standard-headers/linux/kvm_para.h" will not because it can be used in
architecture-independent files.
This could easily be extended to other architectures, but right now
they do not need any symbol in their specific kvm_para.h files.
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit aa274c33c39e7de981dc195abe60e1a246c9d248)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++
linux-headers/asm-x86/kvm_para.h | 1 +
linux-headers/linux/kvm_para.h | 2 ++
scripts/update-linux-headers.sh | 22 ++++++++++++-
4 files changed, 62 insertions(+), 1 deletion(-)
create mode 100644 include/standard-headers/linux/kvm_para.h
create mode 100644 linux-headers/asm-x86/kvm_para.h
create mode 100644 linux-headers/linux/kvm_para.h
diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h
new file mode 100644
index 0000000000..015c166302
--- /dev/null
+++ b/include/standard-headers/linux/kvm_para.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_KVM_PARA_H
+#define __LINUX_KVM_PARA_H
+
+/*
+ * This header file provides a method for making a hypercall to the host
+ * Architectures should define:
+ * - kvm_hypercall0, kvm_hypercall1...
+ * - kvm_arch_para_features
+ * - kvm_para_available
+ */
+
+/* Return values for hypercalls */
+#define KVM_ENOSYS 1000
+#define KVM_EFAULT EFAULT
+#define KVM_EINVAL EINVAL
+#define KVM_E2BIG E2BIG
+#define KVM_EPERM EPERM
+#define KVM_EOPNOTSUPP 95
+
+#define KVM_HC_VAPIC_POLL_IRQ 1
+#define KVM_HC_MMU_OP 2
+#define KVM_HC_FEATURES 3
+#define KVM_HC_PPC_MAP_MAGIC_PAGE 4
+#define KVM_HC_KICK_CPU 5
+#define KVM_HC_MIPS_GET_CLOCK_FREQ 6
+#define KVM_HC_MIPS_EXIT_VM 7
+#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
+#define KVM_HC_CLOCK_PAIRING 9
+#define KVM_HC_SEND_IPI 10
+#define KVM_HC_SCHED_YIELD 11
+#define KVM_HC_MAP_GPA_RANGE 12
+
+/*
+ * hypercalls use architecture specific
+ */
+
+#endif /* __LINUX_KVM_PARA_H */
diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h
new file mode 100644
index 0000000000..1d3e0e0b07
--- /dev/null
+++ b/linux-headers/asm-x86/kvm_para.h
@@ -0,0 +1 @@
+#include "standard-headers/asm-x86/kvm_para.h"
diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h
new file mode 100644
index 0000000000..6a1e672259
--- /dev/null
+++ b/linux-headers/linux/kvm_para.h
@@ -0,0 +1,2 @@
+#include "standard-headers/linux/kvm_para.h"
+#include <asm/kvm_para.h>
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 90759dcfe0..64d1989961 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -64,6 +64,7 @@ cp_portable() {
-e 'linux/kernel' \
-e 'linux/sysinfo' \
-e 'asm/setup_data.h' \
+ -e 'asm/kvm_para.h' \
> /dev/null
then
echo "Unexpected #include in input file $f".
@@ -71,6 +72,15 @@ cp_portable() {
fi
header=$(basename "$f");
+
+ if test -z "$arch"; then
+ # Let users of include/standard-headers/linux/ headers pick the
+ # asm-* header that they care about
+ arch_cmd='/<asm\/\([^>]*\)>/d'
+ else
+ arch_cmd='s/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/'
+ fi
+
sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \
-e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \
-e 's/u\([0-9][0-9]*\)/uint\1_t/g' \
@@ -79,7 +89,7 @@ cp_portable() {
-e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \
-e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \
-e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \
- -e 's/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \
+ -e "$arch_cmd" \
-e 's/__bitwise//' \
-e 's/__attribute__((packed))/QEMU_PACKED/' \
-e 's/__inline__/inline/' \
@@ -159,7 +169,12 @@ EOF
cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/"
cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/"
cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/"
+
cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch"
+ cat <<EOF >$output/linux-headers/asm-$arch/kvm_para.h
+#include "standard-headers/asm-$arch/kvm_para.h"
+EOF
+
# Remove everything except the macros from bootparam.h avoiding the
# unnecessary import of several video/ist/etc headers
sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \
@@ -209,6 +224,10 @@ if [ -d "$linux/LICENSES" ]; then
done
fi
+cat <<EOF >$output/linux-headers/linux/kvm_para.h
+#include "standard-headers/linux/kvm_para.h"
+#include <asm/kvm_para.h>
+EOF
cat <<EOF >$output/linux-headers/linux/virtio_config.h
#include "standard-headers/linux/virtio_config.h"
EOF
@@ -231,6 +250,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \
"$hdrdir/include/linux/ethtool.h" \
"$hdrdir/include/linux/const.h" \
"$hdrdir/include/linux/kernel.h" \
+ "$hdrdir/include/linux/kvm_para.h" \
"$hdrdir/include/linux/vhost_types.h" \
"$hdrdir/include/linux/sysinfo.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
--
2.39.3

View File

@ -0,0 +1,95 @@
From 00e250d9df1949d363758a34e3f46d8c71be054f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 3 Jun 2024 14:16:55 +0200
Subject: [PATCH 051/100] update-linux-headers: move pvpanic.h to correct
directory
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
RH-MergeRequest: 245: SEV-SNP support
RH-Jira: RHEL-39544
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Bandan Das <bdas@redhat.com>
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Commit: [51/91] 10efff5bbcb867ba34f3f9ff8045381ea96f94c7 (bonzini/rhel-qemu-kvm)
Linux has <misc/pvpanic.h>, not <linux/pvpanic.h>. Use the same
directory for QEMU's include/standard-headers/ copy.
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b8116f4cbaa0f64bb07564f20b3b5219e23c8bff)
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
hw/misc/pvpanic-isa.c | 2 +-
hw/misc/pvpanic-pci.c | 2 +-
hw/misc/pvpanic.c | 2 +-
include/standard-headers/{linux => misc}/pvpanic.h | 0
scripts/update-linux-headers.sh | 6 ++++--
5 files changed, 7 insertions(+), 5 deletions(-)
rename include/standard-headers/{linux => misc}/pvpanic.h (100%)
diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c
index ccec50f61b..b4f84c4110 100644
--- a/hw/misc/pvpanic-isa.c
+++ b/hw/misc/pvpanic-isa.c
@@ -21,7 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/isa/isa.h"
-#include "standard-headers/linux/pvpanic.h"
+#include "standard-headers/misc/pvpanic.h"
#include "hw/acpi/acpi_aml_interface.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE)
diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c
index 83be95d0d2..4d44a881da 100644
--- a/hw/misc/pvpanic-pci.c
+++ b/hw/misc/pvpanic-pci.c
@@ -21,7 +21,7 @@
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
#include "hw/pci/pci_device.h"
-#include "standard-headers/linux/pvpanic.h"
+#include "standard-headers/misc/pvpanic.h"
OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE)
diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c
index 1540e9091a..80289ecf5f 100644
--- a/hw/misc/pvpanic.c
+++ b/hw/misc/pvpanic.c
@@ -21,7 +21,7 @@
#include "hw/qdev-properties.h"
#include "hw/misc/pvpanic.h"
#include "qom/object.h"
-#include "standard-headers/linux/pvpanic.h"
+#include "standard-headers/misc/pvpanic.h"
static void handle_event(int event)
{
diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h
similarity index 100%
rename from include/standard-headers/linux/pvpanic.h
rename to include/standard-headers/misc/pvpanic.h
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index 78c0f2c43e..90759dcfe0 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -232,10 +232,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \
"$hdrdir/include/linux/const.h" \
"$hdrdir/include/linux/kernel.h" \
"$hdrdir/include/linux/vhost_types.h" \
- "$hdrdir/include/linux/sysinfo.h" \
- "$hdrdir/include/misc/pvpanic.h"; do
+ "$hdrdir/include/linux/sysinfo.h"; do
cp_portable "$i" "$output/include/standard-headers/linux"
done
+mkdir -p "$output/include/standard-headers/misc"
+cp_portable "$hdrdir/include/misc/pvpanic.h" \
+ "$output/include/standard-headers/misc"
mkdir -p "$output/include/standard-headers/drm"
cp_portable "$hdrdir/include/drm/drm_fourcc.h" \
"$output/include/standard-headers/drm"
--
2.39.3

View File

@ -0,0 +1,49 @@
From 3dd1412176a8ee6c06b5d41aa00ca49b535d99b7 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 24 Jul 2024 06:48:59 -0400
Subject: [PATCH 092/100] virtio-rng: block max-bytes=0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 259: virtio-rng: block max-bytes=0
RH-Jira: RHEL-50336
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Acked-by: Thomas Huth <thuth@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [1/1] 6d9852cc7cf7fdf49521b6301ceda26e11b1291f (lvivier/qemu-kvm-centos)
JIRA: https://issues.redhat.com/browse/RHEL-50336
with max-bytes set to 0, quota is 0 and so device does not work.
block this to avoid user confusion
Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 024d046bf41b5256adec671085bcee767a6da125)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
hw/virtio/virtio-rng.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index f74efffef7..7cf31da071 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
/* Workaround: Property parsing does not enforce unsigned integers,
* So this is a hack to reject such numbers. */
- if (vrng->conf.max_bytes > INT64_MAX) {
- error_setg(errp, "'max-bytes' parameter must be non-negative, "
+ if (vrng->conf.max_bytes == 0 ||
+ vrng->conf.max_bytes > INT64_MAX) {
+ error_setg(errp, "'max-bytes' parameter must be positive, "
"and less than 2^63");
return;
}
--
2.39.3

Some files were not shown because too many files have changed in this diff Show More