diff --git a/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch b/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch new file mode 100644 index 0000000..65da2cc --- /dev/null +++ b/kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch @@ -0,0 +1,139 @@ +From 93ea86ac8849ad9ca365b1646313dde9a34ba59c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:03 -0500 +Subject: [PATCH 031/100] HostMem: Add mechanism to opt in kvm guest memfd via + MachineState + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [31/91] 43ce32aef954479cdb736301d1adcb919602c321 (bonzini/rhel-qemu-kvm) + +Add a new member "guest_memfd" to memory backends. When it's set +to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm +guest_memfd will be allocated during RAMBlock allocation. + +Memory backend's @guest_memfd is wired with @require_guest_memfd +field of MachineState. It avoids looking up the machine in physmem.c. + +MachineState::require_guest_memfd is supposed to be set by any VMs +that require KVM guest memfd as private memory, e.g., TDX VM. 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-8-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c) +Signed-off-by: Paolo Bonzini +--- + backends/hostmem-file.c | 1 + + backends/hostmem-memfd.c | 1 + + backends/hostmem-ram.c | 1 + + backends/hostmem.c | 1 + + hw/core/machine.c | 5 +++++ + include/hw/boards.h | 2 ++ + include/sysemu/hostmem.h | 1 + + 7 files changed, 12 insertions(+) + +diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c +index ac3e433cbd..3c69db7946 100644 +--- a/backends/hostmem-file.c ++++ b/backends/hostmem-file.c +@@ -85,6 +85,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; + ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + ram_flags |= fb->is_pmem ? RAM_PMEM : 0; + ram_flags |= RAM_NAMED_FILE; + return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, +diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c +index 3923ea9364..745ead0034 100644 +--- a/backends/hostmem-memfd.c ++++ b/backends/hostmem-memfd.c +@@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? 
RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, + backend->size, ram_flags, fd, 0, errp); + } +diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c +index d121249f0f..f7d81af783 100644 +--- a/backends/hostmem-ram.c ++++ b/backends/hostmem-ram.c +@@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) + name = host_memory_backend_get_name(backend); + ram_flags = backend->share ? RAM_SHARED : 0; + ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ++ ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; + return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), + name, backend->size, + ram_flags, errp); +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 81a72ce40b..eb9682b4a8 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -277,6 +277,7 @@ static void host_memory_backend_init(Object *obj) + /* TODO: convert access to globals to compat properties */ + backend->merge = machine_mem_merge(machine); + backend->dump = machine_dump_guest_core(machine); ++ backend->guest_memfd = machine_require_guest_memfd(machine); + backend->reserve = true; + backend->prealloc_threads = machine->smp.cpus; + } +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 92609aae27..07b994e136 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1480,6 +1480,11 @@ bool machine_mem_merge(MachineState *machine) + return machine->mem_merge; + } + ++bool machine_require_guest_memfd(MachineState *machine) ++{ ++ return machine->require_guest_memfd; ++} ++ + static char *cpu_slot_to_string(const CPUArchId *cpu) + { + GString *s = g_string_new(NULL); +diff --git a/include/hw/boards.h b/include/hw/boards.h +index cca62f906b..815a1c4b26 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -36,6 +36,7 @@ bool machine_usb(MachineState *machine); + int machine_phandle_start(MachineState *machine); + bool machine_dump_guest_core(MachineState *machine); 
+ bool machine_mem_merge(MachineState *machine); ++bool machine_require_guest_memfd(MachineState *machine); + HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); + void machine_set_cpu_numa_node(MachineState *machine, + const CpuInstanceProperties *props, +@@ -372,6 +373,7 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; ++ bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h +index 0e411aaa29..04b884bf42 100644 +--- a/include/sysemu/hostmem.h ++++ b/include/sysemu/hostmem.h +@@ -74,6 +74,7 @@ struct HostMemoryBackend { + uint64_t size; + bool merge, dump, use_canonical_path; + bool prealloc, is_mapped, share, reserve; ++ bool guest_memfd; + uint32_t prealloc_threads; + ThreadContext *prealloc_context; + DECLARE_BITMAP(host_nodes, MAX_NODES + 1); +-- +2.39.3 + diff --git a/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch b/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch new file mode 100644 index 0000000..aaedcf4 --- /dev/null +++ b/kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch @@ -0,0 +1,203 @@ +From c46ac3db0a4db60e667edeabc9ed451c6e8e0ccf Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:33 -0400 +Subject: [PATCH 020/100] KVM: remove kvm_arch_cpu_check_are_resettable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [20/91] d7745bd1a0ed1b215847f150f4a1bb2e912beabc (bonzini/rhel-qemu-kvm) + +Board reset requires writing a fresh CPU state. As far as KVM is +concerned, the only thing that blocks reset is that CPU state is +encrypted; therefore, kvm_cpus_are_resettable() can simply check +if that is the case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit a99c0c66ebe7d8db3af6f16689ade9375247e43e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-accel-ops.c | 2 +- + accel/kvm/kvm-all.c | 5 ----- + include/sysemu/kvm.h | 10 ---------- + target/arm/kvm.c | 5 ----- + target/i386/kvm/kvm.c | 5 ----- + target/loongarch/kvm/kvm.c | 5 ----- + target/mips/kvm.c | 5 ----- + target/ppc/kvm.c | 5 ----- + target/riscv/kvm/kvm-cpu.c | 5 ----- + target/s390x/kvm/kvm.c | 5 ----- + 10 files changed, 1 insertion(+), 51 deletions(-) + +diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c +index b3c946dc4b..74e3c5785b 100644 +--- a/accel/kvm/kvm-accel-ops.c ++++ b/accel/kvm/kvm-accel-ops.c +@@ -82,7 +82,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) + + static bool kvm_cpus_are_resettable(void) + { +- return !kvm_enabled() || kvm_cpu_check_are_resettable(); ++ return !kvm_enabled() || !kvm_state->guest_state_protected; + } + + #ifdef KVM_CAP_SET_GUEST_DEBUG +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ec0f6df7c5..b51e09a583 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2696,11 +2696,6 @@ void kvm_flush_coalesced_mmio_buffer(void) + s->coalesced_flush_in_progress = false; + } + +-bool kvm_cpu_check_are_resettable(void) +-{ +- return kvm_arch_cpu_check_are_resettable(); +-} +- + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { + if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 302e8f6f1e..54f4d83a37 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -525,16 +525,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); + /* Notify resamplefd for EOI of specific interrupts. 
*/ + void kvm_resample_fd_notify(int gsi); + +-/** +- * kvm_cpu_check_are_resettable - return whether CPUs can be reset +- * +- * Returns: true: CPUs are resettable +- * false: CPUs are not resettable +- */ +-bool kvm_cpu_check_are_resettable(void); +- +-bool kvm_arch_cpu_check_are_resettable(void); +- + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index ab85d628a8..21ebbf3b8f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -1598,11 +1598,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) + return (data - 32) & 0xffff; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e271652620..a12207a8ee 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -5623,11 +5623,6 @@ bool kvm_has_waitpkg(void) + return has_msr_umwait; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return !sev_es_enabled(); +-} +- + #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + + void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d630cc39cb..8224d94333 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,11 +733,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) + return true; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +diff --git a/target/mips/kvm.c b/target/mips/kvm.c +index 6c52e59f55..a631ab544f 100644 +--- a/target/mips/kvm.c ++++ b/target/mips/kvm.c +@@ -1273,11 +1273,6 @@ int kvm_arch_get_default_type(MachineState *machine) + return -1; + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void 
kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c +index 59f640cf7b..9d9d9f0d79 100644 +--- a/target/ppc/kvm.c ++++ b/target/ppc/kvm.c +@@ -2968,11 +2968,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c +index 6a6c6cae80..49d2f3ad58 100644 +--- a/target/riscv/kvm/kvm-cpu.c ++++ b/target/riscv/kvm/kvm-cpu.c +@@ -1475,11 +1475,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) + } + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + static int aia_mode; + + static const char *kvm_aia_mode_str(uint64_t mode) +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 55fb4855b1..4db59658e1 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -2630,11 +2630,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) + kvm_s390_vcpu_interrupt(cpu, &irq); + } + +-bool kvm_arch_cpu_check_are_resettable(void) +-{ +- return true; +-} +- + int kvm_s390_get_zpci_op(void) + { + return cap_zpci_op; +-- +2.39.3 + diff --git a/kvm-KVM-track-whether-guest-state-is-encrypted.patch b/kvm-KVM-track-whether-guest-state-is-encrypted.patch new file mode 100644 index 0000000..7cdab60 --- /dev/null +++ b/kvm-KVM-track-whether-guest-state-is-encrypted.patch @@ -0,0 +1,127 @@ +From 50399796da938c4ea7c69058fde84695bce9d794 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 14:41:10 -0400 +Subject: [PATCH 019/100] KVM: track whether guest state is encrypted +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [19/91] 
685b9c54d43d0043d15c33d13afc3a420cbe139b (bonzini/rhel-qemu-kvm) + +So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the +guest state is encrypted, in which case they do nothing. For the new +API using VM types, instead, the ioctls will fail which is a safer and +more robust approach. + +The new API will be the only one available for SEV-SNP and TDX, but it +is also usable for SEV and SEV-ES. In preparation for that, require +architecture-specific KVM code to communicate the point at which guest +state is protected (which must be after kvm_cpu_synchronize_post_init(), +though that might change in the future in order to support migration). +From that point, skip reading registers so that cpu->vcpu_dirty is +never true: if it ever becomes true, kvm_arch_put_registers() will +fail miserably. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5c3131c392f84c660033d511ec39872d8beb4b1e) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 17 ++++++++++++++--- + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/i386/sev.c | 1 + + 4 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 931f74256e..ec0f6df7c5 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2703,7 +2703,7 @@ bool kvm_cpu_check_are_resettable(void) + + static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + int ret = kvm_arch_get_registers(cpu); + if (ret) { + error_report("Failed to get registers: %s", strerror(-ret)); +@@ -2717,7 +2717,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_state(CPUState *cpu) + { +- if (!cpu->vcpu_dirty) { ++ if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { + run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL); + } + } 
+@@ -2752,7 +2752,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) + + void kvm_cpu_synchronize_post_init(CPUState *cpu) + { +- run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ if (!kvm_state->guest_state_protected) { ++ /* ++ * This runs before the machine_init_done notifiers, and is the last ++ * opportunity to synchronize the state of confidential guests. ++ */ ++ run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); ++ } + } + + static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +@@ -4099,3 +4105,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp) + query_stats_schema_vcpu(first_cpu, &stats_args); + } + } ++ ++void kvm_mark_guest_state_protected(void) ++{ ++ kvm_state->guest_state_protected = true; ++} +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index fad9a7e8ff..302e8f6f1e 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -539,6 +539,8 @@ bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); + ++void kvm_mark_guest_state_protected(void); ++ + /** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 882e37e12c..3496be7997 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -87,6 +87,7 @@ struct KVMState + bool kernel_irqchip_required; + OnOffAuto kernel_irqchip_split; + bool sync_mmu; ++ bool guest_state_protected; + uint64_t manual_dirty_log_protect; + /* The man page (and posix) say ioctl numbers are signed int, but + * they're not. 
Linux, glibc and *BSD all treat ioctl numbers as +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b8f79d34d1..c49a8fd55e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -755,6 +755,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + if (ret) { + exit(1); + } ++ kvm_mark_guest_state_protected(); + } + + /* query the measurement blob length */ +-- +2.39.3 + diff --git a/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch b/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch new file mode 100644 index 0000000..8e47872 --- /dev/null +++ b/kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch @@ -0,0 +1,329 @@ +From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:02 -0500 +Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm) + +Add KVM guest_memfd support to RAMBlock so both normal hva based memory +and kvm guest memfd based private memory can be associated in one RAMBlock. + +Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to +create private guest_memfd during RAMBlock setup. + +Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd +is more flexible and extensible than simply relying on the VM type because +in the future we may have the case that not all the memory of a VM needs +guest memfd. As a benefit, it also avoids getting MachineState in memory +subsystem. + +Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of +confidential guests, such as TDX VM. How and when to set it for memory +backends will be implemented in the following patches. 
+ +Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has +KVM guest_memfd allocated. + +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Message-ID: <20240320083945.991426-7-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/exec/memory.h | 20 +++++++++++++++++--- + include/exec/ram_addr.h | 2 +- + include/exec/ramblock.h | 1 + + include/sysemu/kvm.h | 2 ++ + system/memory.c | 5 +++++ + system/physmem.c | 34 +++++++++++++++++++++++++++++++--- + 8 files changed, 90 insertions(+), 7 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 272e945f52..a7b9a127dd 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; + static uint64_t kvm_supported_memory_attributes; ++static bool kvm_guest_memfd_supported; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms) + } + + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); ++ kvm_guest_memfd_supported = ++ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && ++ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && ++ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); ++ + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void) + { + kvm_state->guest_state_protected = true; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ int fd; ++ struct kvm_create_guest_memfd guest_memfd = { ++ .size = size, ++ .flags = flags, ++ }; ++ ++ 
if (!kvm_guest_memfd_supported) { ++ error_setg(errp, "KVM does not support guest_memfd"); ++ return -1; ++ } ++ ++ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); ++ if (fd < 0) { ++ error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); ++ return -1; ++ } ++ ++ return fd; ++} +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index ca38172884..8e0eb22e61 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void) + { + return false; + } ++ ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) ++{ ++ return -ENOSYS; ++} +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 8626a355b3..679a847685 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent { + /* RAM FD is opened read-only */ + #define RAM_READONLY_FD (1 << 11) + ++/* RAM can be private that has kvm guest memfd backend */ ++#define RAM_GUEST_MEMFD (1 << 12) ++ + static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn, + IOMMUNotifierFlag flags, + hwaddr start, hwaddr end, +@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr, + * @name: Region name, becomes part of RAMBlock name used in migration stream + * must be unique within any device + * @size: size of the region. +- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. ++ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, ++ * RAM_GUEST_MEMFD. + * @errp: pointer to Error*, to store an error if it happens. + * + * Note that this function does not do anything to cause the data in the +@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, + * (getpagesize()) will be used. + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @path: the path in which to allocate the RAM. + * @offset: offset within the file referenced by path + * @errp: pointer to Error*, to store an error if it happens. +@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, + * @size: size of the region. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @fd: the fd to mmap. + * @offset: offset within the file referenced by fd + * @errp: pointer to Error*, to store an error if it happens. +@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) + */ + bool memory_region_is_protected(MemoryRegion *mr); + ++/** ++ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd ++ * associated ++ * ++ * Returns %true if a memory region's ram_block has valid guest_memfd assigned. ++ * ++ * @mr: the memory region being queried ++ */ ++bool memory_region_has_guest_memfd(MemoryRegion *mr); ++ + /** + * memory_region_get_iommu: check whether a memory region is an iommu + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index de45ba7bc9..07c8f86375 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void); + * @mr: the memory region where the ram block is + * @ram_flags: RamBlock flags. 
Supported flags: RAM_SHARED, RAM_PMEM, + * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, +- * RAM_READONLY_FD ++ * RAM_READONLY_FD, RAM_GUEST_MEMFD + * @mem_path or @fd: specify the backing file or device + * @offset: Offset into target file + * @errp: pointer to Error*, to store an error if it happens +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 848915ea5b..459c8917de 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -41,6 +41,7 @@ struct RAMBlock { + QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; + int fd; + uint64_t fd_offset; ++ int guest_memfd; + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index f114ff6986..9e4ab7ae89 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void); + */ + bool kvm_hwpoisoned_mem(void); + ++int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); ++ + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + +diff --git a/system/memory.c b/system/memory.c +index a229a79988..c756950c0c 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr) + return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); + } + ++bool memory_region_has_guest_memfd(MemoryRegion *mr) ++{ ++ return mr->ram_block && mr->ram_block->guest_memfd >= 0; ++} ++ + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +diff --git a/system/physmem.c b/system/physmem.c +index a4fe3d2bf8..f5dfa20e57 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + const bool shared = qemu_ram_is_shared(new_block); + RAMBlock *block; + RAMBlock *last_block = 
NULL; ++ bool free_on_error = false; + ram_addr_t old_ram_size, new_ram_size; + Error *err = NULL; + +@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + return; + } + memory_try_enable_merging(new_block->host, new_block->max_length); ++ free_on_error = true; ++ } ++ } ++ ++ if (new_block->flags & RAM_GUEST_MEMFD) { ++ assert(kvm_enabled()); ++ assert(new_block->guest_memfd < 0); ++ ++ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, ++ 0, errp); ++ if (new_block->guest_memfd < 0) { ++ qemu_mutex_unlock_ramlist(); ++ goto out_free; + } + } + +@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + ram_block_notify_add(new_block->host, new_block->used_length, + new_block->max_length); + } ++ return; ++ ++out_free: ++ if (free_on_error) { ++ qemu_anon_ram_free(new_block->host, new_block->max_length); ++ new_block->host = NULL; ++ } + } + + #ifdef CONFIG_POSIX +@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + /* Just support these ram flags by now. 
*/ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | + RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | +- RAM_READONLY_FD)) == 0); ++ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); + + if (xen_enabled()) { + error_setg(errp, "-mem-path not supported with Xen"); +@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, + new_block->used_length = size; + new_block->max_length = size; + new_block->flags = ram_flags; ++ new_block->guest_memfd = -1; + new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, + errp); + if (!new_block->host) { +@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + int align; + + assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | +- RAM_NORESERVE)) == 0); ++ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + assert(!host ^ (ram_flags & RAM_PREALLOC)); + + align = qemu_real_host_page_size(); +@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, + new_block->max_length = max_size; + assert(max_size >= size); + new_block->fd = -1; ++ new_block->guest_memfd = -1; + new_block->page_size = qemu_real_host_page_size(); + new_block->host = host; + new_block->flags = ram_flags; +@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, + RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, + MemoryRegion *mr, Error **errp) + { +- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); ++ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); + return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); + } + +@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block) + } else { + qemu_anon_ram_free(block->host, block->max_length); + } ++ ++ if (block->guest_memfd >= 0) { ++ close(block->guest_memfd); ++ } ++ + g_free(block); + } + +-- +2.39.3 + diff --git 
a/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch b/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch new file mode 100644 index 0000000..04a5fbf --- /dev/null +++ b/kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch @@ -0,0 +1,82 @@ +From bd289293604d6f33e9fb89196f0b19117ce81f89 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 20 Mar 2024 17:45:29 +0100 +Subject: [PATCH 032/100] RAMBlock: make guest_memfd require uncoordinated + discard + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [32/91] 0c005849026c334737b88cbd20a0ac237dfca37e (bonzini/rhel-qemu-kvm) + +Some subsystems like VFIO might disable ram block discard, but guest_memfd +uses discard operations to implement conversions between private and +shared memory. Because of this, sequences like the following can result +in stale IOMMU mappings: + +1. allocate shared page +2. convert page shared->private +3. discard shared page +4. convert page private->shared +5. allocate shared page +6. issue DMA operations against that shared page + +This is not a use-after-free, because after step 3 VFIO is still pinning +the page. However, DMA operations in step 6 will hit the old mapping +that was allocated in step 1. + +Address this by taking ram_block_discard_is_enabled() into account when +deciding whether or not to discard pages. + +Since kvm_convert_memory()/guest_memfd doesn't implement a +RamDiscardManager handler to convey and replay discard operations, +this is a case of uncoordinated discard, which is blocked/released +by ram_block_discard_require(). Interestingly, this function had +no use so far. + +Alternative approaches would be to block discard of shared pages, but +this would cause guests to consume twice the memory if they use VFIO; +or to implement a RamDiscardManager and only block uncoordinated +discard, i.e. 
use ram_block_coordinated_discard_require(). + +[Commit message mostly by Michael Roth ] + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f) +Signed-off-by: Paolo Bonzini +--- + system/physmem.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/system/physmem.c b/system/physmem.c +index f5dfa20e57..5ebcf5be11 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1846,6 +1846,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + ++ if (ram_block_discard_require(true) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot set up private guest memory: discard currently blocked"); ++ error_append_hint(errp, "Are you using assigned devices?\n"); ++ goto out_free; ++ } ++ + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { +@@ -2109,6 +2116,7 @@ static void reclaim_ramblock(RAMBlock *block) + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); ++ ram_block_discard_require(false); + } + + g_free(block); +-- +2.39.3 + diff --git a/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch b/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch new file mode 100644 index 0000000..785b437 --- /dev/null +++ b/kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch @@ -0,0 +1,90 @@ +From 0f0a3a860a07addea21a0282556a5022b9cb8b2c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:35 -0500 +Subject: [PATCH 011/100] confidential guest support: Add kvm_init() and + kvm_reset() in class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [11/91] 21d2178178bf181a8e4d0b051f64bd983f0d0cf1 (bonzini/rhel-qemu-kvm) + +Different confidential VMs in different architectures all have the same +needs to do 
their specific initialization (and maybe resetting) stuffs +with KVM. Currently each of them exposes individual *_kvm_init() +functions and lets machine code or kvm code call them. + +To facilitate the introduction of confidential guest technology from +different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() +in ConfidentialGuestSupportClass, and expose two helper functions for +invoking them. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe) +Signed-off-by: Paolo Bonzini +--- + include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index ba2dd4b5df..e5b188cffb 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -23,7 +23,10 @@ + #include "qom/object.h" + + #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" +-OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) ++OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, ++ ConfidentialGuestSupportClass, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ + + struct ConfidentialGuestSupport { + Object parent; +@@ -55,8 +58,37 @@ struct ConfidentialGuestSupport { + + typedef struct ConfidentialGuestSupportClass { + ObjectClass parent; ++ ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); ++ int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); + } ConfidentialGuestSupportClass; + ++static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_init) { ++ return klass->kvm_init(cgs, errp); ++ } ++ ++ return 0; ++} ++ ++static inline int 
confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, ++ Error **errp) ++{ ++ ConfidentialGuestSupportClass *klass; ++ ++ klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); ++ if (klass->kvm_reset) { ++ return klass->kvm_reset(cgs, errp); ++ } ++ ++ return 0; ++} ++ + #endif /* !CONFIG_USER_ONLY */ + + #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ +-- +2.39.3 + diff --git a/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch b/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch new file mode 100644 index 0000000..ee2f88e --- /dev/null +++ b/kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch @@ -0,0 +1,73 @@ +From e74980be81d641736ea9d44d0fe9af02af63a220 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:40 -0500 +Subject: [PATCH 083/100] hw/i386: Add support for loading BIOS using + guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [83/91] 7b77d212ef7d83b66ad9d8348179ee84e64fb911 (bonzini/rhel-qemu-kvm) + +When guest_memfd is enabled, the BIOS is generally part of the initial +encrypted guest image and will be accessed as private guest memory. Add +the necessary changes to set up the associated RAM region with a +guest_memfd backend to allow for this. + +Current support centers around using -bios to load the BIOS data. +Support for loading the BIOS via pflash requires additional enablement +since those interfaces rely on the use of ROM memory regions which make +use of the KVM_MEM_READONLY memslot flag, which is not supported for +guest_memfd-backed memslots. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit fc7a69e177e4ba26d11fcf47b853f85115b35a11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86-common.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 35fe6eabea..6cbb76c25c 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(x86ms))) { ++ memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", ++ bios_size, &error_fatal); ++ } + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + } + g_free(filename); + +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); ++ if (!machine_require_guest_memfd(MACHINE(x86ms))) { ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ } + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch b/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch new file mode 100644 index 0000000..1fafe03 --- /dev/null +++ b/kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch @@ -0,0 +1,106 @@ +From c1e615d6b8f609b72a94ffe6d31a9848a41744ef Mon Sep 17 00:00:00 
2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:39 +0200 +Subject: [PATCH 038/100] hw/i386: Have x86_bios_rom_init() take + X86MachineState rather than MachineState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [38/91] 59f388b1dffc5d0aa2f0fff768194d755bc3efbb (bonzini/rhel-qemu-kvm) + +The function creates and leaks two MemoryRegion objects regarding the BIOS which +will be moved into X86MachineState in the next steps to avoid the leakage. + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-3-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 848351840148f8c3b53ddf6210194506547d3ffd) +Signed-off-by: Paolo Bonzini +--- + hw/i386/microvm.c | 2 +- + hw/i386/pc_sysfw.c | 4 ++-- + hw/i386/x86.c | 4 ++-- + include/hw/i386/x86.h | 2 +- + 4 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c +index 61a772dfe6..fec63cacfa 100644 +--- a/hw/i386/microvm.c ++++ b/hw/i386/microvm.c +@@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) + default_firmware = x86_machine_is_acpi_enabled(x86ms) + ? 
MICROVM_BIOS_FILENAME + : MICROVM_QBOOT_FILENAME; +- x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); ++ x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); + } + + static void microvm_memory_init(MicrovmMachineState *mms) +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 3efabbbab2..ef7dea9798 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -206,7 +206,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; + + if (!pcmc->pci_enabled) { +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); + return; + } + +@@ -227,7 +227,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + + if (!pflash_blk[0]) { + /* Machine property pflash0 not set, use ROM mode */ +- x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); ++ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); + } else { + if (kvm_enabled() && !kvm_readonly_mem_enabled()) { + /* +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 2a4f3ee285..6d3c72f124 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; +@@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + ssize_t ret; + + /* BIOS load */ +- bios_name = ms->firmware ?: default_firmware; ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 4dc30dcb4d..cb07618d19 100644 +--- 
a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -116,7 +116,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + +-void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + + void x86_load_linux(X86MachineState *x86ms, +-- +2.39.3 + diff --git a/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch b/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch new file mode 100644 index 0000000..a789fb7 --- /dev/null +++ b/kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch @@ -0,0 +1,51 @@ +From 7bb1f124413891bc5d2187f12cd19da6e794904b Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 3 Apr 2024 10:59:53 -0400 +Subject: [PATCH 010/100] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is + not disabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [10/91] 62110e4bf52cb3e106c8d2a902bbd31548beba00 (bonzini/rhel-qemu-kvm) + +A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system +also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC +is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to +be cleared. The PIC probe should then print: + + [ 0.155970] Using NULL legacy PIC + +However, no such log printed in guest kernel unless PCAT_COMPAT is +cleared. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 292dd287e78e0cbafde9d1522c729349d132d844) +Signed-off-by: Paolo Bonzini +--- + hw/i386/acpi-common.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c +index 20f19269da..0cc2919bb8 100644 +--- a/hw/i386/acpi-common.c ++++ b/hw/i386/acpi-common.c +@@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, + acpi_table_begin(&table, table_data); + /* Local APIC Address */ + build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); +- build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ ++ /* Flags. bit 0: PCAT_COMPAT */ ++ build_append_int_noprefix(table_data, ++ x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); + + for (i = 0; i < apic_ids->len; i++) { + pc_madt_cpu_entry(i, apic_ids, table_data, false); +-- +2.39.3 + diff --git a/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch b/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch new file mode 100644 index 0000000..021db3d --- /dev/null +++ b/kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch @@ -0,0 +1,164 @@ +From fd6de3c5e97bdf13a39342fc71815a20c66867ae Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:07 +0200 +Subject: [PATCH 043/100] hw/i386/pc_sysfw: Alias rather than copy isa-bios + region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [43/91] f64dab2a091838a10a9b94e3d09ea11432b0809f (bonzini/rhel-qemu-kvm) + +In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped +to the top of the 4G memory boundary. Do the same in the -pflash case, but only +for new machine versions for migration compatibility. 
This establishes common +behavior and makes pflash commands work in the "isa-bios" region which some +real-world legacy bioses rely on. + +Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash +case will now also point to encrypted memory, just like it already does in the +-bios case. + +When running `info mtree` before and after this commit with +`qemu-system-x86_64 -S -drive \ +if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running +`diff -u before.mtree after.mtree` results in the following changes in the +memory tree: + +| --- before.mtree +| +++ after.mtree +| @@ -71,7 +71,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff +| @@ -108,7 +108,7 @@ +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff +| 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff +| 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 
00000000000c4000-00000000000c7fff +| @@ -131,11 +131,14 @@ +| memory-region: pc.ram +| 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +| +| +memory-region: system.flash0 +| + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 +| + +| memory-region: pci +| 0000000000000000-ffffffffffffffff (prio -1, i/o): pci +| 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem +| 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom +| - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios +| + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff +| +| memory-region: smram +| 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff + +Note that in both cases the "system" memory region contains the entry + + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + +but the "system.flash0" memory region only appears standalone when "isa-bios" is +an alias. + +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-7-shentey@gmail.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + hw/i386/pc_piix.c | 1 + + hw/i386/pc_q35.c | 1 + + hw/i386/pc_sysfw.c | 8 +++++++- + include/hw/i386/pc.h | 1 + + 5 files changed, 11 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 1a34bc4522..660a59c63b 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1967,6 +1967,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + pcmc->has_reserved_memory = true; + pcmc->enforce_aligned_dimm = true; + pcmc->enforce_amd_1tb_hole = true; ++ pcmc->isa_bios_alias = true; + /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported + * to be used at the moment, 32K should be enough for a while. 
*/ + pcmc->acpi_data_size = 0x20000 + 0x8000; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index bef3e8b73e..dbb7f2ed17 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -975,6 +975,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + m->alias = "pc"; + m->is_default = 1; + m->smp_props.prefer_sockets = true; ++ pcmc->isa_bios_alias = false; + } + + static void pc_init_rhel760(MachineState *machine) +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index dedc86eec9..f9900ad798 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -735,6 +735,7 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + m->desc = "RHEL-9.4.0 PC (Q35 + ICH9, 2009)"; + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; ++ pcmc->isa_bios_alias = false; + + compat_props_add(m->compat_props, pc_rhel_9_5_compat, + pc_rhel_9_5_compat_len); +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 82d37cb376..ac88ad4eb9 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -135,6 +135,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { + X86MachineState *x86ms = X86_MACHINE(pcms); ++ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -184,7 +185,12 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ if (pcmc->isa_bios_alias) { ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, ++ true); ++ } else { ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ } + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 467e7fb52f..3f53ec73ac 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -122,6 +122,7 @@ struct PCMachineClass { + bool enforce_aligned_dimm; + bool 
broken_reserved_end; + bool enforce_amd_1tb_hole; ++ bool isa_bios_alias; + + /* generate legacy CPU hotplug AML */ + bool legacy_cpu_hotplug; +-- +2.39.3 + diff --git a/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch b/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch new file mode 100644 index 0000000..4188fd3 --- /dev/null +++ b/kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch @@ -0,0 +1,53 @@ +From 9bf1d368c4b53139db39649833d475e097fc98d1 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Mon, 22 Apr 2024 22:06:22 +0200 +Subject: [PATCH 039/100] hw/i386/pc_sysfw: Remove unused parameter from + pc_isa_bios_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [39/91] c0019dc2706a8e3f40486fd4a4c0dd1fbe23237b (bonzini/rhel-qemu-kvm) + +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240422200625.2768-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit f4b63768b91811cdcf1fb7b270587123251dfea5) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ef7dea9798..59c7a81692 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -41,8 +41,7 @@ + #define FLASH_SECTOR_SIZE 4096 + + static void pc_isa_bios_init(MemoryRegion *rom_memory, +- MemoryRegion *flash_mem, +- int ram_size) ++ MemoryRegion *flash_mem) + { + int isa_bios_size; + MemoryRegion *isa_bios; +@@ -186,7 +185,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem, size); ++ pc_isa_bios_init(rom_memory, flash_mem); + + /* 
Encrypt the pflash boot ROM */ + if (sev_enabled()) { +-- +2.39.3 + diff --git a/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch b/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch new file mode 100644 index 0000000..a543c79 --- /dev/null +++ b/kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch @@ -0,0 +1,158 @@ +From e6472ff46cbed97c2a238a8ef7d321351931333a Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:30 -0500 +Subject: [PATCH 070/100] hw/i386/sev: Add function to get SEV metadata from + OVMF header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [70/91] ba818dade96119c8a51ca1fb222f4f69e2752396 (bonzini/rhel-qemu-kvm) + +A recent version of OVMF expanded the reset vector GUID list to add +SEV-specific metadata GUID. The SEV metadata describes the reserved +memory regions such as the secrets and CPUID page used during the SEV-SNP +guest launch. + +The pc_system_get_ovmf_sev_metadata_ptr() is used to retrieve the SEV +metadata pointer from the OVMF GUID list. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f3c30c575d34122573b7370a7da5ca3a27dde481) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 4 ++++ + include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ + target/i386/sev-sysemu-stub.c | 4 ++++ + target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ + target/i386/sev.h | 2 ++ + 5 files changed, 68 insertions(+) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index ac88ad4eb9..9b8671c441 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size) + pc_system_parse_ovmf_flash(ptr, size); + + if (sev_enabled()) { ++ ++ /* Copy the SEV metadata table (if it exists) */ ++ pc_system_parse_sev_metadata(ptr, size); ++ + ret = sev_es_save_reset_vector(ptr, size); + if (ret) { + error_report("failed to locate and/or save reset vector"); +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 3f53ec73ac..94b49310f5 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -167,6 +167,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" + #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + ++typedef enum { ++ SEV_DESC_TYPE_UNDEF, ++ /* The section contains the region that must be validated by the VMM. 
*/ ++ SEV_DESC_TYPE_SNP_SEC_MEM, ++ /* The section contains the SNP secrets page */ ++ SEV_DESC_TYPE_SNP_SECRETS, ++ /* The section contains address that can be used as a CPUID page */ ++ SEV_DESC_TYPE_CPUID, ++ ++} ovmf_sev_metadata_desc_type; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { ++ uint32_t base; ++ uint32_t len; ++ ovmf_sev_metadata_desc_type type; ++} OvmfSevMetadataDesc; ++ ++typedef struct __attribute__((__packed__)) OvmfSevMetadata { ++ uint8_t signature[4]; ++ uint32_t len; ++ uint32_t version; ++ uint32_t num_desc; ++ OvmfSevMetadataDesc descs[]; ++} OvmfSevMetadata; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, + MemoryRegion *pci_address_space); +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index 96e1c15cc3..fc1c57c411 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + monitor_printf(mon, "SEV is not available in this QEMU\n"); + } ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e84e4395a5..17281bb2c7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) + return sev_get_capabilities(errp); + } + ++static OvmfSevMetadata *ovmf_sev_metadata_table; ++ ++#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" ++typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { ++ uint32_t offset; ++} OvmfSevMetadataOffset; ++ ++OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) ++{ ++ return ovmf_sev_metadata_table; ++} ++ ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) ++{ ++ OvmfSevMetadata *metadata; ++ OvmfSevMetadataOffset *data; ++ ++ if 
(!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, ++ NULL)) { ++ return; ++ } ++ ++ metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); ++ if (memcmp(metadata->signature, "ASEV", 4) != 0 || ++ metadata->len < sizeof(OvmfSevMetadata) || ++ metadata->len > flash_size - data->offset) { ++ return; ++ } ++ ++ ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); ++} ++ + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + Error **errp) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 5dc4767b1e..cc12824dd6 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + ++void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); ++ + #endif +-- +2.39.3 + diff --git a/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch b/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch new file mode 100644 index 0000000..c5a7a28 --- /dev/null +++ b/kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch @@ -0,0 +1,165 @@ +From 226cf6c3d3e2fd1a35422043dbe0b73d1216df83 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:36 -0500 +Subject: [PATCH 073/100] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP + is enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [73/91] 844afd322c12c3e8992cf6ec692c94e70747bd0c (bonzini/rhel-qemu-kvm) + +As with SEV, an SNP guest requires that the BIOS be part of the initial +encrypted/measured guest payload. Extend sev_encrypt_flash() to handle +the SNP case and plumb through the GPA of the BIOS location since this +is needed for SNP. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 12 +++++++----- + hw/i386/x86-common.c | 2 +- + include/hw/i386/x86.h | 2 +- + target/i386/sev-sysemu-stub.c | 2 +- + target/i386/sev.c | 5 +++-- + target/i386/sev.h | 2 +- + 6 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 9b8671c441..7cdbafc8d2 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms, + assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); + + for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { ++ hwaddr gpa; ++ + system_flash = pcms->flash[i]; + blk = pflash_cfi01_get_blk(system_flash); + if (!blk) { +@@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms, + } + + total_size += size; ++ gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ + qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", + size / FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); +- sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, +- 0x100000000ULL - total_size); ++ sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +@@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + if (sev_enabled()) { + flash_ptr = memory_region_get_ram_ptr(flash_mem); + flash_size = memory_region_size(flash_mem); +- x86_firmware_configure(flash_ptr, flash_size); ++ x86_firmware_configure(gpa, flash_ptr, flash_size); + } + } + } +@@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms, + pc_system_flash_cleanup_unused(pcms); + } + +-void x86_firmware_configure(void *ptr, int size) 
++void x86_firmware_configure(hwaddr gpa, void *ptr, int size) + { + int ret; + +@@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size) + exit(1); + } + +- sev_encrypt_flash(ptr, size, &error_fatal); ++ sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } + } +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +index 67b03c913a..35fe6eabea 100644 +--- a/hw/i386/x86-common.c ++++ b/hw/i386/x86-common.c +@@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); ++ x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b006f16b8d..d43cb3908e 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); + DeviceState *ioapic_init_secondary(GSIState *gsi_state); + + /* pc_sysfw.c */ +-void x86_firmware_configure(void *ptr, int size); ++void x86_firmware_configure(hwaddr gpa, void *ptr, int size); + + #endif +diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c +index fc1c57c411..d5bf886e79 100644 +--- a/target/i386/sev-sysemu-stub.c ++++ b/target/i386/sev-sysemu-stub.c +@@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + error_setg(errp, "SEV is not available in this QEMU"); + } + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + g_assert_not_reached(); + } +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 06401f0526..7b5c4b4874 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1484,7 
+1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + int +-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) ++sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + +@@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { + ret = false; + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index cc12824dd6..858005a119 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void); + uint32_t sev_get_reduced_phys_bits(void); + bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + +-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); ++int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); + int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +-- +2.39.3 + diff --git a/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch b/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch new file mode 100644 index 0000000..050a522 --- /dev/null +++ b/kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch @@ -0,0 +1,123 @@ +From a20b2e3e52b9589ac1abc8b9b818d526c86368cf Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:39 -0500 +Subject: [PATCH 082/100] hw/i386/sev: Use guest_memfd for legacy ROMs + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [82/91] 
a591e85e00c353009803b143c80852b8c9b1f15e (bonzini/rhel-qemu-kvm) + +Current SNP guest kernels will attempt to access these regions with +C-bit set, so guest_memfd is needed to handle that. Otherwise, +kvm_convert_memory() will fail when the guest kernel tries to access it +and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges +to private. + +Whether guests should actually try to access ROM regions in this way (or +need to deal with legacy ROM regions at all), is a separate issue to be +addressed on kernel side, but current SNP guest kernels will exhibit +this behavior and so this handling is needed to allow QEMU to continue +running existing SNP guest kernels. + +Signed-off-by: Michael Roth +[pankaj: Added sev_snp_enabled() check] +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 413a67450750e0459efeffc3db3ba9759c3e381c) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 14 ++++++++++---- + hw/i386/pc_sysfw.c | 19 +++++++++++++------ + 2 files changed, 23 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 0aca0cc79e..b25d075b59 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -62,6 +62,7 @@ + #include "hw/mem/memory-device.h" + #include "e820_memory_layout.h" + #include "trace.h" ++#include "sev.h" + #include CONFIG_DEVICES + + #ifdef CONFIG_XEN_EMU +@@ -1173,10 +1174,15 @@ void pc_memory_init(PCMachineState *pcms, + pc_system_firmware_init(pcms, rom_memory); + + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); +- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, +- &error_fatal); +- if (pcmc->pci_enabled) { +- memory_region_set_readonly(option_rom_mr, true); ++ if (machine_require_guest_memfd(machine)) { ++ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", ++ PC_ROM_SIZE, &error_fatal); ++ } else { ++ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, ++ 
&error_fatal); ++ if (pcmc->pci_enabled) { ++ memory_region_set_readonly(option_rom_mr, true); ++ } + } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 7cdbafc8d2..ef80281d28 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,8 +40,8 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, +- MemoryRegion *flash_mem) ++static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, ++ MemoryRegion *rom_memory, MemoryRegion *flash_mem) + { + int isa_bios_size; + uint64_t flash_size; +@@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, +- &error_fatal); ++ if (machine_require_guest_memfd(MACHINE(pcms))) { ++ memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", ++ isa_bios_size, &error_fatal); ++ } else { ++ memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, ++ &error_fatal); ++ } + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, + isa_bios, +@@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), + isa_bios_size); + +- memory_region_set_readonly(isa_bios, true); ++ if (!machine_require_guest_memfd(current_machine)) { ++ memory_region_set_readonly(isa_bios, true); ++ } + } + + static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, +@@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { +- pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); ++ pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); + } + + /* Encrypt the 
pflash boot ROM */ +-- +2.39.3 + diff --git a/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch b/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch new file mode 100644 index 0000000..7b03cb4 --- /dev/null +++ b/kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch @@ -0,0 +1,58 @@ +From 4331180aa09e44550ff8de781c618bae5e99bb70 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Apr 2024 18:07:43 -0500 +Subject: [PATCH 025/100] hw/i386/sev: Use legacy SEV VM types for older + machine types + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [25/91] 8c73cd312736ccb0818b4d3216fd13712f21f3c9 (bonzini/rhel-qemu-kvm) + +Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for +creating SEV/SEV-ES going forward. However, this API results in guest +measurement changes which are generally not expected for users of these +older guest types and can cause disruption if they switch to a newer +QEMU/kernel version. Avoid this by continuing to use the older +KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. 
+ +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-4-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ea7fbd37537b3a598335c21ccb2ea674630fc810) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 1 + + target/i386/sev.c | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b9fde3cec1..1a34bc4522 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -351,6 +351,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, ++ { "sev-guest", "legacy-vm-type", "true" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f4ee317cb0..d30b68c11e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1417,6 +1417,7 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); ++ object_apply_compat_props(obj); + } + + /* sev guest info */ +-- +2.39.3 + diff --git a/kvm-hw-i386-split-x86.c-in-multiple-parts.patch b/kvm-hw-i386-split-x86.c-in-multiple-parts.patch new file mode 100644 index 0000000..40ca52b --- /dev/null +++ b/kvm-hw-i386-split-x86.c-in-multiple-parts.patch @@ -0,0 +1,2301 @@ +From bf2206fae2e640da9de7fc0648b4b90ad3ddfbe3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:41 +0200 +Subject: [PATCH 046/100] hw/i386: split x86.c in multiple parts + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [46/91] 3d6e8364aa9b691c25bdcf54a30b116da5d33874 (bonzini/rhel-qemu-kvm) + +Keep the basic X86MachineState definition in x86.c. 
Move out functions that +are only needed by other files: x86-common.c for the pc and microvm machines, +x86-cpu.c for those used by accelerator code. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-11-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b061f0598b9231f7992aff4fcdf3f336f9747d11) +Signed-off-by: Paolo Bonzini +--- + hw/i386/meson.build | 4 +- + hw/i386/x86-common.c | 1007 +++++++++++++++++++++++++++++++++++++++ + hw/i386/x86-cpu.c | 97 ++++ + hw/i386/x86.c | 1052 +---------------------------------------- + include/hw/i386/x86.h | 6 +- + 5 files changed, 1113 insertions(+), 1053 deletions(-) + create mode 100644 hw/i386/x86-common.c + create mode 100644 hw/i386/x86-cpu.c + +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d9da676038..3437da0aad 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -4,6 +4,7 @@ i386_ss.add(files( + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', ++ 'x86-cpu.c', + )) + + i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) +@@ -12,7 +13,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), + if_false: files('amd_iommu-stub.c')) + i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) +-i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) ++i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) + i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) + i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) + i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) +@@ -22,6 +23,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), + + i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) + i386_ss.add(when: 'CONFIG_PC', if_true: files( ++ 'x86-common.c', + 'pc.c', 
+ 'pc_sysfw.c', + 'acpi-build.c', +diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c +new file mode 100644 +index 0000000000..67b03c913a +--- /dev/null ++++ b/hw/i386/x86-common.c +@@ -0,0 +1,1007 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "qemu/error-report.h" ++#include "qemu/cutils.h" ++#include "qemu/units.h" ++#include "qemu/datadir.h" ++#include "qapi/error.h" ++#include "sysemu/numa.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/xen.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/rtc/mc146818rtc.h" ++#include "target/i386/sev.h" ++ ++#include "hw/acpi/cpu_hotplug.h" ++#include "hw/irq.h" ++#include "hw/loader.h" ++#include "multiboot.h" ++#include "elf.h" ++#include "standard-headers/asm-x86/bootparam.h" ++#include CONFIG_DEVICES ++#include "kvm/kvm_i386.h" ++ ++#ifdef CONFIG_XEN_EMU ++#include "hw/xen/xen.h" ++#include "hw/i386/kvm/xen_evtchn.h" ++#endif ++ ++/* Physical Address of PVH entry point read from kernel ELF NOTE */ ++static size_t pvh_start_addr; ++ ++static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) ++{ ++ Object *cpu = object_new(MACHINE(x86ms)->cpu_type); ++ ++ if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { ++ goto out; ++ } ++ qdev_realize(DEVICE(cpu), NULL, errp); ++ ++out: ++ object_unref(cpu); ++} ++ ++void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) ++{ ++ int i; ++ const CPUArchIdList *possible_cpus; ++ MachineState *ms = MACHINE(x86ms); ++ MachineClass *mc = MACHINE_GET_CLASS(x86ms); ++ ++ x86_cpu_set_default_version(default_cpu_version); ++ ++ /* ++ * Calculates the limit to CPU APIC ID values ++ * ++ * Limit for the APIC ID value, so that all ++ * CPU APIC IDs are < x86ms->apic_id_limit. ++ * ++ * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). ++ */ ++ x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, ++ ms->smp.max_cpus - 1) + 1; ++ ++ /* ++ * Can we support APIC ID 255 or higher? With KVM, that requires ++ * both in-kernel lapic and X2APIC userspace API. 
++ * ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && x86ms->apic_id_limit > 255 && ++ kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { ++ error_report("current -smp configuration requires kernel " ++ "irqchip and X2APIC API support."); ++ exit(EXIT_FAILURE); ++ } ++ ++ if (kvm_enabled()) { ++ kvm_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ if (!kvm_irqchip_in_kernel()) { ++ apic_set_max_apic_id(x86ms->apic_id_limit); ++ } ++ ++ possible_cpus = mc->possible_cpu_arch_ids(ms); ++ for (i = 0; i < ms->smp.cpus; i++) { ++ x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); ++ } ++} ++ ++void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) ++{ ++ MC146818RtcState *rtc = MC146818_RTC(s); ++ ++ if (cpus_count > 0xff) { ++ /* ++ * If the number of CPUs can't be represented in 8 bits, the ++ * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just ++ * to make old BIOSes fail more predictably. ++ */ ++ mc146818rtc_set_cmos_data(rtc, 0x5f, 0); ++ } else { ++ mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); ++ } ++} ++ ++static int x86_apic_cmp(const void *a, const void *b) ++{ ++ CPUArchId *apic_a = (CPUArchId *)a; ++ CPUArchId *apic_b = (CPUArchId *)b; ++ ++ return apic_a->arch_id - apic_b->arch_id; ++} ++ ++/* ++ * returns pointer to CPUArchId descriptor that matches CPU's apic_id ++ * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no ++ * entry corresponding to CPU's apic_id returns NULL. 
++ */ ++static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) ++{ ++ CPUArchId apic_id, *found_cpu; ++ ++ apic_id.arch_id = id; ++ found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, ++ ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), ++ x86_apic_cmp); ++ if (found_cpu && idx) { ++ *idx = found_cpu - ms->possible_cpus->cpus; ++ } ++ return found_cpu; ++} ++ ++void x86_cpu_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (x86ms->acpi_dev) { ++ hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* increment the number of CPUs */ ++ x86ms->boot_cpus++; ++ if (x86ms->rtc) { ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ } ++ if (x86ms->fw_cfg) { ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = CPU(dev); ++out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx = -1; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ if (!x86ms->acpi_dev) { ++ error_setg(errp, "CPU hot unplug not supported without ACPI"); ++ return; ++ } ++ ++ x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ assert(idx != -1); ++ if (idx == 0) { ++ error_setg(errp, "Boot CPU is unpluggable"); ++ return; ++ } ++ ++ hotplug_handler_unplug_request(x86ms->acpi_dev, dev, ++ errp); ++} ++ ++void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ CPUArchId *found_cpu; ++ Error *local_err = NULL; ++ X86CPU *cpu = X86_CPU(dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ ++ hotplug_handler_unplug(x86ms->acpi_dev, dev, 
&local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); ++ found_cpu->cpu = NULL; ++ qdev_unrealize(dev); ++ ++ /* decrement the number of CPUs */ ++ x86ms->boot_cpus--; ++ /* Update the number of CPUs in CMOS */ ++ x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); ++ fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); ++ out: ++ error_propagate(errp, local_err); ++} ++ ++void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ int idx; ++ CPUState *cs; ++ CPUArchId *cpu_slot; ++ X86CPUTopoIDs topo_ids; ++ X86CPU *cpu = X86_CPU(dev); ++ CPUX86State *env = &cpu->env; ++ MachineState *ms = MACHINE(hotplug_dev); ++ X86MachineState *x86ms = X86_MACHINE(hotplug_dev); ++ unsigned int smp_cores = ms->smp.cores; ++ unsigned int smp_threads = ms->smp.threads; ++ X86CPUTopoInfo topo_info; ++ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if (x86ms->acpi_dev) { ++ Error *local_err = NULL; ++ ++ hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, ++ &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ } ++ ++ init_topo_info(&topo_info, x86ms); ++ ++ env->nr_dies = ms->smp.dies; ++ ++ /* ++ * If APIC ID is not set, ++ * set it based on socket/die/core/thread properties. ++ */ ++ if (cpu->apic_id == UNASSIGNED_APIC_ID) { ++ int max_socket = (ms->smp.max_cpus - 1) / ++ smp_threads / smp_cores / ms->smp.dies; ++ ++ /* ++ * die-id was optional in QEMU 4.0 and older, so keep it optional ++ * if there's only one die per socket. 
++ */ ++ if (cpu->die_id < 0 && ms->smp.dies == 1) { ++ cpu->die_id = 0; ++ } ++ ++ if (cpu->socket_id < 0) { ++ error_setg(errp, "CPU socket-id is not set"); ++ return; ++ } else if (cpu->socket_id > max_socket) { ++ error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", ++ cpu->socket_id, max_socket); ++ return; ++ } ++ if (cpu->die_id < 0) { ++ error_setg(errp, "CPU die-id is not set"); ++ return; ++ } else if (cpu->die_id > ms->smp.dies - 1) { ++ error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", ++ cpu->die_id, ms->smp.dies - 1); ++ return; ++ } ++ if (cpu->core_id < 0) { ++ error_setg(errp, "CPU core-id is not set"); ++ return; ++ } else if (cpu->core_id > (smp_cores - 1)) { ++ error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", ++ cpu->core_id, smp_cores - 1); ++ return; ++ } ++ if (cpu->thread_id < 0) { ++ error_setg(errp, "CPU thread-id is not set"); ++ return; ++ } else if (cpu->thread_id > (smp_threads - 1)) { ++ error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", ++ cpu->thread_id, smp_threads - 1); ++ return; ++ } ++ ++ topo_ids.pkg_id = cpu->socket_id; ++ topo_ids.die_id = cpu->die_id; ++ topo_ids.core_id = cpu->core_id; ++ topo_ids.smt_id = cpu->thread_id; ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); ++ } ++ ++ cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); ++ if (!cpu_slot) { ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ error_setg(errp, ++ "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" ++ " APIC ID %" PRIu32 ", valid index range 0:%d", ++ topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, ++ cpu->apic_id, ms->possible_cpus->len - 1); ++ return; ++ } ++ ++ if (cpu_slot->cpu) { ++ error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", ++ idx, cpu->apic_id); ++ return; ++ } ++ ++ /* if 'address' properties socket-id/core-id/thread-id are not set, set them ++ * so that 
machine_query_hotpluggable_cpus would show correct values ++ */ ++ /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() ++ * once -smp refactoring is complete and there will be CPU private ++ * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { ++ error_setg(errp, "property socket-id: %u doesn't match set apic-id:" ++ " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, ++ topo_ids.pkg_id); ++ return; ++ } ++ cpu->socket_id = topo_ids.pkg_id; ++ ++ if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { ++ error_setg(errp, "property die-id: %u doesn't match set apic-id:" ++ " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); ++ return; ++ } ++ cpu->die_id = topo_ids.die_id; ++ ++ if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { ++ error_setg(errp, "property core-id: %u doesn't match set apic-id:" ++ " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, ++ topo_ids.core_id); ++ return; ++ } ++ cpu->core_id = topo_ids.core_id; ++ ++ if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { ++ error_setg(errp, "property thread-id: %u doesn't match set apic-id:" ++ " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, ++ topo_ids.smt_id); ++ return; ++ } ++ cpu->thread_id = topo_ids.smt_id; ++ ++ /* ++ * kvm_enabled() must go first to ensure that kvm_* references are ++ * not emitted for the linker to consume (kvm_enabled() is ++ * a literal `0` in configurations where kvm_* aren't defined) ++ */ ++ if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && ++ !kvm_hv_vpindex_settable()) { ++ error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); ++ return; ++ } ++ ++ cs = CPU(cpu); ++ cs->cpu_index = idx; ++ ++ numa_cpu_pre_plug(cpu_slot, dev, errp); ++} ++ ++static long get_file_size(FILE *f) ++{ ++ long where, size; ++ ++ /* XXX: on 
Unix systems, using fstat() probably makes more sense */ ++ ++ where = ftell(f); ++ fseek(f, 0, SEEK_END); ++ size = ftell(f); ++ fseek(f, where, SEEK_SET); ++ ++ return size; ++} ++ ++void gsi_handler(void *opaque, int n, int level) ++{ ++ GSIState *s = opaque; ++ ++ trace_x86_gsi_interrupt(n, level); ++ switch (n) { ++ case 0 ... ISA_NUM_IRQS - 1: ++ if (s->i8259_irq[n]) { ++ /* Under KVM, Kernel will forward to both PIC and IOAPIC */ ++ qemu_set_irq(s->i8259_irq[n], level); ++ } ++ /* fall through */ ++ case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: ++#ifdef CONFIG_XEN_EMU ++ /* ++ * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC ++ * routing actually works properly under Xen). And then to ++ * *either* the PIRQ handling or the I/OAPIC depending on ++ * whether the former wants it. ++ */ ++ if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { ++ break; ++ } ++#endif ++ qemu_set_irq(s->ioapic_irq[n], level); ++ break; ++ case IO_APIC_SECONDARY_IRQBASE ++ ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: ++ qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); ++ break; ++ } ++} ++ ++void ioapic_init_gsi(GSIState *gsi_state, Object *parent) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ assert(parent); ++ if (kvm_ioapic_in_kernel()) { ++ dev = qdev_new(TYPE_KVM_IOAPIC); ++ } else { ++ dev = qdev_new(TYPE_IOAPIC); ++ } ++ object_property_add_child(parent, "ioapic", OBJECT(dev)); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); ++ ++ for (i = 0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++} ++ ++DeviceState *ioapic_init_secondary(GSIState *gsi_state) ++{ ++ DeviceState *dev; ++ SysBusDevice *d; ++ unsigned int i; ++ ++ dev = qdev_new(TYPE_IOAPIC); ++ d = SYS_BUS_DEVICE(dev); ++ sysbus_realize_and_unref(d, &error_fatal); ++ sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); ++ ++ for (i = 
0; i < IOAPIC_NUM_PINS; i++) { ++ gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); ++ } ++ return dev; ++} ++ ++/* ++ * The entry point into the kernel for PVH boot is different from ++ * the native entry point. The PVH entry is defined by the x86/HVM ++ * direct boot ABI and is available in an ELFNOTE in the kernel binary. ++ * ++ * This function is passed to load_elf() when it is called from ++ * load_elfboot() which then additionally checks for an ELF Note of ++ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to ++ * parse the PVH entry address from the ELF Note. ++ * ++ * Due to trickery in elf_opts.h, load_elf() is actually available as ++ * load_elf32() or load_elf64() and this routine needs to be able ++ * to deal with being called as 32 or 64 bit. ++ * ++ * The address of the PVH entry point is saved to the 'pvh_start_addr' ++ * global variable. (although the entry point is 32-bit, the kernel ++ * binary can be either 32-bit or 64-bit). ++ */ ++static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) ++{ ++ size_t *elf_note_data_addr; ++ ++ /* Check if ELF Note header passed in is valid */ ++ if (arg1 == NULL) { ++ return 0; ++ } ++ ++ if (is64) { ++ struct elf64_note *nhdr64 = (struct elf64_note *)arg1; ++ uint64_t nhdr_size64 = sizeof(struct elf64_note); ++ uint64_t phdr_align = *(uint64_t *)arg2; ++ uint64_t nhdr_namesz = nhdr64->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr64) + nhdr_size64 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *elf_note_data_addr; ++ } else { ++ struct elf32_note *nhdr32 = (struct elf32_note *)arg1; ++ uint32_t nhdr_size32 = sizeof(struct elf32_note); ++ uint32_t phdr_align = *(uint32_t *)arg2; ++ uint32_t nhdr_namesz = nhdr32->n_namesz; ++ ++ elf_note_data_addr = ++ ((void *)nhdr32) + nhdr_size32 + ++ QEMU_ALIGN_UP(nhdr_namesz, phdr_align); ++ ++ pvh_start_addr = *(uint32_t *)elf_note_data_addr; ++ } ++ ++ return pvh_start_addr; ++} ++ ++static bool 
load_elfboot(const char *kernel_filename, ++ int kernel_file_size, ++ uint8_t *header, ++ size_t pvh_xen_start_addr, ++ FWCfgState *fw_cfg) ++{ ++ uint32_t flags = 0; ++ uint32_t mh_load_addr = 0; ++ uint32_t elf_kernel_size = 0; ++ uint64_t elf_entry; ++ uint64_t elf_low, elf_high; ++ int kernel_size; ++ ++ if (ldl_p(header) != 0x464c457f) { ++ return false; /* no elfboot */ ++ } ++ ++ bool elf_is64 = header[EI_CLASS] == ELFCLASS64; ++ flags = elf_is64 ? ++ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; ++ ++ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ ++ error_report("elfboot unsupported flags = %x", flags); ++ exit(1); ++ } ++ ++ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; ++ kernel_size = load_elf(kernel_filename, read_pvh_start_addr, ++ NULL, &elf_note_type, &elf_entry, ++ &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, ++ 0, 0); ++ ++ if (kernel_size < 0) { ++ error_report("Error while loading elf kernel"); ++ exit(1); ++ } ++ mh_load_addr = elf_low; ++ elf_kernel_size = elf_high - elf_low; ++ ++ if (pvh_start_addr == 0) { ++ error_report("Error loading uncompressed kernel without PVH ELF Note"); ++ exit(1); ++ } ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); ++ ++ return true; ++} ++ ++void x86_load_linux(X86MachineState *x86ms, ++ FWCfgState *fw_cfg, ++ int acpi_data_size, ++ bool pvh_enabled) ++{ ++ bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; ++ uint16_t protocol; ++ int setup_size, kernel_size, cmdline_size; ++ int dtb_size, setup_data_offset; ++ uint32_t initrd_max; ++ uint8_t header[8192], *setup, *kernel; ++ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; ++ FILE *f; ++ char *vmode; ++ MachineState *machine = MACHINE(x86ms); ++ struct setup_data *setup_data; ++ const char *kernel_filename = machine->kernel_filename; ++ const 
char *initrd_filename = machine->initrd_filename; ++ const char *dtb_filename = machine->dtb; ++ const char *kernel_cmdline = machine->kernel_cmdline; ++ SevKernelLoaderContext sev_load_ctx = {}; ++ ++ /* Align to 16 bytes as a paranoia measure */ ++ cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; ++ ++ /* load the kernel header */ ++ f = fopen(kernel_filename, "rb"); ++ if (!f) { ++ fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ kernel_size = get_file_size(f); ++ if (!kernel_size || ++ fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != ++ MIN(ARRAY_SIZE(header), kernel_size)) { ++ fprintf(stderr, "qemu: could not load kernel '%s': %s\n", ++ kernel_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ /* kernel protocol version */ ++ if (ldl_p(header + 0x202) == 0x53726448) { ++ protocol = lduw_p(header + 0x206); ++ } else { ++ /* ++ * This could be a multiboot kernel. If it is, let's stop treating it ++ * like a Linux kernel. ++ * Note: some multiboot images could be in the ELF format (the same of ++ * PVH), so we try multiboot first since we check the multiboot magic ++ * header before to load it. ++ */ ++ if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, ++ kernel_cmdline, kernel_size, header)) { ++ return; ++ } ++ /* ++ * Check if the file is an uncompressed kernel file (ELF) and load it, ++ * saving the PVH entry point used by the x86/HVM direct boot ABI. ++ * If load_elfboot() is successful, populate the fw_cfg info. 
++ */ ++ if (pvh_enabled && ++ load_elfboot(kernel_filename, kernel_size, ++ header, pvh_start_addr, fw_cfg)) { ++ fclose(f); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, ++ strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, ++ header, sizeof(header)); ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." 
++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, ++ initrd_size); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "pvh.bin"; ++ nb_option_roms++; ++ ++ return; ++ } ++ protocol = 0; ++ } ++ ++ if (protocol < 0x200 || !(header[0x211] & 0x01)) { ++ /* Low kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x10000; ++ } else if (protocol < 0x202) { ++ /* High but ancient kernel */ ++ real_addr = 0x90000; ++ cmdline_addr = 0x9a000 - cmdline_size; ++ prot_addr = 0x100000; ++ } else { ++ /* High and recent kernel */ ++ real_addr = 0x10000; ++ cmdline_addr = 0x20000; ++ prot_addr = 0x100000; ++ } ++ ++ /* highest address for loading the initrd */ ++ if (protocol >= 0x20c && ++ lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { ++ /* ++ * Linux has supported initrd up to 4 GB for a very long time (2007, ++ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), ++ * though it only sets initrd_max to 2 GB to "work around bootloader ++ * bugs". Luckily, QEMU firmware(which does something like bootloader) ++ * has supported this. ++ * ++ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can ++ * be loaded into any address. ++ * ++ * In addition, initrd_max is uint32_t simply because QEMU doesn't ++ * support the 64-bit boot protocol (specifically the ext_ramdisk_image ++ * field). ++ * ++ * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
++ */ ++ initrd_max = UINT32_MAX; ++ } else if (protocol >= 0x203) { ++ initrd_max = ldl_p(header + 0x22c); ++ } else { ++ initrd_max = 0x37ffffff; ++ } ++ ++ if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { ++ initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); ++ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); ++ sev_load_ctx.cmdline_data = (char *)kernel_cmdline; ++ sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; ++ ++ if (protocol >= 0x202) { ++ stl_p(header + 0x228, cmdline_addr); ++ } else { ++ stw_p(header + 0x20, 0xA33F); ++ stw_p(header + 0x22, cmdline_addr - real_addr); ++ } ++ ++ /* handle vga= parameter */ ++ vmode = strstr(kernel_cmdline, "vga="); ++ if (vmode) { ++ unsigned int video_mode; ++ const char *end; ++ int ret; ++ /* skip "vga=" */ ++ vmode += 4; ++ if (!strncmp(vmode, "normal", 6)) { ++ video_mode = 0xffff; ++ } else if (!strncmp(vmode, "ext", 3)) { ++ video_mode = 0xfffe; ++ } else if (!strncmp(vmode, "ask", 3)) { ++ video_mode = 0xfffd; ++ } else { ++ ret = qemu_strtoui(vmode, &end, 0, &video_mode); ++ if (ret != 0 || (*end && *end != ' ')) { ++ fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); ++ exit(1); ++ } ++ } ++ stw_p(header + 0x1fa, video_mode); ++ } ++ ++ /* loader type */ ++ /* ++ * High nybble = B reserved for QEMU; low nybble is revision number. ++ * If this code is substantially changed, you may want to consider ++ * incrementing the revision. 
++ */ ++ if (protocol >= 0x200) { ++ header[0x210] = 0xB0; ++ } ++ /* heap */ ++ if (protocol >= 0x201) { ++ header[0x211] |= 0x80; /* CAN_USE_HEAP */ ++ stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); ++ } ++ ++ /* load initrd */ ++ if (initrd_filename) { ++ GMappedFile *mapped_file; ++ gsize initrd_size; ++ gchar *initrd_data; ++ GError *gerr = NULL; ++ ++ if (protocol < 0x200) { ++ fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); ++ exit(1); ++ } ++ ++ mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); ++ if (!mapped_file) { ++ fprintf(stderr, "qemu: error reading initrd %s: %s\n", ++ initrd_filename, gerr->message); ++ exit(1); ++ } ++ x86ms->initrd_mapped_file = mapped_file; ++ ++ initrd_data = g_mapped_file_get_contents(mapped_file); ++ initrd_size = g_mapped_file_get_length(mapped_file); ++ if (initrd_size >= initrd_max) { ++ fprintf(stderr, "qemu: initrd is too large, cannot support." ++ "(max: %"PRIu32", need %"PRId64")\n", ++ initrd_max, (uint64_t)initrd_size); ++ exit(1); ++ } ++ ++ initrd_addr = (initrd_max - initrd_size) & ~4095; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); ++ sev_load_ctx.initrd_data = initrd_data; ++ sev_load_ctx.initrd_size = initrd_size; ++ ++ stl_p(header + 0x218, initrd_addr); ++ stl_p(header + 0x21c, initrd_size); ++ } ++ ++ /* load kernel and setup */ ++ setup_size = header[0x1f1]; ++ if (setup_size == 0) { ++ setup_size = 4; ++ } ++ setup_size = (setup_size + 1) * 512; ++ if (setup_size > kernel_size) { ++ fprintf(stderr, "qemu: invalid kernel header\n"); ++ exit(1); ++ } ++ kernel_size -= setup_size; ++ ++ setup = g_malloc(setup_size); ++ kernel = g_malloc(kernel_size); ++ fseek(f, 0, SEEK_SET); ++ if (fread(setup, 1, setup_size, f) != setup_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { ++ fprintf(stderr, "fread() failed\n"); ++ exit(1); ++ } ++ fclose(f); ++ ++ /* append dtb to kernel */ ++ if (dtb_filename) { ++ if (protocol < 0x209) { ++ fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); ++ exit(1); ++ } ++ ++ dtb_size = get_image_size(dtb_filename); ++ if (dtb_size <= 0) { ++ fprintf(stderr, "qemu: error reading dtb %s: %s\n", ++ dtb_filename, strerror(errno)); ++ exit(1); ++ } ++ ++ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); ++ kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; ++ kernel = g_realloc(kernel, kernel_size); ++ ++ stq_p(header + 0x250, prot_addr + setup_data_offset); ++ ++ setup_data = (struct setup_data *)(kernel + setup_data_offset); ++ setup_data->next = 0; ++ setup_data->type = cpu_to_le32(SETUP_DTB); ++ setup_data->len = cpu_to_le32(dtb_size); ++ ++ load_image_size(dtb_filename, setup_data->data, dtb_size); ++ } ++ ++ /* ++ * If we're starting an encrypted VM, it will be OVMF based, which uses the ++ * efi stub for booting and doesn't require any values to be placed in the ++ * kernel header. We therefore don't update the header so the hash of the ++ * kernel on the other side of the fw_cfg interface matches the hash of the ++ * file the user passed in. 
++ */ ++ if (!sev_enabled()) { ++ memcpy(setup, header, MIN(sizeof(header), setup_size)); ++ } ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); ++ sev_load_ctx.kernel_data = (char *)kernel; ++ sev_load_ctx.kernel_size = kernel_size; ++ ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); ++ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); ++ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); ++ sev_load_ctx.setup_data = (char *)setup; ++ sev_load_ctx.setup_size = setup_size; ++ ++ if (sev_enabled()) { ++ sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); ++ } ++ ++ option_rom[nb_option_roms].bootindex = 0; ++ option_rom[nb_option_roms].name = "linuxboot.bin"; ++ if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { ++ option_rom[nb_option_roms].name = "linuxboot_dma.bin"; ++ } ++ nb_option_roms++; ++} ++ ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ ++void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, ++ MemoryRegion *rom_memory, bool isapc_ram_fw) ++{ ++ const char *bios_name; ++ char *filename; ++ int bios_size; ++ ssize_t ret; ++ ++ /* BIOS load */ ++ bios_name = MACHINE(x86ms)->firmware ?: default_firmware; ++ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); ++ if (filename) { ++ bios_size = get_image_size(filename); ++ } else { ++ bios_size = -1; ++ } ++ if (bios_size <= 0 || ++ (bios_size % 65536) != 0) { ++ goto 
bios_error; ++ } ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); ++ if (sev_enabled()) { ++ /* ++ * The concept of a "reset" simply doesn't exist for ++ * confidential computing guests, we have to destroy and ++ * re-launch them instead. So there is no need to register ++ * the firmware as rom to properly re-initialize on reset. ++ * Just go for a straight file load instead. ++ */ ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); ++ load_image_size(filename, ptr, bios_size); ++ x86_firmware_configure(ptr, bios_size); ++ } else { ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ++ ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); ++ if (ret != 0) { ++ goto bios_error; ++ } ++ } ++ g_free(filename); ++ ++ /* map the last 128KB of the BIOS in ISA space */ ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); ++ ++ /* map all the bios at the top of memory */ ++ memory_region_add_subregion(rom_memory, ++ (uint32_t)(-bios_size), ++ &x86ms->bios); ++ return; ++ ++bios_error: ++ fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); ++ exit(1); ++} +diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c +new file mode 100644 +index 0000000000..ab2920522d +--- /dev/null ++++ b/hw/i386/x86-cpu.c +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2003-2004 Fabrice Bellard ++ * Copyright (c) 2019, 2024 Red Hat, Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this software and associated documentation files (the "Software"), to deal ++ * in the Software without restriction, including without limitation the rights ++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++ * copies of the Software, and to permit persons to whom the Software is ++ * furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN ++ * THE SOFTWARE. 
++ */ ++#include "qemu/osdep.h" ++#include "sysemu/whpx.h" ++#include "sysemu/cpu-timers.h" ++#include "trace.h" ++ ++#include "hw/i386/x86.h" ++#include "target/i386/cpu.h" ++#include "hw/intc/i8259.h" ++#include "hw/irq.h" ++#include "sysemu/kvm.h" ++ ++/* TSC handling */ ++uint64_t cpu_get_tsc(CPUX86State *env) ++{ ++ return cpus_get_elapsed_ticks(); ++} ++ ++/* IRQ handling */ ++static void pic_irq_request(void *opaque, int irq, int level) ++{ ++ CPUState *cs = first_cpu; ++ X86CPU *cpu = X86_CPU(cs); ++ ++ trace_x86_pic_interrupt(irq, level); ++ if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && ++ !whpx_apic_in_platform()) { ++ CPU_FOREACH(cs) { ++ cpu = X86_CPU(cs); ++ if (apic_accept_pic_intr(cpu->apic_state)) { ++ apic_deliver_pic_intr(cpu->apic_state, level); ++ } ++ } ++ } else { ++ if (level) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } ++ } ++} ++ ++qemu_irq x86_allocate_cpu_irq(void) ++{ ++ return qemu_allocate_irq(pic_irq_request, NULL, 0); ++} ++ ++int cpu_get_pic_interrupt(CPUX86State *env) ++{ ++ X86CPU *cpu = env_archcpu(env); ++ int intno; ++ ++ if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { ++ intno = apic_get_interrupt(cpu->apic_state); ++ if (intno >= 0) { ++ return intno; ++ } ++ /* read the irq from the PIC */ ++ if (!apic_accept_pic_intr(cpu->apic_state)) { ++ return -1; ++ } ++ } ++ ++ intno = pic_read_irq(isa_pic); ++ return intno; ++} ++ ++DeviceState *cpu_get_current_apic(void) ++{ ++ if (current_cpu) { ++ X86CPU *cpu = X86_CPU(current_cpu); ++ return cpu->apic_state; ++ } else { ++ return NULL; ++ } ++} +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index fcef652c1e..0b5cc59956 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -22,52 +22,25 @@ + */ + #include "qemu/osdep.h" + #include "qemu/error-report.h" +-#include "qemu/option.h" +-#include "qemu/cutils.h" + #include "qemu/units.h" +-#include "qemu/datadir.h" + #include "qapi/error.h" 
+ #include "qapi/qapi-visit-common.h" +-#include "qapi/clone-visitor.h" + #include "qapi/qapi-visit-machine.h" + #include "qapi/visitor.h" + #include "sysemu/qtest.h" +-#include "sysemu/whpx.h" + #include "sysemu/numa.h" +-#include "sysemu/replay.h" +-#include "sysemu/sysemu.h" +-#include "sysemu/cpu-timers.h" +-#include "sysemu/xen.h" + #include "trace.h" + ++#include "hw/acpi/aml-build.h" + #include "hw/i386/x86.h" +-#include "target/i386/cpu.h" + #include "hw/i386/topology.h" +-#include "hw/i386/fw_cfg.h" +-#include "hw/intc/i8259.h" +-#include "hw/rtc/mc146818rtc.h" +-#include "target/i386/sev.h" + +-#include "hw/acpi/cpu_hotplug.h" +-#include "hw/irq.h" + #include "hw/nmi.h" +-#include "hw/loader.h" +-#include "multiboot.h" +-#include "elf.h" +-#include "standard-headers/asm-x86/bootparam.h" +-#include CONFIG_DEVICES + #include "kvm/kvm_i386.h" + +-#ifdef CONFIG_XEN_EMU +-#include "hw/xen/xen.h" +-#include "hw/i386/kvm/xen_evtchn.h" +-#endif + +-/* Physical Address of PVH entry point read from kernel ELF NOTE */ +-static size_t pvh_start_addr; +- +-static void init_topo_info(X86CPUTopoInfo *topo_info, +- const X86MachineState *x86ms) ++void init_topo_info(X86CPUTopoInfo *topo_info, ++ const X86MachineState *x86ms) + { + MachineState *ms = MACHINE(x86ms); + +@@ -94,355 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + return x86_apicid_from_cpu_idx(&topo_info, cpu_index); + } + +- +-void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +-{ +- Object *cpu = object_new(MACHINE(x86ms)->cpu_type); +- +- if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { +- goto out; +- } +- qdev_realize(DEVICE(cpu), NULL, errp); +- +-out: +- object_unref(cpu); +-} +- +-void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +-{ +- int i; +- const CPUArchIdList *possible_cpus; +- MachineState *ms = MACHINE(x86ms); +- MachineClass *mc = MACHINE_GET_CLASS(x86ms); +- +- x86_cpu_set_default_version(default_cpu_version); +- 
+- /* +- * Calculates the limit to CPU APIC ID values +- * +- * Limit for the APIC ID value, so that all +- * CPU APIC IDs are < x86ms->apic_id_limit. +- * +- * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). +- */ +- x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, +- ms->smp.max_cpus - 1) + 1; +- +- /* +- * Can we support APIC ID 255 or higher? With KVM, that requires +- * both in-kernel lapic and X2APIC userspace API. +- * +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && x86ms->apic_id_limit > 255 && +- kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { +- error_report("current -smp configuration requires kernel " +- "irqchip and X2APIC API support."); +- exit(EXIT_FAILURE); +- } +- +- if (kvm_enabled()) { +- kvm_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- if (!kvm_irqchip_in_kernel()) { +- apic_set_max_apic_id(x86ms->apic_id_limit); +- } +- +- possible_cpus = mc->possible_cpu_arch_ids(ms); +- for (i = 0; i < ms->smp.cpus; i++) { +- x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); +- } +-} +- +-void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +-{ +- MC146818RtcState *rtc = MC146818_RTC(s); +- +- if (cpus_count > 0xff) { +- /* +- * If the number of CPUs can't be represented in 8 bits, the +- * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just +- * to make old BIOSes fail more predictably. 
+- */ +- mc146818rtc_set_cmos_data(rtc, 0x5f, 0); +- } else { +- mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); +- } +-} +- +-static int x86_apic_cmp(const void *a, const void *b) +-{ +- CPUArchId *apic_a = (CPUArchId *)a; +- CPUArchId *apic_b = (CPUArchId *)b; +- +- return apic_a->arch_id - apic_b->arch_id; +-} +- +-/* +- * returns pointer to CPUArchId descriptor that matches CPU's apic_id +- * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no +- * entry corresponding to CPU's apic_id returns NULL. +- */ +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +-{ +- CPUArchId apic_id, *found_cpu; +- +- apic_id.arch_id = id; +- found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, +- ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), +- x86_apic_cmp); +- if (found_cpu && idx) { +- *idx = found_cpu - ms->possible_cpus->cpus; +- } +- return found_cpu; +-} +- +-void x86_cpu_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (x86ms->acpi_dev) { +- hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- } +- +- /* increment the number of CPUs */ +- x86ms->boot_cpus++; +- if (x86ms->rtc) { +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- } +- if (x86ms->fw_cfg) { +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = CPU(dev); +-out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx = -1; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- if (!x86ms->acpi_dev) { +- error_setg(errp, "CPU hot unplug not supported without ACPI"); +- return; +- } +- +- 
x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- assert(idx != -1); +- if (idx == 0) { +- error_setg(errp, "Boot CPU is unpluggable"); +- return; +- } +- +- hotplug_handler_unplug_request(x86ms->acpi_dev, dev, +- errp); +-} +- +-void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- CPUArchId *found_cpu; +- Error *local_err = NULL; +- X86CPU *cpu = X86_CPU(dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- +- hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); +- if (local_err) { +- goto out; +- } +- +- found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); +- found_cpu->cpu = NULL; +- qdev_unrealize(dev); +- +- /* decrement the number of CPUs */ +- x86ms->boot_cpus--; +- /* Update the number of CPUs in CMOS */ +- x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); +- fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); +- out: +- error_propagate(errp, local_err); +-} +- +-void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +- DeviceState *dev, Error **errp) +-{ +- int idx; +- CPUState *cs; +- CPUArchId *cpu_slot; +- X86CPUTopoIDs topo_ids; +- X86CPU *cpu = X86_CPU(dev); +- CPUX86State *env = &cpu->env; +- MachineState *ms = MACHINE(hotplug_dev); +- X86MachineState *x86ms = X86_MACHINE(hotplug_dev); +- unsigned int smp_cores = ms->smp.cores; +- unsigned int smp_threads = ms->smp.threads; +- X86CPUTopoInfo topo_info; +- +- if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { +- error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +- ms->cpu_type); +- return; +- } +- +- if (x86ms->acpi_dev) { +- Error *local_err = NULL; +- +- hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, +- &local_err); +- if (local_err) { +- error_propagate(errp, local_err); +- return; +- } +- } +- +- init_topo_info(&topo_info, x86ms); +- +- env->nr_dies = ms->smp.dies; +- +- /* +- * If APIC ID is not set, +- * set it based on socket/die/core/thread properties. 
+- */ +- if (cpu->apic_id == UNASSIGNED_APIC_ID) { +- int max_socket = (ms->smp.max_cpus - 1) / +- smp_threads / smp_cores / ms->smp.dies; +- +- /* +- * die-id was optional in QEMU 4.0 and older, so keep it optional +- * if there's only one die per socket. +- */ +- if (cpu->die_id < 0 && ms->smp.dies == 1) { +- cpu->die_id = 0; +- } +- +- if (cpu->socket_id < 0) { +- error_setg(errp, "CPU socket-id is not set"); +- return; +- } else if (cpu->socket_id > max_socket) { +- error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", +- cpu->socket_id, max_socket); +- return; +- } +- if (cpu->die_id < 0) { +- error_setg(errp, "CPU die-id is not set"); +- return; +- } else if (cpu->die_id > ms->smp.dies - 1) { +- error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", +- cpu->die_id, ms->smp.dies - 1); +- return; +- } +- if (cpu->core_id < 0) { +- error_setg(errp, "CPU core-id is not set"); +- return; +- } else if (cpu->core_id > (smp_cores - 1)) { +- error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", +- cpu->core_id, smp_cores - 1); +- return; +- } +- if (cpu->thread_id < 0) { +- error_setg(errp, "CPU thread-id is not set"); +- return; +- } else if (cpu->thread_id > (smp_threads - 1)) { +- error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", +- cpu->thread_id, smp_threads - 1); +- return; +- } +- +- topo_ids.pkg_id = cpu->socket_id; +- topo_ids.die_id = cpu->die_id; +- topo_ids.core_id = cpu->core_id; +- topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); +- } +- +- cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); +- if (!cpu_slot) { +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- error_setg(errp, +- "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" +- " APIC ID %" PRIu32 ", valid index range 0:%d", +- topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, +- cpu->apic_id, ms->possible_cpus->len - 1); +- return; 
+- } +- +- if (cpu_slot->cpu) { +- error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", +- idx, cpu->apic_id); +- return; +- } +- +- /* if 'address' properties socket-id/core-id/thread-id are not set, set them +- * so that machine_query_hotpluggable_cpus would show correct values +- */ +- /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() +- * once -smp refactoring is complete and there will be CPU private +- * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); +- if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { +- error_setg(errp, "property socket-id: %u doesn't match set apic-id:" +- " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +- topo_ids.pkg_id); +- return; +- } +- cpu->socket_id = topo_ids.pkg_id; +- +- if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { +- error_setg(errp, "property die-id: %u doesn't match set apic-id:" +- " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); +- return; +- } +- cpu->die_id = topo_ids.die_id; +- +- if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { +- error_setg(errp, "property core-id: %u doesn't match set apic-id:" +- " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, +- topo_ids.core_id); +- return; +- } +- cpu->core_id = topo_ids.core_id; +- +- if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { +- error_setg(errp, "property thread-id: %u doesn't match set apic-id:" +- " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, +- topo_ids.smt_id); +- return; +- } +- cpu->thread_id = topo_ids.smt_id; +- +- /* +- * kvm_enabled() must go first to ensure that kvm_* references are +- * not emitted for the linker to consume (kvm_enabled() is +- * a literal `0` in configurations where kvm_* aren't defined) +- */ +- if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && +- !kvm_hv_vpindex_settable()) { +- error_setg(errp, "kernel 
doesn't allow setting HyperV VP_INDEX"); +- return; +- } +- +- cs = CPU(cpu); +- cs->cpu_index = idx; +- +- numa_cpu_pre_plug(cpu_slot, dev, errp); +-} +- + static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { +@@ -528,676 +152,6 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) + } + } + +-static long get_file_size(FILE *f) +-{ +- long where, size; +- +- /* XXX: on Unix systems, using fstat() probably makes more sense */ +- +- where = ftell(f); +- fseek(f, 0, SEEK_END); +- size = ftell(f); +- fseek(f, where, SEEK_SET); +- +- return size; +-} +- +-/* TSC handling */ +-uint64_t cpu_get_tsc(CPUX86State *env) +-{ +- return cpus_get_elapsed_ticks(); +-} +- +-/* IRQ handling */ +-static void pic_irq_request(void *opaque, int irq, int level) +-{ +- CPUState *cs = first_cpu; +- X86CPU *cpu = X86_CPU(cs); +- +- trace_x86_pic_interrupt(irq, level); +- if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && +- !whpx_apic_in_platform()) { +- CPU_FOREACH(cs) { +- cpu = X86_CPU(cs); +- if (apic_accept_pic_intr(cpu->apic_state)) { +- apic_deliver_pic_intr(cpu->apic_state, level); +- } +- } +- } else { +- if (level) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); +- } +- } +-} +- +-qemu_irq x86_allocate_cpu_irq(void) +-{ +- return qemu_allocate_irq(pic_irq_request, NULL, 0); +-} +- +-int cpu_get_pic_interrupt(CPUX86State *env) +-{ +- X86CPU *cpu = env_archcpu(env); +- int intno; +- +- if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { +- intno = apic_get_interrupt(cpu->apic_state); +- if (intno >= 0) { +- return intno; +- } +- /* read the irq from the PIC */ +- if (!apic_accept_pic_intr(cpu->apic_state)) { +- return -1; +- } +- } +- +- intno = pic_read_irq(isa_pic); +- return intno; +-} +- +-DeviceState *cpu_get_current_apic(void) +-{ +- if (current_cpu) { +- X86CPU *cpu = X86_CPU(current_cpu); +- return cpu->apic_state; +- } else { +- return 
NULL; +- } +-} +- +-void gsi_handler(void *opaque, int n, int level) +-{ +- GSIState *s = opaque; +- +- trace_x86_gsi_interrupt(n, level); +- switch (n) { +- case 0 ... ISA_NUM_IRQS - 1: +- if (s->i8259_irq[n]) { +- /* Under KVM, Kernel will forward to both PIC and IOAPIC */ +- qemu_set_irq(s->i8259_irq[n], level); +- } +- /* fall through */ +- case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +-#ifdef CONFIG_XEN_EMU +- /* +- * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC +- * routing actually works properly under Xen). And then to +- * *either* the PIRQ handling or the I/OAPIC depending on +- * whether the former wants it. +- */ +- if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { +- break; +- } +-#endif +- qemu_set_irq(s->ioapic_irq[n], level); +- break; +- case IO_APIC_SECONDARY_IRQBASE +- ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: +- qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); +- break; +- } +-} +- +-void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- assert(parent); +- if (kvm_ioapic_in_kernel()) { +- dev = qdev_new(TYPE_KVM_IOAPIC); +- } else { +- dev = qdev_new(TYPE_IOAPIC); +- } +- object_property_add_child(parent, "ioapic", OBJECT(dev)); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); +- } +-} +- +-DeviceState *ioapic_init_secondary(GSIState *gsi_state) +-{ +- DeviceState *dev; +- SysBusDevice *d; +- unsigned int i; +- +- dev = qdev_new(TYPE_IOAPIC); +- d = SYS_BUS_DEVICE(dev); +- sysbus_realize_and_unref(d, &error_fatal); +- sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); +- +- for (i = 0; i < IOAPIC_NUM_PINS; i++) { +- gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); +- } +- return dev; +-} +- +-/* +- * The entry point into the kernel for PVH 
boot is different from +- * the native entry point. The PVH entry is defined by the x86/HVM +- * direct boot ABI and is available in an ELFNOTE in the kernel binary. +- * +- * This function is passed to load_elf() when it is called from +- * load_elfboot() which then additionally checks for an ELF Note of +- * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to +- * parse the PVH entry address from the ELF Note. +- * +- * Due to trickery in elf_opts.h, load_elf() is actually available as +- * load_elf32() or load_elf64() and this routine needs to be able +- * to deal with being called as 32 or 64 bit. +- * +- * The address of the PVH entry point is saved to the 'pvh_start_addr' +- * global variable. (although the entry point is 32-bit, the kernel +- * binary can be either 32-bit or 64-bit). +- */ +-static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +-{ +- size_t *elf_note_data_addr; +- +- /* Check if ELF Note header passed in is valid */ +- if (arg1 == NULL) { +- return 0; +- } +- +- if (is64) { +- struct elf64_note *nhdr64 = (struct elf64_note *)arg1; +- uint64_t nhdr_size64 = sizeof(struct elf64_note); +- uint64_t phdr_align = *(uint64_t *)arg2; +- uint64_t nhdr_namesz = nhdr64->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr64) + nhdr_size64 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *elf_note_data_addr; +- } else { +- struct elf32_note *nhdr32 = (struct elf32_note *)arg1; +- uint32_t nhdr_size32 = sizeof(struct elf32_note); +- uint32_t phdr_align = *(uint32_t *)arg2; +- uint32_t nhdr_namesz = nhdr32->n_namesz; +- +- elf_note_data_addr = +- ((void *)nhdr32) + nhdr_size32 + +- QEMU_ALIGN_UP(nhdr_namesz, phdr_align); +- +- pvh_start_addr = *(uint32_t *)elf_note_data_addr; +- } +- +- return pvh_start_addr; +-} +- +-static bool load_elfboot(const char *kernel_filename, +- int kernel_file_size, +- uint8_t *header, +- size_t pvh_xen_start_addr, +- FWCfgState *fw_cfg) +-{ +- uint32_t flags = 0; +- uint32_t 
mh_load_addr = 0; +- uint32_t elf_kernel_size = 0; +- uint64_t elf_entry; +- uint64_t elf_low, elf_high; +- int kernel_size; +- +- if (ldl_p(header) != 0x464c457f) { +- return false; /* no elfboot */ +- } +- +- bool elf_is64 = header[EI_CLASS] == ELFCLASS64; +- flags = elf_is64 ? +- ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; +- +- if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ +- error_report("elfboot unsupported flags = %x", flags); +- exit(1); +- } +- +- uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; +- kernel_size = load_elf(kernel_filename, read_pvh_start_addr, +- NULL, &elf_note_type, &elf_entry, +- &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, +- 0, 0); +- +- if (kernel_size < 0) { +- error_report("Error while loading elf kernel"); +- exit(1); +- } +- mh_load_addr = elf_low; +- elf_kernel_size = elf_high - elf_low; +- +- if (pvh_start_addr == 0) { +- error_report("Error loading uncompressed kernel without PVH ELF Note"); +- exit(1); +- } +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); +- +- return true; +-} +- +-void x86_load_linux(X86MachineState *x86ms, +- FWCfgState *fw_cfg, +- int acpi_data_size, +- bool pvh_enabled) +-{ +- bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; +- uint16_t protocol; +- int setup_size, kernel_size, cmdline_size; +- int dtb_size, setup_data_offset; +- uint32_t initrd_max; +- uint8_t header[8192], *setup, *kernel; +- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; +- FILE *f; +- char *vmode; +- MachineState *machine = MACHINE(x86ms); +- struct setup_data *setup_data; +- const char *kernel_filename = machine->kernel_filename; +- const char *initrd_filename = machine->initrd_filename; +- const char *dtb_filename = machine->dtb; +- const char *kernel_cmdline = machine->kernel_cmdline; +- SevKernelLoaderContext 
sev_load_ctx = {}; +- +- /* Align to 16 bytes as a paranoia measure */ +- cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; +- +- /* load the kernel header */ +- f = fopen(kernel_filename, "rb"); +- if (!f) { +- fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- kernel_size = get_file_size(f); +- if (!kernel_size || +- fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != +- MIN(ARRAY_SIZE(header), kernel_size)) { +- fprintf(stderr, "qemu: could not load kernel '%s': %s\n", +- kernel_filename, strerror(errno)); +- exit(1); +- } +- +- /* kernel protocol version */ +- if (ldl_p(header + 0x202) == 0x53726448) { +- protocol = lduw_p(header + 0x206); +- } else { +- /* +- * This could be a multiboot kernel. If it is, let's stop treating it +- * like a Linux kernel. +- * Note: some multiboot images could be in the ELF format (the same of +- * PVH), so we try multiboot first since we check the multiboot magic +- * header before to load it. +- */ +- if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, +- kernel_cmdline, kernel_size, header)) { +- return; +- } +- /* +- * Check if the file is an uncompressed kernel file (ELF) and load it, +- * saving the PVH entry point used by the x86/HVM direct boot ABI. +- * If load_elfboot() is successful, populate the fw_cfg info. 
+- */ +- if (pvh_enabled && +- load_elfboot(kernel_filename, kernel_size, +- header, pvh_start_addr, fw_cfg)) { +- fclose(f); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, +- strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, +- header, sizeof(header)); +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." 
+- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, +- initrd_size); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "pvh.bin"; +- nb_option_roms++; +- +- return; +- } +- protocol = 0; +- } +- +- if (protocol < 0x200 || !(header[0x211] & 0x01)) { +- /* Low kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x10000; +- } else if (protocol < 0x202) { +- /* High but ancient kernel */ +- real_addr = 0x90000; +- cmdline_addr = 0x9a000 - cmdline_size; +- prot_addr = 0x100000; +- } else { +- /* High and recent kernel */ +- real_addr = 0x10000; +- cmdline_addr = 0x20000; +- prot_addr = 0x100000; +- } +- +- /* highest address for loading the initrd */ +- if (protocol >= 0x20c && +- lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { +- /* +- * Linux has supported initrd up to 4 GB for a very long time (2007, +- * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), +- * though it only sets initrd_max to 2 GB to "work around bootloader +- * bugs". Luckily, QEMU firmware(which does something like bootloader) +- * has supported this. +- * +- * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can +- * be loaded into any address. +- * +- * In addition, initrd_max is uint32_t simply because QEMU doesn't +- * support the 64-bit boot protocol (specifically the ext_ramdisk_image +- * field). +- * +- * Therefore here just limit initrd_max to UINT32_MAX simply as well. 
+- */ +- initrd_max = UINT32_MAX; +- } else if (protocol >= 0x203) { +- initrd_max = ldl_p(header + 0x22c); +- } else { +- initrd_max = 0x37ffffff; +- } +- +- if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { +- initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); +- fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); +- sev_load_ctx.cmdline_data = (char *)kernel_cmdline; +- sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; +- +- if (protocol >= 0x202) { +- stl_p(header + 0x228, cmdline_addr); +- } else { +- stw_p(header + 0x20, 0xA33F); +- stw_p(header + 0x22, cmdline_addr - real_addr); +- } +- +- /* handle vga= parameter */ +- vmode = strstr(kernel_cmdline, "vga="); +- if (vmode) { +- unsigned int video_mode; +- const char *end; +- int ret; +- /* skip "vga=" */ +- vmode += 4; +- if (!strncmp(vmode, "normal", 6)) { +- video_mode = 0xffff; +- } else if (!strncmp(vmode, "ext", 3)) { +- video_mode = 0xfffe; +- } else if (!strncmp(vmode, "ask", 3)) { +- video_mode = 0xfffd; +- } else { +- ret = qemu_strtoui(vmode, &end, 0, &video_mode); +- if (ret != 0 || (*end && *end != ' ')) { +- fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); +- exit(1); +- } +- } +- stw_p(header + 0x1fa, video_mode); +- } +- +- /* loader type */ +- /* +- * High nybble = B reserved for QEMU; low nybble is revision number. +- * If this code is substantially changed, you may want to consider +- * incrementing the revision. 
+- */ +- if (protocol >= 0x200) { +- header[0x210] = 0xB0; +- } +- /* heap */ +- if (protocol >= 0x201) { +- header[0x211] |= 0x80; /* CAN_USE_HEAP */ +- stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); +- } +- +- /* load initrd */ +- if (initrd_filename) { +- GMappedFile *mapped_file; +- gsize initrd_size; +- gchar *initrd_data; +- GError *gerr = NULL; +- +- if (protocol < 0x200) { +- fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); +- exit(1); +- } +- +- mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); +- if (!mapped_file) { +- fprintf(stderr, "qemu: error reading initrd %s: %s\n", +- initrd_filename, gerr->message); +- exit(1); +- } +- x86ms->initrd_mapped_file = mapped_file; +- +- initrd_data = g_mapped_file_get_contents(mapped_file); +- initrd_size = g_mapped_file_get_length(mapped_file); +- if (initrd_size >= initrd_max) { +- fprintf(stderr, "qemu: initrd is too large, cannot support." +- "(max: %"PRIu32", need %"PRId64")\n", +- initrd_max, (uint64_t)initrd_size); +- exit(1); +- } +- +- initrd_addr = (initrd_max - initrd_size) & ~4095; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); +- sev_load_ctx.initrd_data = initrd_data; +- sev_load_ctx.initrd_size = initrd_size; +- +- stl_p(header + 0x218, initrd_addr); +- stl_p(header + 0x21c, initrd_size); +- } +- +- /* load kernel and setup */ +- setup_size = header[0x1f1]; +- if (setup_size == 0) { +- setup_size = 4; +- } +- setup_size = (setup_size + 1) * 512; +- if (setup_size > kernel_size) { +- fprintf(stderr, "qemu: invalid kernel header\n"); +- exit(1); +- } +- kernel_size -= setup_size; +- +- setup = g_malloc(setup_size); +- kernel = g_malloc(kernel_size); +- fseek(f, 0, SEEK_SET); +- if (fread(setup, 1, setup_size, f) != setup_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- if (fread(kernel, 1, 
kernel_size, f) != kernel_size) { +- fprintf(stderr, "fread() failed\n"); +- exit(1); +- } +- fclose(f); +- +- /* append dtb to kernel */ +- if (dtb_filename) { +- if (protocol < 0x209) { +- fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); +- exit(1); +- } +- +- dtb_size = get_image_size(dtb_filename); +- if (dtb_size <= 0) { +- fprintf(stderr, "qemu: error reading dtb %s: %s\n", +- dtb_filename, strerror(errno)); +- exit(1); +- } +- +- setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); +- kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; +- kernel = g_realloc(kernel, kernel_size); +- +- stq_p(header + 0x250, prot_addr + setup_data_offset); +- +- setup_data = (struct setup_data *)(kernel + setup_data_offset); +- setup_data->next = 0; +- setup_data->type = cpu_to_le32(SETUP_DTB); +- setup_data->len = cpu_to_le32(dtb_size); +- +- load_image_size(dtb_filename, setup_data->data, dtb_size); +- } +- +- /* +- * If we're starting an encrypted VM, it will be OVMF based, which uses the +- * efi stub for booting and doesn't require any values to be placed in the +- * kernel header. We therefore don't update the header so the hash of the +- * kernel on the other side of the fw_cfg interface matches the hash of the +- * file the user passed in. 
+- */ +- if (!sev_enabled()) { +- memcpy(setup, header, MIN(sizeof(header), setup_size)); +- } +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); +- sev_load_ctx.kernel_data = (char *)kernel; +- sev_load_ctx.kernel_size = kernel_size; +- +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); +- fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); +- fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); +- sev_load_ctx.setup_data = (char *)setup; +- sev_load_ctx.setup_size = setup_size; +- +- if (sev_enabled()) { +- sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); +- } +- +- option_rom[nb_option_roms].bootindex = 0; +- option_rom[nb_option_roms].name = "linuxboot.bin"; +- if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { +- option_rom[nb_option_roms].name = "linuxboot_dma.bin"; +- } +- nb_option_roms++; +-} +- +-void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, +- MemoryRegion *bios, bool read_only) +-{ +- uint64_t bios_size = memory_region_size(bios); +- uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); +- +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, +- isa_bios, 1); +- memory_region_set_readonly(isa_bios, read_only); +-} +- +-void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, +- MemoryRegion *rom_memory, bool isapc_ram_fw) +-{ +- const char *bios_name; +- char *filename; +- int bios_size; +- ssize_t ret; +- +- /* BIOS load */ +- bios_name = MACHINE(x86ms)->firmware ?: default_firmware; +- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); +- if (filename) { +- bios_size = get_image_size(filename); +- } else { +- bios_size = -1; +- } +- if (bios_size <= 0 || +- (bios_size % 65536) != 0) { +- goto 
bios_error; +- } +- memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, +- &error_fatal); +- if (sev_enabled()) { +- /* +- * The concept of a "reset" simply doesn't exist for +- * confidential computing guests, we have to destroy and +- * re-launch them instead. So there is no need to register +- * the firmware as rom to properly re-initialize on reset. +- * Just go for a straight file load instead. +- */ +- void *ptr = memory_region_get_ram_ptr(&x86ms->bios); +- load_image_size(filename, ptr, bios_size); +- x86_firmware_configure(ptr, bios_size); +- } else { +- memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); +- ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); +- if (ret != 0) { +- goto bios_error; +- } +- } +- g_free(filename); +- +- /* map the last 128KB of the BIOS in ISA space */ +- x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, +- !isapc_ram_fw); +- +- /* map all the bios at the top of memory */ +- memory_region_add_subregion(rom_memory, +- (uint32_t)(-bios_size), +- &x86ms->bios); +- return; +- +-bios_error: +- fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); +- exit(1); +-} +- + bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) + { + bool smm_available = false; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index c2062db13f..b006f16b8d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -21,6 +21,7 @@ + #include "exec/memory.h" + + #include "hw/boards.h" ++#include "hw/i386/topology.h" + #include "hw/intc/ioapic.h" + #include "hw/isa/isa.h" + #include "qom/object.h" +@@ -109,12 +110,11 @@ struct X86MachineState { + #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") + OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) + +-uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, ++void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); ++uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + unsigned 
int cpu_index); + +-void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); +-- +2.39.3 + diff --git a/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch b/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch new file mode 100644 index 0000000..38fd870 --- /dev/null +++ b/kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch @@ -0,0 +1,133 @@ +From ebf08d2a822576acfa60fbd5f552d26de1e4c4be Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:04 +0200 +Subject: [PATCH 040/100] hw/i386/x86: Don't leak "isa-bios" memory regions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [40/91] bb595357c6cc2d5a80bf3873853c69553c5feee5 (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an +"isa_bios" attribute to X86MachineState. 
+ +Suggested-by: Philippe Mathieu-Daudé +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-4-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 32d3ee87a17fc91e981a23dba94855bff89f5920) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_sysfw.c | 7 +++---- + hw/i386/x86.c | 9 ++++----- + include/hw/i386/x86.h | 7 +++++++ + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c +index 59c7a81692..82d37cb376 100644 +--- a/hw/i386/pc_sysfw.c ++++ b/hw/i386/pc_sysfw.c +@@ -40,11 +40,10 @@ + + #define FLASH_SECTOR_SIZE 4096 + +-static void pc_isa_bios_init(MemoryRegion *rom_memory, ++static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, + MemoryRegion *flash_mem) + { + int isa_bios_size; +- MemoryRegion *isa_bios; + uint64_t flash_size; + void *flash_ptr, *isa_bios_ptr; + +@@ -52,7 +51,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(flash_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + memory_region_add_subregion_overlap(rom_memory, +@@ -136,6 +134,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) + static void pc_system_flash_map(PCMachineState *pcms, + MemoryRegion *rom_memory) + { ++ X86MachineState *x86ms = X86_MACHINE(pcms); + hwaddr total_size = 0; + int i; + BlockBackend *blk; +@@ -185,7 +184,7 @@ static void pc_system_flash_map(PCMachineState *pcms, + + if (i == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); +- pc_isa_bios_init(rom_memory, flash_mem); ++ pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + + /* Encrypt the pflash boot ROM */ + if (sev_enabled()) { +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 6d3c72f124..457e8a34a5 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c 
+@@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios, *isa_bios; ++ MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- isa_bios = g_malloc(sizeof(*isa_bios)); +- memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +- isa_bios, ++ &x86ms->isa_bios, + 1); +- memory_region_set_readonly(isa_bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index cb07618d19..a07de79167 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -18,6 +18,7 @@ + #define HW_I386_X86_H + + #include "exec/hwaddr.h" ++#include "exec/memory.h" + + #include "hw/boards.h" + #include "hw/intc/ioapic.h" +@@ -52,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address ++ * boundary. 
++ */ ++ MemoryRegion isa_bios; ++ + /* RAM information (sizes, addresses, configuration): */ + ram_addr_t below_4g_mem_size, above_4g_mem_size; + +-- +2.39.3 + diff --git a/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch b/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch new file mode 100644 index 0000000..7a61f95 --- /dev/null +++ b/kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch @@ -0,0 +1,105 @@ +From e1f2265b5f6bf5b63bf3808bb540888f3cf8badb Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:05 +0200 +Subject: [PATCH 041/100] hw/i386/x86: Don't leak "pc.bios" memory region +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [41/91] a9cd61d8d240134c09c46e244efb89217cadf60c (bonzini/rhel-qemu-kvm) + +Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to +X86MachineState. Note that it is only used in the -bios case. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-5-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 865d95321ffc8d9941e33000b10140550f094556) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 13 ++++++------- + include/hw/i386/x86.h | 6 ++++++ + 2 files changed, 12 insertions(+), 7 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 457e8a34a5..29167de97d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + { + const char *bios_name; + char *filename; +- MemoryRegion *bios; + int bios_size, isa_bios_size; + ssize_t ret; + +@@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + (bios_size % 65536) != 0) { + goto bios_error; + } +- bios = g_malloc(sizeof(*bios)); +- memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); ++ memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, ++ &error_fatal); + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for +@@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. 
+ */ +- void *ptr = memory_region_get_ram_ptr(bios); ++ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- memory_region_set_readonly(bios, !isapc_ram_fw); ++ memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + + /* map the last 128KB of the BIOS in ISA space */ + isa_bios_size = MIN(bios_size, 128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, ++ memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(rom_memory, + 0x100000 - isa_bios_size, +@@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), +- bios); ++ &x86ms->bios); + return; + + bios_error: +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index a07de79167..55c6809ae0 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -53,6 +53,12 @@ struct X86MachineState { + GMappedFile *initrd_mapped_file; + HotplugHandler *acpi_dev; + ++ /* ++ * Map the whole BIOS just underneath the 4 GiB address boundary. Only used ++ * in the ROM (-bios) case. ++ */ ++ MemoryRegion bios; ++ + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. 
+-- +2.39.3 + diff --git a/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch b/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch new file mode 100644 index 0000000..b9c18e7 --- /dev/null +++ b/kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch @@ -0,0 +1,69 @@ +From b9d0c78f04160fbc1eee6cfd94b17f1133a35d83 Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Tue, 30 Apr 2024 17:06:38 +0200 +Subject: [PATCH 037/100] hw/i386/x86: Eliminate two if statements in + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [37/91] 1ef6a13214e85f6ef773f5c894c720f20330912b (bonzini/rhel-qemu-kvm) + +Given that memory_region_set_readonly() is a no-op when the readonlyness is +already as requested it is possible to simplify the pattern + + if (condition) { + foo(true); + } + +to + + foo(condition); + +which is shorter and allows to see the invariant of the code more easily. 
+ +Signed-off-by: Bernhard Beschow +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240430150643.111976-2-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 014dbdac8798799d081abc9dff3e4876ca54f49e) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 3d5b51e92d..2a4f3ee285 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { +- if (!isapc_ram_fw) { +- memory_region_set_readonly(bios, true); +- } ++ memory_region_set_readonly(bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; +@@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, + 0x100000 - isa_bios_size, + isa_bios, + 1); +- if (!isapc_ram_fw) { +- memory_region_set_readonly(isa_bios, true); +- } ++ memory_region_set_readonly(isa_bios, !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +-- +2.39.3 + diff --git a/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch b/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch new file mode 100644 index 0000000..6ce9c72 --- /dev/null +++ b/kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch @@ -0,0 +1,98 @@ +From 1baf67564d4227d6ba98923217a15814c438c32b Mon Sep 17 00:00:00 2001 +From: Bernhard Beschow +Date: Wed, 8 May 2024 19:55:06 +0200 +Subject: [PATCH 042/100] hw/i386/x86: Extract x86_isa_bios_init() from + x86_bios_rom_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth 
+RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [42/91] 1db417a5995480924f7fd0661a306f2d2bfa0a77 (bonzini/rhel-qemu-kvm) + +The function is inspired by pc_isa_bios_init() and should eventually replace it. +Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands +to work in the isa-bios region. + +While at it convert the magic number 0x100000 (== 1MiB) to increase readability. + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Bernhard Beschow +Message-ID: <20240508175507.22270-6-shentey@gmail.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 25 ++++++++++++++++--------- + include/hw/i386/x86.h | 2 ++ + 2 files changed, 18 insertions(+), 9 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 29167de97d..c61f4ebfa6 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms, + nb_option_roms++; + } + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only) ++{ ++ uint64_t bios_size = memory_region_size(bios); ++ uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); ++ ++ memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, ++ bios_size - isa_bios_size, isa_bios_size); ++ memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, ++ isa_bios, 1); ++ memory_region_set_readonly(isa_bios, read_only); ++} ++ + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) + { + const char *bios_name; + char *filename; +- int bios_size, isa_bios_size; ++ int bios_size; + ssize_t ret; + + /* BIOS load */ +@@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ +- isa_bios_size = MIN(bios_size, 
128 * KiB); +- memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, +- bios_size - isa_bios_size, isa_bios_size); +- memory_region_add_subregion_overlap(rom_memory, +- 0x100000 - isa_bios_size, +- &x86ms->isa_bios, +- 1); +- memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); ++ x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, ++ !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index 55c6809ae0..d7b7d3f3ce 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -129,6 +129,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp); + ++void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, ++ MemoryRegion *bios, bool read_only); + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw); + +-- +2.39.3 + diff --git a/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch b/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch new file mode 100644 index 0000000..8f69f9e --- /dev/null +++ b/kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch @@ -0,0 +1,68 @@ +From f572a40924c7138072e387111d0f092185972477 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:39 +0200 +Subject: [PATCH 044/100] i386: correctly select code in hw/i386 that depends + on other components + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [44/91] 1327a5eb2b91edacf56cc4e93255cad456abbbeb (bonzini/rhel-qemu-kvm) + +fw_cfg.c and vapic.c are currently included unconditionally but +depend on other components. 
vapic.c depends on the local APIC, +while fw_cfg.c includes a piece of AML builder code that depends +on CONFIG_ACPI. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-9-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7974e51342775c87f6e759a8c525db1045ddfa24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/fw_cfg.c | 2 ++ + hw/i386/meson.build | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 283c3f4c16..7f97d40616 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -204,6 +204,7 @@ void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg) + fw_cfg_add_file(fw_cfg, "etc/msr_feature_control", val, sizeof(*val)); + } + ++#ifdef CONFIG_ACPI + void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + { + /* +@@ -230,3 +231,4 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } ++#endif +diff --git a/hw/i386/meson.build b/hw/i386/meson.build +index d8b70ef3e9..d9da676038 100644 +--- a/hw/i386/meson.build ++++ b/hw/i386/meson.build +@@ -1,12 +1,12 @@ + i386_ss = ss.source_set() + i386_ss.add(files( + 'fw_cfg.c', +- 'vapic.c', + 'e820_memory_layout.c', + 'multiboot.c', + 'x86.c', + )) + ++i386_ss.add(when: 'CONFIG_APIC', if_true: files('vapic.c')) + i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), + if_false: files('x86-iommu-stub.c')) + i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), +-- +2.39.3 + diff --git a/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch b/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch new file mode 100644 index 0000000..31a7e92 --- /dev/null +++ b/kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch @@ -0,0 +1,40 @@ +From 127f3c60668e1bd08ec00856a317cb841adf0440 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:23 -0500 
+Subject: [PATCH 063/100] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [63/91] 0f834a6897c5cdc0e29a5b1862e621f8ce309657 (bonzini/rhel-qemu-kvm) + +SNP guests will rely on this bit to determine certain feature support. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7831221941cccbde922412c1550ed8b4bce7c361) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 489c853b42..13737cd703 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6822,6 +6822,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; ++ *eax |= sev_snp_enabled() ? 
0x10 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } +-- +2.39.3 + diff --git a/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch b/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch new file mode 100644 index 0000000..fd604d2 --- /dev/null +++ b/kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch @@ -0,0 +1,145 @@ +From 14aa42bbacde75b2ce9a59d1267f73d613026461 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:42 -0500 +Subject: [PATCH 076/100] i386/kvm: Add KVM_EXIT_HYPERCALL handling for + KVM_HC_MAP_GPA_RANGE + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [76/91] 3e1201c330dc826af1ec4650974d47053270eb16 (bonzini/rhel-qemu-kvm) + +KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for +private/shared memory attribute updates requested by the guest. +Implement handling for that use-case along with some basic +infrastructure for enabling specific hypercall events. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 47e76d03b155e43beca550251a6eb7ea926c059f) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + target/i386/kvm/trace-events | 1 + + 3 files changed, 57 insertions(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 75e75d9772..2935e3931a 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -21,6 +21,7 @@ + #include + + #include ++#include + #include "standard-headers/asm-x86/kvm_para.h" + #include "hw/xen/interface/arch-x86/cpuid.h" + +@@ -208,6 +209,13 @@ int kvm_get_vm_type(MachineState *ms) + return kvm_type; + } + ++bool kvm_enable_hypercall(uint64_t enable_mask) ++{ ++ KVMState *s = KVM_STATE(current_accel()); ++ ++ return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +@@ -5325,6 +5333,50 @@ static bool host_supports_vmx(void) + return ecx & CPUID_EXT_VMX; + } + ++/* ++ * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE ++ * to service guest-initiated memory attribute update requests so that ++ * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be ++ * backed by the private memory pool provided by guest_memfd, and as such ++ * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). ++ * ++ * Other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live ++ * migration, are not implemented here currently. ++ * ++ * For the guest_memfd use-case, these exits will generally be synthesized ++ * by KVM based on platform-specific hypercalls, like GHCB requests in the ++ * case of SEV-SNP, and not issued directly within the guest through the ++ * KVM_HC_MAP_GPA_RANGE hypercall.
So in this case, KVM_HC_MAP_GPA_RANGE is ++ * not actually advertised to guests via the KVM CPUID feature bit, as ++ * opposed to SEV live migration where it would be. Since it is unlikely the ++ * SEV live migration use-case would be useful for guest-memfd backed guests, ++ * because private/shared page tracking is already provided through other ++ * means, these 2 use-cases should be treated as being mutually-exclusive. ++ */ ++static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) ++{ ++ uint64_t gpa, size, attributes; ++ ++ if (!machine_require_guest_memfd(current_machine)) ++ return -EINVAL; ++ ++ gpa = run->hypercall.args[0]; ++ size = run->hypercall.args[1] * TARGET_PAGE_SIZE; ++ attributes = run->hypercall.args[2]; ++ ++ trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); ++ ++ return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); ++} ++ ++static int kvm_handle_hypercall(struct kvm_run *run) ++{ ++ if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) ++ return kvm_handle_hc_map_gpa_range(run); ++ ++ return -EINVAL; ++} ++ + #define VMX_INVALID_GUEST_STATE 0x80000021 + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +@@ -5420,6 +5472,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; + #endif ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_handle_hypercall(run); ++ break; + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 6b44844d95..34fc60774b 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -33,6 +33,7 @@ + bool kvm_has_smm(void); + bool kvm_enable_x2apic(void); + bool kvm_hv_vpindex_settable(void); ++bool kvm_enable_hypercall(uint64_t enable_mask); + + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +diff --git 
a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events +index b365a8e8e2..74a6234ff7 100644 +--- a/target/i386/kvm/trace-events ++++ b/target/i386/kvm/trace-events +@@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" + kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" + kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" + kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" ++kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 + + # xen-emu.c + kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 +-- +2.39.3 + diff --git a/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch b/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch new file mode 100644 index 0000000..4b91e93 --- /dev/null +++ b/kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch @@ -0,0 +1,536 @@ +From 5ead79f45e8e90b7a04586c89e70cb9d0b66b730 Mon Sep 17 00:00:00 2001 +From: Sean Christopherson +Date: Thu, 29 Feb 2024 01:36:43 -0500 +Subject: [PATCH 004/100] i386/kvm: Move architectural CPUID leaf generation to + separate helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [4/91] 06ecdbcf05ad3d658273980b114f02477d0b0475 (bonzini/rhel-qemu-kvm) + +Move the architectural (for lack of a better term) CPUID leaf generation +to a separate helper so that the generation code can be reused by TDX, +which needs to generate a canonical VM-scoped configuration. + +For now this is just a cleanup, so keep the function static. 
+ +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> +Reviewed-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 417 +++++++++++++++++++++--------------------- + 1 file changed, 211 insertions(+), 206 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 739f33db47..5f30b649a0 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1706,195 +1706,22 @@ static void kvm_init_nested_state(CPUX86State *env) + } + } + +-int kvm_arch_init_vcpu(CPUState *cs) ++static uint32_t kvm_x86_build_cpuid(CPUX86State *env, ++ struct kvm_cpuid_entry2 *entries, ++ uint32_t cpuid_i) + { +- struct { +- struct kvm_cpuid2 cpuid; +- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +- } cpuid_data; +- /* +- * The kernel defines these structs with padding fields so there +- * should be no extra padding in our cpuid_data struct. +- */ +- QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != +- sizeof(struct kvm_cpuid2) + +- sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); +- +- X86CPU *cpu = X86_CPU(cs); +- CPUX86State *env = &cpu->env; +- uint32_t limit, i, j, cpuid_i; ++ uint32_t limit, i, j; + uint32_t unused; + struct kvm_cpuid_entry2 *c; +- uint32_t signature[3]; +- int kvm_base = KVM_CPUID_SIGNATURE; +- int max_nested_state_len; +- int r; +- Error *local_err = NULL; +- +- memset(&cpuid_data, 0, sizeof(cpuid_data)); +- +- cpuid_i = 0; +- +- has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); +- +- r = kvm_arch_set_tsc_khz(cs); +- if (r < 0) { +- return r; +- } +- +- /* vcpu's TSC frequency is either specified by user, or following +- * the value used by KVM if the former is not present. In the +- * latter case, we query it from KVM and record in env->tsc_khz, +- * so that vcpu's TSC frequency can be migrated later via this field. 
+- */ +- if (!env->tsc_khz) { +- r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? +- kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : +- -ENOTSUP; +- if (r > 0) { +- env->tsc_khz = r; +- } +- } +- +- env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; +- +- /* +- * kvm_hyperv_expand_features() is called here for the second time in case +- * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle +- * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to +- * check which Hyper-V enlightenments are supported and which are not, we +- * can still proceed and check/expand Hyper-V enlightenments here so legacy +- * behavior is preserved. +- */ +- if (!kvm_hyperv_expand_features(cpu, &local_err)) { +- error_report_err(local_err); +- return -ENOSYS; +- } +- +- if (hyperv_enabled(cpu)) { +- r = hyperv_init_vcpu(cpu); +- if (r) { +- return r; +- } +- +- cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); +- kvm_base = KVM_CPUID_SIGNATURE_NEXT; +- has_msr_hv_hypercall = true; +- } +- +- if (cs->kvm_state->xen_version) { +-#ifdef CONFIG_XEN_EMU +- struct kvm_cpuid_entry2 *xen_max_leaf; +- +- memcpy(signature, "XenVMMXenVMM", 12); +- +- xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_SIGNATURE; +- c->eax = kvm_base + XEN_CPUID_TIME; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_VENDOR; +- c->eax = cs->kvm_state->xen_version; +- c->ebx = 0; +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM_MSR; +- /* Number of hypercall-transfer pages */ +- c->eax = 1; +- /* Hypercall MSR base address */ +- if (hyperv_enabled(cpu)) { +- c->ebx = XEN_HYPERCALL_MSR_HYPERV; +- kvm_xen_init(cs->kvm_state, c->ebx); +- } else { +- c->ebx = XEN_HYPERCALL_MSR; +- } +- c->ecx = 0; +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = 
kvm_base + XEN_CPUID_TIME; +- c->eax = ((!!tsc_is_stable_and_known(env) << 1) | +- (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); +- /* default=0 (emulate if necessary) */ +- c->ebx = 0; +- /* guest tsc frequency */ +- c->ecx = env->user_tsc_khz; +- /* guest tsc incarnation (migration count) */ +- c->edx = 0; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = kvm_base + XEN_CPUID_HVM; +- xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { +- c->function = kvm_base + XEN_CPUID_HVM; +- +- if (cpu->xen_vapic) { +- c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; +- c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; +- } +- +- c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { +- c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; +- c->ebx = cs->cpu_index; +- } +- +- if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { +- c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; +- } +- } +- +- r = kvm_xen_init_vcpu(cs); +- if (r) { +- return r; +- } +- +- kvm_base += 0x100; +-#else /* CONFIG_XEN_EMU */ +- /* This should never happen as kvm_arch_init() would have died first. 
*/ +- fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); +- abort(); +-#endif +- } else if (cpu->expose_kvm) { +- memcpy(signature, "KVMKVMKVM\0\0\0", 12); +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_SIGNATURE | kvm_base; +- c->eax = KVM_CPUID_FEATURES | kvm_base; +- c->ebx = signature[0]; +- c->ecx = signature[1]; +- c->edx = signature[2]; +- +- c = &cpuid_data.entries[cpuid_i++]; +- c->function = KVM_CPUID_FEATURES | kvm_base; +- c->eax = env->features[FEAT_KVM]; +- c->edx = env->features[FEAT_KVM_HINTS]; +- } + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + +- if (cpu->kvm_pv_enforce_cpuid) { +- r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); +- if (r < 0) { +- fprintf(stderr, +- "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", +- strerror(-r)); +- abort(); +- } +- } +- + for (i = 0; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported level value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; +- ++ c = &entries[cpuid_i++]; + switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ +@@ -1908,11 +1735,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:2):eax & 0xf = 0x%x\n", times); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); +@@ -1951,11 +1776,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 
0x12: +@@ -1970,11 +1793,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x12,ecx:0x%x)\n", j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + case 0x7: +@@ -1991,11 +1812,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +@@ -2048,11 +1867,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: +@@ -2067,11 +1886,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "cpuid_data is full, no space for " +- "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + } + break; + default: +@@ -2094,11 +1911,11 @@ int kvm_arch_init_vcpu(CPUState *cs) + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { ++ j = 0; + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { +- fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); +- abort(); ++ goto full; + } +- c = &cpuid_data.entries[cpuid_i++]; ++ c = &entries[cpuid_i++]; + + c->function = i; + c->flags = 0; +@@ -2106,6 +1923,194 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + } + 
++ return cpuid_i; ++ ++full: ++ fprintf(stderr, "cpuid_data is full, no space for " ++ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); ++ abort(); ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ struct { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++ } cpuid_data; ++ /* ++ * The kernel defines these structs with padding fields so there ++ * should be no extra padding in our cpuid_data struct. ++ */ ++ QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != ++ sizeof(struct kvm_cpuid2) + ++ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); ++ ++ X86CPU *cpu = X86_CPU(cs); ++ CPUX86State *env = &cpu->env; ++ uint32_t cpuid_i; ++ struct kvm_cpuid_entry2 *c; ++ uint32_t signature[3]; ++ int kvm_base = KVM_CPUID_SIGNATURE; ++ int max_nested_state_len; ++ int r; ++ Error *local_err = NULL; ++ ++ memset(&cpuid_data, 0, sizeof(cpuid_data)); ++ ++ cpuid_i = 0; ++ ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ ++ r = kvm_arch_set_tsc_khz(cs); ++ if (r < 0) { ++ return r; ++ } ++ ++ /* vcpu's TSC frequency is either specified by user, or following ++ * the value used by KVM if the former is not present. In the ++ * latter case, we query it from KVM and record in env->tsc_khz, ++ * so that vcpu's TSC frequency can be migrated later via this field. ++ */ ++ if (!env->tsc_khz) { ++ r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? ++ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : ++ -ENOTSUP; ++ if (r > 0) { ++ env->tsc_khz = r; ++ } ++ } ++ ++ env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; ++ ++ /* ++ * kvm_hyperv_expand_features() is called here for the second time in case ++ * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle ++ * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to ++ * check which Hyper-V enlightenments are supported and which are not, we ++ * can still proceed and check/expand Hyper-V enlightenments here so legacy ++ * behavior is preserved. 
++ */ ++ if (!kvm_hyperv_expand_features(cpu, &local_err)) { ++ error_report_err(local_err); ++ return -ENOSYS; ++ } ++ ++ if (hyperv_enabled(cpu)) { ++ r = hyperv_init_vcpu(cpu); ++ if (r) { ++ return r; ++ } ++ ++ cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); ++ kvm_base = KVM_CPUID_SIGNATURE_NEXT; ++ has_msr_hv_hypercall = true; ++ } ++ ++ if (cs->kvm_state->xen_version) { ++#ifdef CONFIG_XEN_EMU ++ struct kvm_cpuid_entry2 *xen_max_leaf; ++ ++ memcpy(signature, "XenVMMXenVMM", 12); ++ ++ xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_SIGNATURE; ++ c->eax = kvm_base + XEN_CPUID_TIME; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_VENDOR; ++ c->eax = cs->kvm_state->xen_version; ++ c->ebx = 0; ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM_MSR; ++ /* Number of hypercall-transfer pages */ ++ c->eax = 1; ++ /* Hypercall MSR base address */ ++ if (hyperv_enabled(cpu)) { ++ c->ebx = XEN_HYPERCALL_MSR_HYPERV; ++ kvm_xen_init(cs->kvm_state, c->ebx); ++ } else { ++ c->ebx = XEN_HYPERCALL_MSR; ++ } ++ c->ecx = 0; ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_TIME; ++ c->eax = ((!!tsc_is_stable_and_known(env) << 1) | ++ (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); ++ /* default=0 (emulate if necessary) */ ++ c->ebx = 0; ++ /* guest tsc frequency */ ++ c->ecx = env->user_tsc_khz; ++ /* guest tsc incarnation (migration count) */ ++ c->edx = 0; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = kvm_base + XEN_CPUID_HVM; ++ xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { ++ c->function = kvm_base + XEN_CPUID_HVM; ++ ++ if (cpu->xen_vapic) { ++ c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; ++ c->eax |= 
XEN_HVM_CPUID_X2APIC_VIRT; ++ } ++ ++ c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { ++ c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; ++ c->ebx = cs->cpu_index; ++ } ++ ++ if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { ++ c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; ++ } ++ } ++ ++ r = kvm_xen_init_vcpu(cs); ++ if (r) { ++ return r; ++ } ++ ++ kvm_base += 0x100; ++#else /* CONFIG_XEN_EMU */ ++ /* This should never happen as kvm_arch_init() would have died first. */ ++ fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); ++ abort(); ++#endif ++ } else if (cpu->expose_kvm) { ++ memcpy(signature, "KVMKVMKVM\0\0\0", 12); ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_SIGNATURE | kvm_base; ++ c->eax = KVM_CPUID_FEATURES | kvm_base; ++ c->ebx = signature[0]; ++ c->ecx = signature[1]; ++ c->edx = signature[2]; ++ ++ c = &cpuid_data.entries[cpuid_i++]; ++ c->function = KVM_CPUID_FEATURES | kvm_base; ++ c->eax = env->features[FEAT_KVM]; ++ c->edx = env->features[FEAT_KVM_HINTS]; ++ } ++ ++ if (cpu->kvm_pv_enforce_cpuid) { ++ r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); ++ if (r < 0) { ++ fprintf(stderr, ++ "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", ++ strerror(-r)); ++ abort(); ++ } ++ } ++ ++ cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); + cpuid_data.cpuid.nent = cpuid_i; + + if (((env->cpuid_version >> 8)&0xF) >= 6 +-- +2.39.3 + diff --git a/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch b/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch new file mode 100644 index 0000000..65d09ab --- /dev/null +++ b/kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch @@ -0,0 +1,91 @@ +From 03e275023b482ac79b4f92ca4ceef6de3caa634f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 9 May 2024 19:00:40 +0200 +Subject: [PATCH 045/100] i386: pc: remove unnecessary MachineClass overrides + +RH-Author: Paolo Bonzini 
+RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [45/91] c03d5b57014d0d02f6ce0cdfb19a34996d100dea (bonzini/rhel-qemu-kvm) + +There is no need to override these fields of MachineClass because they are +already set to the right value in the superclass. + +Signed-off-by: Paolo Bonzini +Reviewed-by: Zhao Liu +Message-ID: <20240509170044.190795-10-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b348fdcdac9f9fc70be9ae56c54e41765e9aae24) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 3 --- + hw/i386/x86.c | 6 +++--- + include/hw/i386/x86.h | 4 ---- + 3 files changed, 3 insertions(+), 10 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 660a59c63b..0aca0cc79e 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1979,9 +1979,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) + mc->async_pf_vmexit_disable = false; + mc->get_hotplug_handler = pc_get_hotplug_handler; + mc->hotplug_allowed = pc_hotplug_allowed; +- mc->cpu_index_to_instance_props = x86_cpu_index_to_props; +- mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; +- mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->auto_enable_numa_with_memhp = true; + mc->auto_enable_numa_with_memdev = true; + mc->has_hotpluggable_cpus = true; +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index c61f4ebfa6..fcef652c1e 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -443,7 +443,7 @@ void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + numa_cpu_pre_plug(cpu_slot, dev, errp); + } + +-CpuInstanceProperties ++static CpuInstanceProperties + x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -453,7 +453,7 @@ x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) + return possible_cpus->cpus[cpu_index].props; + } + +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) 
++static int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + { + X86CPUTopoIDs topo_ids; + X86MachineState *x86ms = X86_MACHINE(ms); +@@ -467,7 +467,7 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) ++static const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + { + X86MachineState *x86ms = X86_MACHINE(ms); + unsigned int max_cpus = ms->smp.max_cpus; +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index d7b7d3f3ce..c2062db13f 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -114,10 +114,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, + + void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); + void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); +-CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, +- unsigned cpu_index); +-int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); +-const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); + CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); + void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); + void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch b/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch new file mode 100644 index 0000000..fce51aa --- /dev/null +++ b/kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch @@ -0,0 +1,116 @@ +From 652793962000d6906e219ceae36348a476b78c28 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:44:44 +0200 +Subject: [PATCH 065/100] i386/sev: Add a class method to determine KVM VM type + for SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: 
Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [65/91] c6cbeac0a6f691138df212b80efaa9b1143fdaa8 (bonzini/rhel-qemu-kvm) + +SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, +or KVM_X86_SEV_ES_VM depending on the configuration and what +the host kernel supports. SNP guests on the other hand can only +ever use KVM_X86_SNP_VM, so split determination of VM type out +into a separate class method that can be set accordingly for +sev-guest vs. sev-snp-guest objects and add handling for SNP. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> +[Remove unnecessary function pointer declaration. - Paolo] +Signed-off-by: Paolo Bonzini +(cherry picked from commit a808132f6d8e855bd83a400570ec91d2e00bebe3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 1 + + target/i386/sev.c | 15 ++++++++++++--- + 2 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 408568d053..75e75d9772 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -166,6 +166,7 @@ static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", ++ [KVM_X86_SNP_VM] = "SEV-SNP", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c3daaf1ad5..072cc4f853 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,6 +885,11 @@ out: + return sev_common->kvm_type; + } + ++static int sev_snp_kvm_type(X86ConfidentialGuest *cg) ++{ ++ return KVM_X86_SNP_VM; ++} ++ + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + char *devname; +@@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ++ X86ConfidentialGuestClass *x86_klass = 
++ X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; + +@@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +@@ -1441,10 +1448,8 @@ static void + sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); +- X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_common_kvm_init; +- x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_common_get_sev_device, +@@ -1529,10 +1534,12 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +@@ -1770,8 +1777,10 @@ static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; ++ x86_klass->kvm_type = sev_snp_kvm_type; + + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch b/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch new file mode 100644 index 0000000..d194994 --- /dev/null +++ 
b/kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch @@ -0,0 +1,84 @@ +From 82a714b79851b5c2d1389d2fa7a01548c486a854 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:20 -0500 +Subject: [PATCH 060/100] i386/sev: Add a sev_snp_enabled() helper + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [60/91] c35ead095028ccfb1e1be0fe010ca4f7688530a0 (bonzini/rhel-qemu-kvm) + +Add a simple helper to check if the current guest type is SNP. Also have +SNP-enabled imply that SEV-ES is enabled as well, and fix up any places +where the sev_es_enabled() check is expecting a pure/non-SNP guest. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 99190f805dca9475fe244fbd8041961842657dc2) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++++- + target/i386/sev.h | 2 ++ + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index a81b3228d4..4edfedc139 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -325,12 +325,21 @@ sev_enabled(void) + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + ++bool ++sev_snp_enabled(void) ++{ ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); ++} ++ + bool + sev_es_enabled(void) + { + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + +- return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); ++ return sev_snp_enabled() || ++ (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t +@@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + "support", __func__); + goto err; + } ++ } + ++ if (sev_es_enabled() && 
!sev_snp_enabled()) { + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", +diff --git a/target/i386/sev.h b/target/i386/sev.h +index bedc667eeb..94295ee74f 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext { + #ifdef CONFIG_SEV + bool sev_enabled(void); + bool sev_es_enabled(void); ++bool sev_snp_enabled(void); + #else + #define sev_enabled() 0 + #define sev_es_enabled() 0 ++#define sev_snp_enabled() 0 + #endif + + uint32_t sev_get_cbit_position(void); +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch b/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch new file mode 100644 index 0000000..2bab2ac --- /dev/null +++ b/kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch @@ -0,0 +1,187 @@ +From 0e435819540b0d39da2c828aacc0f35ecaadbdf6 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:28 -0500 +Subject: [PATCH 068/100] i386/sev: Add handling to encrypt/finalize guest + launch data + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [68/91] fe77931d279aa8df061823da88a320fb5f72ffea (bonzini/rhel-qemu-kvm) + +Process any queued up launch data and encrypt/measure it into the SNP +guest instance prior to initial guest launch. + +This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial +update responses. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9f3a6999f9730a694d7db448a99f9c9cb6515992) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- + target/i386/trace-events | 2 + + 2 files changed, 113 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index e89b87d2f5..ef2e592ca7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -756,6 +756,76 @@ out: + return ret; + } + ++static const char * ++snp_page_type_to_str(int type) ++{ ++ switch (type) { ++ case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; ++ case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; ++ case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; ++ case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; ++ case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; ++ default: return "unknown"; ++ } ++} ++ ++static int ++sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, ++ SevLaunchUpdateData *data) ++{ ++ int ret, fw_error; ++ struct kvm_sev_snp_launch_update update = {0}; ++ ++ if (!data->hva || !data->len) { ++ error_report("SNP_LAUNCH_UPDATE called with invalid address" ++ "/ length: %p / %lx", ++ data->hva, data->len); ++ return 1; ++ } ++ ++ update.uaddr = (__u64)(unsigned long)data->hva; ++ update.gfn_start = data->gpa >> TARGET_PAGE_BITS; ++ update.len = data->len; ++ update.type = data->type; ++ ++ /* ++ * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private ++ * memory attribute set in advance. 
++ */ ++ ret = kvm_set_memory_attributes_private(data->gpa, data->len); ++ if (ret) { ++ error_report("SEV-SNP: failed to configure initial" ++ "private guest memory"); ++ goto out; ++ } ++ ++ while (update.len || ret == -EAGAIN) { ++ trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << ++ TARGET_PAGE_BITS, update.len, ++ snp_page_type_to_str(update.type)); ++ ++ ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ &update, &fw_error); ++ if (ret && ret != -EAGAIN) { ++ error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ++ ret, fw_error, fw_error_to_str(fw_error)); ++ break; ++ } ++ } ++ ++out: ++ if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { ++ error_report("SEV-SNP: expected update of GPA range %lx-%lx," ++ "got GPA range %lx-%llx", ++ data->gpa, data->gpa + data->len, data->gpa, ++ update.gfn_start << TARGET_PAGE_BITS); ++ ret = -EIO; ++ } ++ ++ return ret; ++} ++ + static int + sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { +@@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static void ++sev_snp_launch_finish(SevCommonState *sev_common) ++{ ++ int ret, error; ++ Error *local_err = NULL; ++ SevLaunchUpdateData *data; ++ SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; ++ ++ QTAILQ_FOREACH(data, &launch_update, next) { ++ ret = sev_snp_launch_update(sev_snp, data); ++ if (ret) { ++ exit(1); ++ } ++ } ++ ++ trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, ++ sev_snp->host_data); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, ++ finish, &error); ++ if (ret) { ++ error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", ++ ret, error, fw_error_to_str(error)); ++ exit(1); ++ } ++ ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); ++ ++ /* add migration 
blocker */ ++ error_setg(&sev_mig_blocker, ++ "SEV-SNP: Migration is not implemented"); ++ ret = migrate_add_blocker(&sev_mig_blocker, &local_err); ++ if (local_err) { ++ error_report_err(local_err); ++ error_free(sev_mig_blocker); ++ exit(1); ++ } ++} ++ ++ + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +@@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->launch_start = sev_snp_launch_start; ++ klass->launch_finish = sev_snp_launch_finish; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +- + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index cb26d8a925..06b44ead2e 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" + kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" ++kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" ++kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch b/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch new file mode 100644 index 0000000..572dddc --- /dev/null +++ b/kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch @@ -0,0 +1,127 @@ +From 2872c423fa44dcbf50b581a5c3feac064a0473a0 Mon Sep 17 00:00:00 2001 +From: Michael Roth 
+Date: Tue, 9 Apr 2024 18:07:41 -0500 +Subject: [PATCH 024/100] i386/sev: Add 'legacy-vm-type' parameter for SEV + guest objects + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [24/91] ce35d1b09fe8aa8772ff149543f7760455c1e6b5 (bonzini/rhel-qemu-kvm) + +QEMU will currently automatically make use of the KVM_SEV_INIT2 API for +initializing SEV and SEV-ES guests versus the older +KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. + +However, the older interfaces will silently avoid sync'ing FPU/XSAVE +state to the VMSA prior to encryption, thus relying on behavior and +measurements that assume the related fields to be all zero. + +With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in +measurement changes and, theoretically, behavioral changes, though the +latter are unlikely to be seen in practice. + +To allow a smooth transition to the newer interface, while still +providing a mechanism to maintain backward compatibility with VMs +created using the older interfaces, provide a new command-line +parameter: + + -object sev-guest,legacy-vm-type=true,... + +and have it default to false. + +Signed-off-by: Michael Roth +Message-ID: <20240409230743.962513-2-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 023267334da375226720e62963df9545aa8fc2fd) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 11 ++++++++++- + target/i386/sev.c | 18 +++++++++++++++++- + 2 files changed, 27 insertions(+), 2 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 85e6b4f84a..38dde6d785 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -898,6 +898,14 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. 
++# The newer KVM_SEV_INIT2 interface syncs additional vCPU ++# state when initializing the VMSA structures, which will ++# result in a different guest measurement. Set this to ++# maintain compatibility with older QEMU or kernel versions ++# that rely on legacy KVM_SEV_INIT behavior. ++# (default: false) (since 9.1) ++# + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +@@ -908,7 +916,8 @@ + '*handle': 'uint32', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool' } } ++ '*kernel-hashes': 'bool', ++ '*legacy-vm-type': 'bool' } } + + ## + # @ThreadContextProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9dab4060b8..f4ee317cb0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -67,6 +67,7 @@ struct SevGuestState { + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; ++ bool legacy_vm_type; + + /* runtime state */ + uint32_t handle; +@@ -356,6 +357,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ + bool + sev_enabled(void) + { +@@ -863,7 +874,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type)) { ++ if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; +@@ -1381,6 +1392,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); ++ object_class_property_add_bool(oc, "legacy-vm-type", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type); ++ object_class_property_set_description(oc, "legacy-vm-type", ++ "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } + + static void +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch b/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch new file mode 100644 index 0000000..ca1338c --- /dev/null +++ b/kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch @@ -0,0 +1,203 @@ +From a236548a903aa8350fff9601d481b2f529c8d4a7 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:21 -0500 +Subject: [PATCH 061/100] i386/sev: Add sev_kvm_init() override for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [61/91] b24fcbc8712e7394e029312229da023c63803969 (bonzini/rhel-qemu-kvm) + +Some aspects of the SEV init routine are specific to SEV and not +applicable to SNP guests, so move the SEV-specific bits into a +separate class method and retain only the common functionality. 
+ +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++-------------- + 1 file changed, 51 insertions(+), 21 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 4edfedc139..5519de1c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -73,6 +73,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + + /** +@@ -882,7 +883,7 @@ out: + return sev_common->kvm_type; + } + +-static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; +@@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + +- ret = ram_block_discard_disable(true); +- if (ret) { +- error_report("%s: cannot disable RAM discard", __func__); +- return -1; +- } +- + sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); +@@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev_common->cbitpos); +- goto err; ++ return -1; + } + + /* +@@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, 
requested '%d'", + __func__, sev_common->reduced_phys_bits); +- goto err; ++ return -1; + } + + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); +@@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +- goto err; ++ return -1; + } + g_free(devname); + +@@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: failed to get platform status ret=%d " + "fw_error='%d: %s'", __func__, ret, fw_error, + fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; +@@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (!kvm_kernel_irqchip_allowed()) { + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); +- goto err; ++ return -1; + } + } + +@@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + error_setg(errp, "%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); +- goto err; ++ return -1; + } + } + +@@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +- goto err; ++ return -1; + } + + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); +- goto err; ++ return -1; ++ } ++ ++ if (klass->kvm_init && klass->kvm_init(cgs, errp)) { ++ return -1; + } + +- ram_block_notifier_add(&sev_ram_notifier); +- qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; +-err: +- 
ram_block_discard_disable(false); +- return -1; ++} ++ ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ int ret; ++ ++ /* ++ * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding ++ * isn't actually possible. With SNP, only guest_memfd pages are used ++ * for private guest memory, so discarding of shared memory is still ++ * possible.. ++ */ ++ ret = ram_block_discard_disable(true); ++ if (ret) { ++ error_setg(errp, "%s: cannot disable RAM discard", __func__); ++ return -1; ++ } ++ ++ /* ++ * SEV uses these notifiers to register/pin pages prior to guest use, ++ * but SNP relies on guest_memfd for private pages, which has its ++ * own internal mechanisms for registering/pinning private memory. ++ */ ++ ram_block_notifier_add(&sev_ram_notifier); ++ ++ /* ++ * The machine done notify event is used for SEV guests to get the ++ * measurement of the encrypted images. When SEV-SNP is enabled, the ++ * measurement is part of the guest attestation process where it can ++ * be collected without any reliance on the VMM. So skip registering ++ * the notifier for SNP in favor of using guest attestation instead. 
++ */ ++ qemu_add_machine_init_done_notifier(&sev_machine_done_notify); ++ ++ return 0; + } + + int +@@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data) + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + +- klass->kvm_init = sev_kvm_init; ++ klass->kvm_init = sev_common_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +@@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch b/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch new file mode 100644 index 0000000..0db345c --- /dev/null +++ b/kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch @@ -0,0 +1,94 @@ +From 35ceebdeccbf5dceb374c6f89a12e9981def570b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:22 -0500 +Subject: [PATCH 062/100] i386/sev: Add snp_kvm_init() override for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [62/91] 8fa537961c9262b99a4ffb99e1c25f080d76d1de (bonzini/rhel-qemu-kvm) + +SNP does not support SMM and requires guest_memfd for +private guest memory, so add SNP specific kvm_init() +functionality in snp_kvm_init() class method. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 125b95a6d465a03ff30816eff0b1889aec01f0c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 5519de1c6b..6525b3c1a0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -885,12 +885,12 @@ out: + + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonState *sev_common = SEV_COMMON(cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + sev_common->state = SEV_STATE_UNINIT; +@@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return 0; + } + ++static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ X86MachineState *x86ms = X86_MACHINE(ms); ++ ++ if (x86ms->smm == ON_OFF_AUTO_AUTO) { ++ x86ms->smm = ON_OFF_AUTO_OFF; ++ } else if (x86ms->smm == ON_OFF_AUTO_ON) { ++ error_setg(errp, "SEV-SNP does not support SMM."); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +@@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) + static void + sev_snp_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->kvm_init = sev_snp_kvm_init; ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +@@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass 
*oc, void *data) + static void + sev_snp_guest_instance_init(Object *obj) + { ++ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + ++ cgs->require_guest_memfd = true; ++ + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; + } +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch b/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch new file mode 100644 index 0000000..c10f75f --- /dev/null +++ b/kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch @@ -0,0 +1,262 @@ +From 4013364679757161d6b9754bfc33ae38be0a1b7f Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:32 -0500 +Subject: [PATCH 072/100] i386/sev: Add support for SNP CPUID validation + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [72/91] 080e2942552dc7de8966e69d0d0d3b8951392030 (bonzini/rhel-qemu-kvm) + +SEV-SNP firmware allows a special guest page to be populated with a +table of guest CPUID values so that they can be validated through +firmware before being loaded into encrypted guest memory where they can +be used in place of hypervisor-provided values[1]. + +As part of SEV-SNP guest initialization, use this interface to validate +the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest +start and populate the CPUID page reserved by OVMF with the resulting +encrypted data. + +[1] SEV SNP Firmware ABI Specification, Rev. 
0.8, 8.13.2.6 + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 70943ad8e4dfbe5f77006b880290219be9d03553) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 162 insertions(+), 2 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c57534fca2..06401f0526 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = { + + #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) + ++/* doesn't expose this, so re-use the max from kvm.c */ ++#define KVM_MAX_CPUID_ENTRIES 100 ++ ++typedef struct KvmCpuidInfo { ++ struct kvm_cpuid2 cpuid; ++ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; ++} KvmCpuidInfo; ++ ++#define SNP_CPUID_FUNCTION_MAXCOUNT 64 ++#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF ++ ++typedef struct { ++ uint32_t eax_in; ++ uint32_t ecx_in; ++ uint64_t xcr0_in; ++ uint64_t xss_in; ++ uint32_t eax; ++ uint32_t ebx; ++ uint32_t ecx; ++ uint32_t edx; ++ uint64_t reserved; ++} __attribute__((packed)) SnpCpuidFunc; ++ ++typedef struct { ++ uint32_t count; ++ uint32_t reserved1; ++ uint64_t reserved2; ++ SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; ++} __attribute__((packed)) SnpCpuidInfo; ++ + static int + sev_ioctl(int fd, int cmd, void *data, int *error) + { +@@ -788,6 +818,35 @@ out: + return ret; + } + ++static void ++sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, ++ SnpCpuidInfo *new) ++{ ++ size_t i; ++ ++ if (old->count != new->count) { ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ "provided: %d, expected: %d", old->count, new->count); ++ return; ++ } ++ ++ for (i = 0; i < old->count; i++) { ++ SnpCpuidFunc *old_func, *new_func; ++ ++ old_func = &old->entries[i]; ++ new_func = &new->entries[i]; ++ ++ if (memcmp(old_func, 
new_func, sizeof(SnpCpuidFunc))) { ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", ++ old_func->eax_in, old_func->ecx_in, ++ old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, ++ new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); ++ } ++ } ++} ++ + static const char * + snp_page_type_to_str(int type) + { +@@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) + { + int ret, fw_error; ++ SnpCpuidInfo snp_cpuid_info; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { +@@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + return 1; + } + ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ ++ memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); ++ } ++ + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; +@@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); ++ ++ if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); ++ error_report("SEV-SNP: failed update CPUID page"); ++ } + break; + } + } +@@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common) + } + + static int +-snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++snp_launch_update_data(uint64_t gpa, void *hva, ++ uint32_t len, int type) + { + SevLaunchUpdateData *data; + +@@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) + return 0; + } + ++static int ++sev_snp_cpuid_info_fill(SnpCpuidInfo 
*snp_cpuid_info, ++ const KvmCpuidInfo *kvm_cpuid_info) ++{ ++ size_t i; ++ ++ if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { ++ error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", ++ kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); ++ return -1; ++ } ++ ++ memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); ++ ++ for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { ++ const struct kvm_cpuid_entry2 *kvm_cpuid_entry; ++ SnpCpuidFunc *snp_cpuid_entry; ++ ++ kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; ++ snp_cpuid_entry = &snp_cpuid_info->entries[i]; ++ ++ snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; ++ if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { ++ snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; ++ } ++ snp_cpuid_entry->eax = kvm_cpuid_entry->eax; ++ snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; ++ snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; ++ snp_cpuid_entry->edx = kvm_cpuid_entry->edx; ++ ++ /* ++ * Guest kernels will calculate EBX themselves using the 0xD ++ * subfunctions corresponding to the individual XSAVE areas, so only ++ * encode the base XSAVE size in the initial leaves, corresponding ++ * to the initial XCR0=1 state. 
++ */ ++ if (snp_cpuid_entry->eax_in == 0xD && ++ (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { ++ snp_cpuid_entry->ebx = 0x240; ++ snp_cpuid_entry->xcr0_in = 1; ++ snp_cpuid_entry->xss_in = 0; ++ } ++ } ++ ++ snp_cpuid_info->count = i; ++ ++ return 0; ++} ++ ++static int ++snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) ++{ ++ KvmCpuidInfo kvm_cpuid_info = {0}; ++ SnpCpuidInfo snp_cpuid_info; ++ CPUState *cs = first_cpu; ++ int ret; ++ uint32_t i = 0; ++ ++ assert(sizeof(snp_cpuid_info) <= cpuid_len); ++ ++ /* get the cpuid list from KVM */ ++ do { ++ kvm_cpuid_info.cpuid.nent = ++i; ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); ++ } while (ret == -E2BIG); ++ ++ if (ret) { ++ error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", ++ strerror(-ret)); ++ return 1; ++ } ++ ++ ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); ++ if (ret) { ++ error_report("SEV-SNP: failed to generate CPUID table information"); ++ return 1; ++ } ++ ++ memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); ++ ++ return snp_launch_update_data(cpuid_addr, hva, cpuid_len, ++ KVM_SEV_SNP_PAGE_TYPE_CPUID); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + exit(1); + } + +- ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ++ ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else { ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ } ++ + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch b/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch new file mode 100644 index 0000000..4691679 --- /dev/null +++ 
b/kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch @@ -0,0 +1,127 @@ +From b2cfd4d89026e76ba86ea7adea323f2c3a588790 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:31 -0500 +Subject: [PATCH 071/100] i386/sev: Add support for populating OVMF metadata + pages + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [71/91] b563442c0e2f6ea01937425d300b56d9e641fd57 (bonzini/rhel-qemu-kvm) + +OVMF reserves various pages so they can be pre-initialized/validated +prior to launching the guest. Add support for populating these pages +with the expected content. + +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d8c2a7f4806ff39423312e503737fd76c34dcae) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 74 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 17281bb2c7..c57534fca2 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common) + migrate_add_blocker(&sev_mig_blocker, &error_fatal); + } + ++static int ++snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) ++{ ++ SevLaunchUpdateData *data; ++ ++ data = g_new0(SevLaunchUpdateData, 1); ++ data->gpa = gpa; ++ data->hva = hva; ++ data->len = len; ++ data->type = type; ++ ++ QTAILQ_INSERT_TAIL(&launch_update, data, next); ++ ++ return 0; ++} ++ ++static int ++snp_metadata_desc_to_page_type(int desc_type) ++{ ++ switch (desc_type) { ++ /* Add the umeasured prevalidated pages as a zero page */ ++ case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ case 
SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; ++ case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; ++ default: ++ return KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ } ++} ++ ++static void ++snp_populate_metadata_pages(SevSnpGuestState *sev_snp, ++ OvmfSevMetadata *metadata) ++{ ++ OvmfSevMetadataDesc *desc; ++ int type, ret, i; ++ void *hva; ++ MemoryRegion *mr = NULL; ++ ++ for (i = 0; i < metadata->num_desc; i++) { ++ desc = &metadata->descs[i]; ++ ++ type = snp_metadata_desc_to_page_type(desc->type); ++ ++ hva = gpa2hva(&mr, desc->base, desc->len, NULL); ++ if (!hva) { ++ error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", ++ __func__, desc->base, desc->len); ++ exit(1); ++ } ++ ++ ret = snp_launch_update_data(desc->base, hva, desc->len, type); ++ if (ret) { ++ error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", ++ __func__, desc->base, desc->len, desc->type); ++ exit(1); ++ } ++ } ++} ++ + static void + sev_snp_launch_finish(SevCommonState *sev_common) + { + int ret, error; + Error *local_err = NULL; ++ OvmfSevMetadata *metadata; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + ++ /* ++ * To boot the SNP guest, the hypervisor is required to populate the CPUID ++ * and Secrets page before finalizing the launch flow. The location of ++ * the secrets and CPUID page is available through the OVMF metadata GUID. 
++ */ ++ metadata = pc_system_get_ovmf_sev_metadata_ptr(); ++ if (metadata == NULL) { ++ error_report("%s: Failed to locate SEV metadata header", __func__); ++ exit(1); ++ } ++ ++ /* Populate all the metadata pages */ ++ snp_populate_metadata_pages(sev_snp, metadata); ++ + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { +-- +2.39.3 + diff --git a/kvm-i386-sev-Add-the-SNP-launch-start-context.patch b/kvm-i386-sev-Add-the-SNP-launch-start-context.patch new file mode 100644 index 0000000..5da793f --- /dev/null +++ b/kvm-i386-sev-Add-the-SNP-launch-start-context.patch @@ -0,0 +1,122 @@ +From 0f7432f2b968298b64fd243df793b176f67a538f Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:27 -0500 +Subject: [PATCH 067/100] i386/sev: Add the SNP launch start context + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [67/91] 63759a25a413a7a9a7274fb4c3b8bc2528634855 (bonzini/rhel-qemu-kvm) + +The SNP_LAUNCH_START is called first to create a cryptographic launch +context within the firmware. 
+ +Signed-off-by: Brijesh Singh +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++ + target/i386/trace-events | 1 + + 2 files changed, 40 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 43d1c48bd9..e89b87d2f5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -39,6 +39,7 @@ + #include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" ++#include "qemu/queue.h" + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +@@ -115,6 +116,16 @@ struct SevSnpGuestState { + #define DEFAULT_SEV_DEVICE "/dev/sev" + #define DEFAULT_SEV_SNP_POLICY 0x30000 + ++typedef struct SevLaunchUpdateData { ++ QTAILQ_ENTRY(SevLaunchUpdateData) next; ++ hwaddr gpa; ++ void *hva; ++ uint64_t len; ++ int type; ++} SevLaunchUpdateData; ++ ++static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; ++ + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ +@@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + return 0; + } + ++static int ++sev_snp_launch_start(SevCommonState *sev_common) ++{ ++ int fw_error, rc; ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ ++ trace_kvm_sev_snp_launch_start(start->policy, ++ sev_snp_guest->guest_visible_workarounds); ++ ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, ++ start, &fw_error); ++ if (rc < 0) { ++ error_report("%s: SNP_LAUNCH_START ret=%d 
fw_error=%d '%s'", ++ __func__, rc, fw_error, fw_error_to_str(fw_error)); ++ return 1; ++ } ++ ++ QTAILQ_INIT(&launch_update); ++ ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ ++ return 0; ++} ++ + static int + sev_launch_start(SevCommonState *sev_common) + { +@@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + ret = klass->launch_start(sev_common); ++ + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; +@@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->launch_start = sev_snp_launch_start; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + ++ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); +diff --git a/target/i386/trace-events b/target/i386/trace-events +index 2cd8726eeb..cb26d8a925 100644 +--- a/target/i386/trace-events ++++ b/target/i386/trace-events +@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s" + kvm_sev_launch_finish(void) "" + kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" + kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" ++kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +-- +2.39.3 + diff --git a/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch b/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch new file mode 100644 index 0000000..f809242 --- /dev/null +++ b/kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch @@ -0,0 +1,237 @@ +From ec786a1ec0a76775e980862d77500f5196a937e3 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:35 -0500 
+Subject: [PATCH 080/100] i386/sev: Allow measured direct kernel boot on SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [80/91] 11c629862519c1a279566febf5a537c63c5fcf61 (bonzini/rhel-qemu-kvm) + +In SNP, the hashes page is designated with a specific metadata entry +published in AmdSev OVMF. + +Therefore, if the user enabled kernel hashes (for measured direct boot), +QEMU should prepare the content of hashes table, and during the +processing of the metadata entry it should copy the content into the designated +page and encrypt it. + +Note that in SNP (unlike SEV and SEV-ES) the measurement is done in +whole 4KB pages. Therefore QEMU zeros the whole page that includes the +hashes table, and fills in the kernel hashes area in that page, and then +encrypts the whole page. The rest of the page is reserved for SEV +launch secrets which are not usable anyway on SNP. + +If the user disabled kernel hashes, QEMU pre-validates the kernel hashes +page as a zero page. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c1996992cc882b00139f78067d6a64e2ec9cb0d8) +Signed-off-by: Paolo Bonzini +--- + include/hw/i386/pc.h | 2 + + target/i386/sev.c | 111 ++++++++++++++++++++++++++++++++----------- + 2 files changed, 85 insertions(+), 28 deletions(-) + +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 94b49310f5..ee3bfb7be9 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -175,6 +175,8 @@ typedef enum { + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, ++ /* The section contains the region for kernel hashes for measured direct boot */ ++ SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, + + } ovmf_sev_metadata_desc_type; + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 3fce4c08eb..004c667ac1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -115,6 +115,10 @@ struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + + /* public */ ++ bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp); + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); +@@ -154,6 +158,9 @@ struct SevSnpGuestState { + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; ++ ++ uint32_t kernel_hashes_offset; ++ PaddedSevHashTable *kernel_hashes_data; + }; + + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ +@@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) + KVM_SEV_SNP_PAGE_TYPE_CPUID); + } + ++static int 
++snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, ++ void *hva, uint32_t len) ++{ ++ int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; ++ if (sev_snp->parent_obj.kernel_hashes) { ++ assert(sev_snp->kernel_hashes_data); ++ assert((sev_snp->kernel_hashes_offset + ++ sizeof(*sev_snp->kernel_hashes_data)) <= len); ++ memset(hva, 0, len); ++ memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, ++ sizeof(*sev_snp->kernel_hashes_data)); ++ type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; ++ } ++ return snp_launch_update_data(addr, hva, len, type); ++} ++ + static int + snp_metadata_desc_to_page_type(int desc_type) + { +@@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); ++ } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { ++ ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, ++ desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } +@@ -1823,6 +1850,58 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + return true; + } + ++static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ /* ++ * SNP: Populate the hashes table in an area that later in ++ * snp_launch_update_kernel_hashes() will be copied to the guest memory ++ * and encrypted. 
++ */ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); ++ sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; ++ sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); ++ return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); ++} ++ ++static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, ++ SevHashTableDescriptor *area, ++ SevKernelLoaderContext *ctx, ++ Error **errp) ++{ ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { ++ ret = false; ++ } ++ ++ address_space_unmap(&address_space_memory, padded_ht, ++ mapped_len, true, mapped_len); ++ ++ return ret; ++} ++ + /* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. 
+@@ -1831,11 +1910,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + { + uint8_t *data; + SevHashTableDescriptor *area; +- PaddedSevHashTable *padded_ht; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly +@@ -1858,30 +1934,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return false; + } + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } +- +- if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { +- ret = false; +- } +- } else { +- ret = false; +- } +- +- address_space_unmap(&address_space_memory, padded_ht, +- mapped_len, true, mapped_len); +- +- return ret; ++ return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); + } + + static char * +@@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; +@@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass 
= X86_CONFIDENTIAL_GUEST_CLASS(oc); + ++ klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; +-- +2.39.3 + diff --git a/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch b/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch new file mode 100644 index 0000000..aacb0da --- /dev/null +++ b/kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch @@ -0,0 +1,268 @@ +From ab6197309551bd6ddd9f8239191f68dfac23684b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Tue, 9 Jul 2024 23:10:05 -0500 +Subject: [PATCH 090/100] i386/sev: Don't allow automatic fallback to legacy + KVM_SEV*_INIT +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [90/91] 2b1345faa56f993bb6e13d63e11656c784e20412 (bonzini/rhel-qemu-kvm) + +Currently if the 'legacy-vm-type' property of the sev-guest object is +'off', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel +interface in conjunction with the newer KVM_X86_SEV_VM and +KVM_X86_SEV_ES_VM KVM VM types. + +This can lead to measurement changes if, for instance, an SEV guest was +created on a host that originally had an older kernel that didn't +support KVM_SEV_INIT2, but is booted on the same host later on after the +host kernel was upgraded. + +Instead, if legacy-vm-type is 'off', QEMU should fail if the +KVM_SEV_INIT2 interface is not provided by the current host kernel. +Modify the fallback handling accordingly. + +In the future, VMSA features and other flags might be added to QEMU +which will require legacy-vm-type to be 'off' because they will rely +on the newer KVM_SEV_INIT2 interface. 
It may be difficult to convey to +users what values of legacy-vm-type are compatible with which +features/options, so as part of this rework, switch legacy-vm-type to a +tri-state OnOffAuto option. 'auto' in this case will automatically +switch to using the newer KVM_SEV_INIT2, but only if it is required to +make use of new VMSA features or other options only available via +KVM_SEV_INIT2. + +Defining 'auto' in this way would avoid inadvertently breaking +compatibility with older kernels since it would only be used in cases +where users opt into newer features that are only available via +KVM_SEV_INIT2 and newer kernels, and provide better default behavior +than the legacy-vm-type=off behavior that was previously in place, so +make it the default for 9.1+ machine types. + +Cc: Daniel P. Berrangé +Cc: Paolo Bonzini +cc: kvm@vger.kernel.org +Signed-off-by: Michael Roth +Reviewed-by: Daniel P. Berrangé +Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd) + +RHEL: adjust compatibility setting, applying it to 9.4 machine type +--- + hw/i386/pc.c | 2 +- + qapi/qom.json | 18 ++++++---- + target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++-------- + 3 files changed, 83 insertions(+), 22 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index b25d075b59..e9c5ea5d8f 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -352,7 +352,7 @@ const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + GlobalProperty pc_rhel_9_5_compat[] = { + /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +- { "sev-guest", "legacy-vm-type", "true" }, ++ { "sev-guest", "legacy-vm-type", "on" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/qapi/qom.json b/qapi/qom.json +index 8bd299265e..17bd5a0cf7 100644 +--- a/qapi/qom.json ++++ 
b/qapi/qom.json +@@ -912,12 +912,16 @@ + # @handle: SEV firmware handle (default: 0) + # + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +-# The newer KVM_SEV_INIT2 interface syncs additional vCPU +-# state when initializing the VMSA structures, which will +-# result in a different guest measurement. Set this to +-# maintain compatibility with older QEMU or kernel versions +-# that rely on legacy KVM_SEV_INIT behavior. +-# (default: false) (since 9.1) ++# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs ++# additional vCPU state when initializing the VMSA structures, ++# which will result in a different guest measurement. Set ++# this to 'on' to force compatibility with older QEMU or kernel ++# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' ++# will behave identically to 'on', but will automatically ++# switch to using KVM_SEV_INIT2 if the user specifies any ++# additional options that require it. If set to 'off', QEMU ++# will require KVM_SEV_INIT2 unconditionally. ++# (default: off) (since 9.1) + # + # Since: 2.12 + ## +@@ -927,7 +931,7 @@ + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*legacy-vm-type': 'bool' } } ++ '*legacy-vm-type': 'OnOffAuto' } } + + ## + # @SevSnpGuestProperties: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 491fab74fd..b921defb63 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -144,7 +144,7 @@ struct SevGuestState { + uint32_t policy; + char *dh_cert_file; + char *session_file; +- bool legacy_vm_type; ++ OnOffAuto legacy_vm_type; + }; + + struct SevSnpGuestState { +@@ -1334,6 +1334,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++/* ++ * This helper is to examine sev-guest properties and determine if any options ++ * have been set which rely on the newer KVM_SEV_INIT2 interface and associated ++ * KVM VM types. 
++ */ ++static bool sev_init2_required(SevGuestState *sev_guest) ++{ ++ /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ ++ return false; ++} ++ + static int sev_kvm_type(X86ConfidentialGuest *cg) + { + SevCommonState *sev_common = SEV_COMMON(cg); +@@ -1344,14 +1355,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) + goto out; + } + ++ /* These are the only cases where legacy VM types can be used. */ ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || ++ (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && ++ !sev_init2_required(sev_guest))) { ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ goto out; ++ } ++ ++ /* ++ * Newer VM types are required, either explicitly via legacy-vm-type=on, or ++ * implicitly via legacy-vm-type=auto along with additional sev-guest ++ * properties that require the newer VM types. ++ */ + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { +- sev_common->kvm_type = kvm_type; +- } else { +- sev_common->kvm_type = KVM_X86_DEFAULT_VM; ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { ++ error_report("SEV: host kernel does not support requested %s VM type, which is required " ++ "for the set of options specified. To allow use of the legacy " ++ "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " ++ "compatible with the legacy VM type, or upgrade your kernel.", ++ kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } else { ++ error_report("SEV: host kernel does not support requested %s VM type. To allow use of " ++ "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " ++ "must be set to 'on' or 'auto' for the sev-guest object.", ++ kvm_type == KVM_X86_SEV_VM ? 
"KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); ++ } ++ ++ return -1; + } + ++ sev_common->kvm_type = kvm_type; + out: + return sev_common->kvm_type; + } +@@ -1442,14 +1478,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { ++ switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { ++ case KVM_X86_DEFAULT_VM: + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); +- } else { ++ break; ++ case KVM_X86_SEV_VM: ++ case KVM_X86_SEV_ES_VM: ++ case KVM_X86_SNP_VM: { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ break; ++ } ++ default: ++ error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", ++ __func__); ++ return -1; + } + + if (ret) { +@@ -2037,14 +2083,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp) + SEV_GUEST(obj)->session_file = g_strdup(value); + } + +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- return SEV_GUEST(obj)->legacy_vm_type; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; ++ ++ visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); + } + +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { +- SEV_GUEST(obj)->legacy_vm_type = value; ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); + } + + static void +@@ -2070,9 +2125,9 @@ sev_guest_class_init(ObjectClass *oc, void *data) + 
sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "legacy-vm-type", +- sev_guest_get_legacy_vm_type, +- sev_guest_set_legacy_vm_type); ++ object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", ++ sev_guest_get_legacy_vm_type, ++ sev_guest_set_legacy_vm_type, NULL, NULL); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); + } +@@ -2088,6 +2143,8 @@ sev_guest_instance_init(Object *obj) + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); ++ ++ sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; + } + + /* guest info specific sev/sev-es */ +-- +2.39.3 + diff --git a/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch b/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch new file mode 100644 index 0000000..739a145 --- /dev/null +++ b/kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch @@ -0,0 +1,46 @@ +From ebb3c3536366c383fa09b0987a4efb68d018b7b8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:24 -0500 +Subject: [PATCH 064/100] i386/sev: Don't return launch measurements for + SEV-SNP guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [64/91] 5a29bb2d8b5a07aec6fd271ec37345e665e9cce4 (bonzini/rhel-qemu-kvm) + +For SEV-SNP guests, launch measurement is queried from within the guest +during attestation, so don't attempt to return it as part of +query-sev-launch-measure. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 73ae63b162fc1fed520f53ad200712964d7d0264) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 6525b3c1a0..c3daaf1ad5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + + static char *sev_get_launch_measurement(void) + { +- SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + + if (sev_guest && + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { +-- +2.39.3 + diff --git a/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch b/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch new file mode 100644 index 0000000..e438cd3 --- /dev/null +++ b/kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch @@ -0,0 +1,54 @@ +From 0612c7ed587422ec7e07c27c8ca11b89c7aa8b02 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:43 -0500 +Subject: [PATCH 077/100] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP + guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [77/91] 3c494eb54499c24121cc2c47045626478b8bb41e (bonzini/rhel-qemu-kvm) + +KVM will forward GHCB page-state change requests to userspace in the +form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is +enabled for SNP guests. 
+ +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index eaf5fc6c6b..abb63062ac 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + + #include ++#include + #include + + #include +@@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common) + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + ++ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { ++ return 1; ++ } ++ + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { +-- +2.39.3 + diff --git a/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch b/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch new file mode 100644 index 0000000..a06301d --- /dev/null +++ b/kvm-i386-sev-Extract-build_kernel_loader_hashes.patch @@ -0,0 +1,167 @@ +From eed17520567c202f53ab767bfd42cfe303838772 Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:33 -0500 +Subject: [PATCH 078/100] i386/sev: Extract build_kernel_loader_hashes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [78/91] 291ea10e774178826d1afd38fc8292d67c5fd42d (bonzini/rhel-qemu-kvm) + +Extract the building of the kernel hashes table out from +sev_add_kernel_loader_hashes() to allow building it in +other memory areas (for SNP support). + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 06cbd66cecaa3230cccb330facac241a677b29d5) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 102 ++++++++++++++++++++++++++-------------------- + 1 file changed, 58 insertions(+), 44 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index abb63062ac..73f9406715 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = { + 0x4d, 0x36, 0xab, 0x2a) + }; + +-/* +- * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page +- * which is included in SEV's initial memory measurement. +- */ +-bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, ++ SevKernelLoaderContext *ctx, ++ Error **errp) + { +- uint8_t *data; +- SevHashTableDescriptor *area; + SevHashTable *ht; +- PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; +- hwaddr mapped_len = sizeof(*padded_ht); +- MemTxAttrs attrs = { 0 }; +- bool ret = true; +- SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- +- /* +- * Only add the kernel hashes if the sev-guest configuration explicitly +- * stated kernel-hashes=on. 
+- */ +- if (!sev_common->kernel_hashes) { +- return false; +- } +- +- if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { +- error_setg(errp, "SEV: kernel specified but guest firmware " +- "has no hashes table GUID"); +- return false; +- } +- area = (SevHashTableDescriptor *)data; +- if (!area->base || area->size < sizeof(PaddedSevHashTable)) { +- error_setg(errp, "SEV: guest firmware hashes table area is invalid " +- "(base=0x%x size=0x%x)", area->base, area->size); +- return false; +- } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If +@@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + } + assert(hash_len == HASH_SIZE); + +- /* +- * Populate the hashes table in the guest's memory at the OVMF-designated +- * area for the SEV hashes table +- */ +- padded_ht = address_space_map(&address_space_memory, area->base, +- &mapped_len, true, attrs); +- if (!padded_ht || mapped_len != sizeof(*padded_ht)) { +- error_setg(errp, "SEV: cannot map hashes table guest memory area"); +- return false; +- } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; +@@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + +- if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, +- sizeof(*padded_ht), errp) < 0) { ++ return true; ++} ++ ++/* ++ * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page ++ * which is included in SEV's initial memory measurement. 
++ */ ++bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) ++{ ++ uint8_t *data; ++ SevHashTableDescriptor *area; ++ PaddedSevHashTable *padded_ht; ++ hwaddr mapped_len = sizeof(*padded_ht); ++ MemTxAttrs attrs = { 0 }; ++ bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ /* ++ * Only add the kernel hashes if the sev-guest configuration explicitly ++ * stated kernel-hashes=on. ++ */ ++ if (!sev_common->kernel_hashes) { ++ return false; ++ } ++ ++ if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { ++ error_setg(errp, "SEV: kernel specified but guest firmware " ++ "has no hashes table GUID"); ++ return false; ++ } ++ ++ area = (SevHashTableDescriptor *)data; ++ if (!area->base || area->size < sizeof(PaddedSevHashTable)) { ++ error_setg(errp, "SEV: guest firmware hashes table area is invalid " ++ "(base=0x%x size=0x%x)", area->base, area->size); ++ return false; ++ } ++ ++ /* ++ * Populate the hashes table in the guest's memory at the OVMF-designated ++ * area for the SEV hashes table ++ */ ++ padded_ht = address_space_map(&address_space_memory, area->base, ++ &mapped_len, true, attrs); ++ if (!padded_ht || mapped_len != sizeof(*padded_ht)) { ++ error_setg(errp, "SEV: cannot map hashes table guest memory area"); ++ return false; ++ } ++ ++ if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { ++ if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, ++ sizeof(*padded_ht), errp) < 0) { ++ ret = false; ++ } ++ } else { + ret = false; + } + +-- +2.39.3 + diff --git a/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch b/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch new file mode 100644 index 0000000..1d30674 --- /dev/null +++ b/kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch @@ -0,0 +1,65 @@ +From a9530c89225fce9e381929c4cd8e372068827acf Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 2024 10:52:49 +0200 
+Subject: [PATCH 089/100] i386/sev: Fallback to the default SEV device if none + provided in sev_get_capabilities() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [89/91] 22318c20d7102815f754cec0efaf383e05ef79c1 (bonzini/rhel-qemu-kvm) + +When management tools (e.g. libvirt) query QEMU capabilities, +they start QEMU with a minimalistic configuration and issue +various commands on the monitor. One of the commands issued is/might +be "query-sev-capabilities" to learn values like cbitpos or +reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor +command returns an error instead. + +This creates a chicken-egg problem because in order to query +those aforementioned values QEMU needs to be started with a +'sev-guest' object. But to start QEMU with such an object, the values must be +known. + +I think it's safe to assume that the default path ("/dev/sev") +provides the same data as a user-provided one. So fall back to it. 
+ +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3fb24530b2bb1346a44e17becefc9865b40a2257) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 53b7f7315b..491fab74fd 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -585,13 +585,13 @@ static SevCapability *sev_get_capabilities(Error **errp) + } + + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- if (!sev_common) { +- error_setg(errp, "SEV is not configured"); +- return NULL; ++ if (sev_common) { ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ } else { ++ sev_device = g_strdup(DEFAULT_SEV_DEVICE); + } + +- sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +- &error_abort); + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +-- +2.39.3 + diff --git a/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch b/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch new file mode 100644 index 0000000..b23e008 --- /dev/null +++ b/kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch @@ -0,0 +1,48 @@ +From b672cdf8c10a530b5bcf6dd4489632891eb2c731 Mon Sep 17 00:00:00 2001 +From: Michal Privoznik +Date: Mon, 24 Jun 2024 10:52:48 +0200 +Subject: [PATCH 088/100] i386/sev: Fix error message in sev_get_capabilities() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [88/91] ff8a8b27af02e565172ffe39d0571c234317713d 
(bonzini/rhel-qemu-kvm) + +When a custom path is provided to sev-guest object and opening +the path fails an error message is reported. But the error +message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of +the custom path. + +Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 +Signed-off-by: Michal Privoznik +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com +Signed-off-by: Paolo Bonzini +(cherry picked from commit e306ae87e0ef04bc7a5dec6db693f6ea09d64d45) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 37de80adc7..53b7f7315b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -595,7 +595,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", +- DEFAULT_SEV_DEVICE); ++ sev_device); + g_free(sev_device); + return NULL; + } +-- +2.39.3 + diff --git a/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch b/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch new file mode 100644 index 0000000..2d167af --- /dev/null +++ b/kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch @@ -0,0 +1,1118 @@ +From e6cf2115eb9db545821180b8a978cdccc6a2c2db Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:16 -0500 +Subject: [PATCH 056/100] i386/sev: Introduce "sev-common" type to encapsulate + common SEV state + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [56/91] b52d5c9c5e4997d2fd791fa36dd5d4c836dfc32f (bonzini/rhel-qemu-kvm) + +Currently all SEV/SEV-ES functionality is managed through a single +'sev-guest' QOM type. 
With upcoming support for SEV-SNP, taking this +same approach won't work well since some of the properties/state +managed by 'sev-guest' are not applicable to SEV-SNP, which will instead +rely on a new QOM type with its own set of properties/state. + +To prepare for this, this patch moves common state into an abstract +'sev-common' parent type to encapsulate properties/state that are +common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific +properties/state in the current 'sev-guest' type. This should not +affect current behavior or command-line options. + +As part of this patch, some related changes are also made: + + - a static 'sev_guest' variable is currently used to keep track of + the 'sev-guest' instance. SEV-SNP would similarly introduce an + 'sev_snp_guest' static variable. But these instances are now + available via qdev_get_machine()->cgs, so switch to using that + instead and drop the static variable. + + - 'sev_guest' is currently used as the name for the static variable + holding a pointer to the 'sev-guest' instance. Re-purpose the name + as a local variable referring to the 'sev-guest' instance, and use + that consistently throughout the code so it can be easily + distinguished from sev-common/sev-snp-guest instances. + + - 'sev' is generally used as the name for local variables holding a + pointer to the 'sev-guest' instance. In cases where that now points + to common state, use the name 'sev_common'; in cases where that now + points to state specific to the 'sev-guest' instance, use the name + 'sev_guest'. + +In order to enable kernel-hashes for SNP, pull it from +SevGuestProperties to its parent SevCommonProperties so +it will be available for both SEV and SNP. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Dov Murik +Signed-off-by: Dov Murik +Acked-by: Markus Armbruster (QAPI schema) +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 16dcf200dc951c1cde3e5b442457db5f690b8cf0) +Signed-off-by: Paolo Bonzini +--- + qapi/qom.json | 40 ++-- + target/i386/sev.c | 489 ++++++++++++++++++++++++++-------------------- + target/i386/sev.h | 3 + + 3 files changed, 301 insertions(+), 231 deletions(-) + +diff --git a/qapi/qom.json b/qapi/qom.json +index 38dde6d785..056b38f491 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -875,20 +875,12 @@ + 'data': { '*filename': 'str' } } + + ## +-# @SevGuestProperties: ++# @SevCommonProperties: + # +-# Properties for sev-guest objects. ++# Properties common to objects that are derivatives of sev-common. + # + # @sev-device: SEV device to use (default: "/dev/sev") + # +-# @dh-cert-file: guest owners DH certificate (encoded with base64) +-# +-# @session-file: guest owners session parameters (encoded with base64) +-# +-# @policy: SEV policy value (default: 0x1) +-# +-# @handle: SEV firmware handle (default: 0) +-# + # @cbitpos: C-bit location in page table entry (default: 0) + # + # @reduced-phys-bits: number of bits in physical addresses that become +@@ -898,6 +890,27 @@ + # designated guest firmware page for measured boot with -kernel + # (default: false) (since 6.2) + # ++# Since: 9.1 ++## ++{ 'struct': 'SevCommonProperties', ++ 'data': { '*sev-device': 'str', ++ '*cbitpos': 'uint32', ++ 'reduced-phys-bits': 'uint32', ++ '*kernel-hashes': 'bool' } } ++ ++## ++# @SevGuestProperties: ++# ++# Properties for sev-guest objects. 
++# ++# @dh-cert-file: guest owners DH certificate (encoded with base64) ++# ++# @session-file: guest owners session parameters (encoded with base64) ++# ++# @policy: SEV policy value (default: 0x1) ++# ++# @handle: SEV firmware handle (default: 0) ++# + # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. + # The newer KVM_SEV_INIT2 interface syncs additional vCPU + # state when initializing the VMSA structures, which will +@@ -909,14 +922,11 @@ + # Since: 2.12 + ## + { 'struct': 'SevGuestProperties', +- 'data': { '*sev-device': 'str', +- '*dh-cert-file': 'str', ++ 'base': 'SevCommonProperties', ++ 'data': { '*dh-cert-file': 'str', + '*session-file': 'str', + '*policy': 'uint32', + '*handle': 'uint32', +- '*cbitpos': 'uint32', +- 'reduced-phys-bits': 'uint32', +- '*kernel-hashes': 'bool', + '*legacy-vm-type': 'bool' } } + + ## +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 67ed32e5ea..33e606eea0 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -40,49 +40,59 @@ + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +-#define TYPE_SEV_GUEST "sev-guest" +-OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) ++OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + +- +-/** +- * SevGuestState: +- * +- * The SevGuestState object is used for creating and managing a SEV +- * guest. 
+- * +- * # $QEMU \ +- * -object sev-guest,id=sev0 \ +- * -machine ...,memory-encryption=sev0 +- */ +-struct SevGuestState { ++struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; +- uint32_t policy; +- char *dh_cert_file; +- char *session_file; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; +- bool legacy_vm_type; + + /* runtime state */ +- uint32_t handle; + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; +- gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; + }; + ++struct SevCommonStateClass { ++ X86ConfidentialGuestClass parent_class; ++ ++}; ++ ++/** ++ * SevGuestState: ++ * ++ * The SevGuestState object is used for creating and managing a SEV ++ * guest. ++ * ++ * # $QEMU \ ++ * -object sev-guest,id=sev0 \ ++ * -machine ...,memory-encryption=sev0 ++ */ ++struct SevGuestState { ++ SevCommonState parent_obj; ++ gchar *measurement; ++ ++ /* configuration parameters */ ++ uint32_t handle; ++ uint32_t policy; ++ char *dh_cert_file; ++ char *session_file; ++ bool legacy_vm_type; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" + +@@ -128,7 +138,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { + + QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +-static SevGuestState *sev_guest; + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +@@ -209,21 +218,21 @@ fw_error_to_str(int code) + } + + static bool +-sev_check_state(const SevGuestState *sev, SevState state) ++sev_check_state(const SevCommonState *sev_common, SevState state) + { +- assert(sev); +- return sev->state == state ? true : false; ++ assert(sev_common); ++ return sev_common->state == state ? 
true : false; + } + + static void +-sev_set_guest_state(SevGuestState *sev, SevState new_state) ++sev_set_guest_state(SevCommonState *sev_common, SevState new_state) + { + assert(new_state < SEV_STATE__MAX); +- assert(sev); ++ assert(sev_common); + +- trace_kvm_sev_change_state(SevState_str(sev->state), ++ trace_kvm_sev_change_state(SevState_str(sev_common->state), + SevState_str(new_state)); +- sev->state = new_state; ++ sev_common->state = new_state; + } + + static void +@@ -290,121 +299,61 @@ static struct RAMBlockNotifier sev_ram_notifier = { + .ram_block_removed = sev_ram_block_removed, + }; + +-static void +-sev_guest_finalize(Object *obj) +-{ +-} +- +-static char * +-sev_guest_get_session_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return s->session_file ? g_strdup(s->session_file) : NULL; +-} +- +-static void +-sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->session_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_dh_cert_file(Object *obj, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- return g_strdup(s->dh_cert_file); +-} +- +-static void +-sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *s = SEV_GUEST(obj); +- +- s->dh_cert_file = g_strdup(value); +-} +- +-static char * +-sev_guest_get_sev_device(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return g_strdup(sev->sev_device); +-} +- +-static void +-sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(value); +-} +- +-static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- return sev->kernel_hashes; +-} +- +-static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- 
sev->kernel_hashes = value; +-} +- +-static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +-{ +- return SEV_GUEST(obj)->legacy_vm_type; +-} +- +-static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +-{ +- SEV_GUEST(obj)->legacy_vm_type = value; +-} +- + bool + sev_enabled(void) + { +- return !!sev_guest; ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); + } + + bool + sev_es_enabled(void) + { +- return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); ++ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; ++ ++ return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + } + + uint32_t + sev_get_cbit_position(void) + { +- return sev_guest ? sev_guest->cbitpos : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? sev_common->cbitpos : 0; + } + + uint32_t + sev_get_reduced_phys_bits(void) + { +- return sev_guest ? sev_guest->reduced_phys_bits : 0; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ return sev_common ? 
sev_common->reduced_phys_bits : 0; + } + + static SevInfo *sev_get_info(void) + { + SevInfo *info; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = ++ (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), ++ TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- info->api_major = sev_guest->api_major; +- info->api_minor = sev_guest->api_minor; +- info->build_id = sev_guest->build_id; +- info->policy = sev_guest->policy; +- info->state = sev_guest->state; +- info->handle = sev_guest->handle; ++ if (sev_guest) { ++ info->handle = sev_guest->handle; ++ } ++ info->api_major = sev_common->api_major; ++ info->api_minor = sev_common->api_minor; ++ info->build_id = sev_common->build_id; ++ info->state = sev_common->state; ++ /* we only report the lower 32-bits of policy for SNP, ok for now... */ ++ info->policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); + } + + return info; +@@ -530,6 +479,8 @@ static SevCapability *sev_get_capabilities(Error **errp) + size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; + uint32_t ebx; + int fd; ++ SevCommonState *sev_common; ++ char *sev_device; + + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); +@@ -540,12 +491,21 @@ static SevCapability *sev_get_capabilities(Error **errp) + return NULL; + } + +- fd = open(DEFAULT_SEV_DEVICE, O_RDWR); ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ if (!sev_common) { ++ error_setg(errp, "SEV is not configured"); ++ } ++ ++ sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", ++ &error_abort); ++ fd = open(sev_device, O_RDWR); + if (fd < 0) { + error_setg_errno(errp, errno, "SEV: Failed to open %s", + DEFAULT_SEV_DEVICE); ++ g_free(sev_device); + return NULL; + } ++ g_free(sev_device); + + if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, + &cert_chain_data, &cert_chain_len, errp)) { +@@ -588,7 +548,7 @@ 
static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + { + struct kvm_sev_attestation_report input = {}; + SevAttestationReport *report = NULL; +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common; + g_autofree guchar *data = NULL; + g_autofree guchar *buf = NULL; + gsize len; +@@ -613,8 +573,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + return NULL; + } + ++ sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ + /* Query the report length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret < 0) { + if (err != SEV_RET_INVALID_LEN) { +@@ -630,7 +592,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + memcpy(input.mnonce, buf, sizeof(input.mnonce)); + + /* Query the report */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret) { + error_setg_errno(errp, errno, "SEV: Failed to get attestation report" +@@ -670,26 +632,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev) ++sev_launch_start(SevGuestState *sev_guest) + { + gsize sz; + int ret = 1; + int fw_error, rc; + struct kvm_sev_launch_start start = { +- .handle = sev->handle, .policy = sev->policy ++ .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; ++ SevCommonState *sev_common = SEV_COMMON(sev_guest); + +- if (sev->session_file) { +- if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { ++ if (sev_guest->session_file) { ++ if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { + goto out; + } + start.session_uaddr = (unsigned long)session; + start.session_len = sz; + } + +- if (sev->dh_cert_file) { +- if 
(sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { ++ if (sev_guest->dh_cert_file) { ++ if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { + goto out; + } + start.dh_uaddr = (unsigned long)dh_cert; +@@ -697,15 +660,15 @@ sev_launch_start(SevGuestState *sev) + } + + trace_kvm_sev_launch_start(start.policy, session, dh_cert); +- rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); ++ rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + if (rc < 0) { + error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + goto out; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); +- sev->handle = start.handle; ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); ++ sev_guest->handle = start.handle; + ret = 0; + + out: +@@ -715,7 +678,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -727,7 +690,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -738,11 +701,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) + } + + static int +-sev_launch_update_vmsa(SevGuestState *sev) ++sev_launch_update_vmsa(SevGuestState *sev_guest) + { + int ret, fw_error; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, ++ NULL, &fw_error); + if (ret) { + 
error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -754,18 +718,19 @@ sev_launch_update_vmsa(SevGuestState *sev) + static void + sev_launch_get_measure(Notifier *notifier, void *unused) + { +- SevGuestState *sev = sev_guest; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int ret, error; + g_autofree guchar *data = NULL; + struct kvm_sev_launch_measure measurement = {}; + +- if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + return; + } + + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ +- ret = sev_launch_update_vmsa(sev); ++ ret = sev_launch_update_vmsa(sev_guest); + if (ret) { + exit(1); + } +@@ -773,7 +738,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + } + + /* query the measurement blob length */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (!measurement.len) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -785,7 +750,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + measurement.uaddr = (unsigned long)data; + + /* get the measurement blob */ +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (ret) { + error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", +@@ -793,17 +758,19 @@ sev_launch_get_measure(Notifier *notifier, void *unused) + return; + } + +- sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); ++ sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); + + /* encode the measurement value and emit the event */ +- sev->measurement = g_base64_encode(data, measurement.len); +- trace_kvm_sev_launch_measurement(sev->measurement); ++ 
sev_guest->measurement = g_base64_encode(data, measurement.len); ++ trace_kvm_sev_launch_measurement(sev_guest->measurement); + } + + static char *sev_get_launch_measurement(void) + { ++ SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); ++ + if (sev_guest && +- sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { ++ SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { + return g_strdup(sev_guest->measurement); + } + +@@ -832,19 +799,20 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev) ++sev_launch_finish(SevGuestState *sev_guest) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); ++ ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", + __func__, ret, error, fw_error_to_str(error)); + exit(1); + } + +- sev_set_guest_state(sev, SEV_STATE_RUNNING); ++ sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -855,38 +823,40 @@ sev_launch_finish(SevGuestState *sev) + static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { +- SevGuestState *sev = opaque; ++ SevCommonState *sev_common = opaque; + + if (running) { +- if (!sev_check_state(sev, SEV_STATE_RUNNING)) { +- sev_launch_finish(sev); ++ if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { ++ sev_launch_finish(SEV_GUEST(sev_common)); + } + } + } + + static int sev_kvm_type(X86ConfidentialGuest *cg) + { +- SevGuestState *sev = SEV_GUEST(cg); ++ SevCommonState *sev_common = SEV_COMMON(cg); ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + int kvm_type; + +- if (sev->kvm_type != -1) { ++ if (sev_common->kvm_type != -1) { + goto out; + } + +- kvm_type = (sev->policy & SEV_POLICY_ES) ? 
KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; +- if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { +- sev->kvm_type = kvm_type; ++ kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? ++ KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { ++ sev_common->kvm_type = kvm_type; + } else { +- sev->kvm_type = KVM_X86_DEFAULT_VM; ++ sev_common->kvm_type = KVM_X86_DEFAULT_VM; + } + + out: +- return sev->kvm_type; ++ return sev_common->kvm_type; + } + + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev = SEV_GUEST(cgs); ++ SevCommonState *sev_common = SEV_COMMON(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; +@@ -899,8 +869,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return -1; + } + +- sev_guest = sev; +- sev->state = SEV_STATE_UNINIT; ++ sev_common->state = SEV_STATE_UNINIT; + + host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); + host_cbitpos = ebx & 0x3f; +@@ -910,9 +879,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * register of CPUID 0x8000001F. No need to verify the range as the + * comparison against the host value accomplishes that. + */ +- if (host_cbitpos != sev->cbitpos) { ++ if (host_cbitpos != sev_common->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", +- __func__, host_cbitpos, sev->cbitpos); ++ __func__, host_cbitpos, sev_common->cbitpos); + goto err; + } + +@@ -921,16 +890,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. 
+ */ +- if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { ++ if (sev_common->reduced_phys_bits < 1 || ++ sev_common->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", +- __func__, sev->reduced_phys_bits); ++ __func__, sev_common->reduced_phys_bits); + goto err; + } + +- devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); +- sev->sev_fd = open(devname, O_RDWR); +- if (sev->sev_fd < 0) { ++ devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); ++ sev_common->sev_fd = open(devname, O_RDWR); ++ if (sev_common->sev_fd < 0) { + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); +@@ -938,7 +908,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + g_free(devname); + +- ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, ++ ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, + &fw_error); + if (ret) { + error_setg(errp, "%s: failed to get platform status ret=%d " +@@ -946,9 +916,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + fw_error_to_str(fw_error)); + goto err; + } +- sev->build_id = status.build; +- sev->api_major = status.api_major; +- sev->api_minor = status.api_minor; ++ sev_common->build_id = status.build; ++ sev_common->api_major = status.api_major; ++ sev_common->api_minor = status.api_minor; + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +@@ -966,14 +936,14 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + } + + trace_kvm_sev_init(); +- if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? 
KVM_SEV_ES_INIT : KVM_SEV_INIT; + +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + +- ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } + + if (ret) { +@@ -982,7 +952,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- ret = sev_launch_start(sev); ++ sev_launch_start(SEV_GUEST(sev_common)); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -990,13 +960,12 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + ram_block_notifier_add(&sev_ram_notifier); + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); +- qemu_add_vm_change_state_handler(sev_vm_state_change, sev); ++ qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); + + cgs->ready = true; + + return 0; + err: +- sev_guest = NULL; + ram_block_discard_disable(false); + return -1; + } +@@ -1004,13 +973,15 @@ err: + int + sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + { +- if (!sev_guest) { ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ ++ if (!sev_common) { + return 0; + } + + /* if SEV is in update state then encrypt the data else do nothing */ +- if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(sev_guest, ptr, len); ++ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { ++ int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1030,16 +1001,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + void *hva; + gsize hdr_sz = 0, data_sz = 0; + MemoryRegion *mr = NULL; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); 
+ +- if (!sev_guest) { ++ if (!sev_common) { + error_setg(errp, "SEV not enabled for guest"); + return 1; + } + + /* secret can be injected only in this state */ +- if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { ++ if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { + error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", +- sev_guest->state); ++ sev_common->state); + return 1; + } + +@@ -1073,7 +1045,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, + input.trans_uaddr, input.trans_len); + +- ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, + &input, &error); + if (ret) { + error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", +@@ -1180,9 +1152,10 @@ void sev_es_set_reset_vector(CPUState *cpu) + { + X86CPU *x86; + CPUX86State *env; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* Only update if we have valid reset information */ +- if (!sev_guest || !sev_guest->reset_data_valid) { ++ if (!sev_common || !sev_common->reset_data_valid) { + return; + } + +@@ -1194,11 +1167,11 @@ void sev_es_set_reset_vector(CPUState *cpu) + x86 = X86_CPU(cpu); + env = &x86->env; + +- cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, ++ cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + +- env->eip = sev_guest->reset_ip; ++ env->eip = sev_common->reset_ip; + } + + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +@@ -1206,6 +1179,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) + CPUState *cpu; + uint32_t addr; + int ret; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_es_enabled()) { + return 0; +@@ -1219,9 +1193,9 @@ int sev_es_save_reset_vector(void 
*flash_ptr, uint64_t flash_size) + } + + if (addr) { +- sev_guest->reset_cs = addr & 0xffff0000; +- sev_guest->reset_ip = addr & 0x0000ffff; +- sev_guest->reset_data_valid = true; ++ sev_common->reset_cs = addr & 0xffff0000; ++ sev_common->reset_ip = addr & 0x0000ffff; ++ sev_common->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); +@@ -1267,12 +1241,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; ++ SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ +- if (!sev_guest->kernel_hashes) { ++ if (!sev_common->kernel_hashes) { + return false; + } + +@@ -1363,8 +1338,30 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static char * ++sev_common_get_sev_device(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_COMMON(obj)->sev_device); ++} ++ + static void +-sev_guest_class_init(ObjectClass *oc, void *data) ++sev_common_set_sev_device(Object *obj, const char *value, Error **errp) ++{ ++ SEV_COMMON(obj)->sev_device = g_strdup(value); ++} ++ ++static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) ++{ ++ return SEV_COMMON(obj)->kernel_hashes; ++} ++ ++static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) ++{ ++ SEV_COMMON(obj)->kernel_hashes = value; ++} ++ ++static void ++sev_common_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); +@@ -1373,10 +1370,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) + x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- 
sev_guest_set_sev_device); ++ sev_common_get_sev_device, ++ sev_common_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_common_get_kernel_hashes, ++ sev_common_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_common_instance_init(Object *obj) ++{ ++ SevCommonState *sev_common = SEV_COMMON(obj); ++ ++ sev_common->kvm_type = -1; ++ ++ sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev_common->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info common to sev/sev-es/sev-snp */ ++static const TypeInfo sev_common_info = { ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .name = TYPE_SEV_COMMON, ++ .instance_size = sizeof(SevCommonState), ++ .instance_init = sev_common_instance_init, ++ .class_size = sizeof(SevCommonStateClass), ++ .class_init = sev_common_class_init, ++ .abstract = true, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ ++static char * ++sev_guest_get_dh_cert_file(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_GUEST(obj)->dh_cert_file); ++} ++ ++static void ++sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->dh_cert_file = g_strdup(value); ++} ++ ++static char * ++sev_guest_get_session_file(Object *obj, Error **errp) ++{ ++ SevGuestState *sev_guest = SEV_GUEST(obj); ++ ++ return sev_guest->session_file ? 
g_strdup(sev_guest->session_file) : NULL; ++} ++ ++static void ++sev_guest_set_session_file(Object *obj, const char *value, Error **errp) ++{ ++ SEV_GUEST(obj)->session_file = g_strdup(value); ++} ++ ++static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) ++{ ++ return SEV_GUEST(obj)->legacy_vm_type; ++} ++ ++static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) ++{ ++ SEV_GUEST(obj)->legacy_vm_type = value; ++} ++ ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +@@ -1387,11 +1461,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); +@@ -1402,41 +1471,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) + static void + sev_guest_instance_init(Object *obj) + { +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->kvm_type = -1; ++ SevGuestState *sev_guest = SEV_GUEST(obj); + +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ sev_guest->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, + OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- 
object_property_add_uint32_ptr(obj, "reduced-phys-bits", +- &sev->reduced_phys_bits, ++ object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, + OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); + } + +-/* sev guest info */ ++/* guest info specific sev/sev-es */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_X86_CONFIDENTIAL_GUEST, ++ .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } ++ .class_init = sev_guest_class_init, + }; + + static void + sev_register_types(void) + { ++ type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 9e10d09539..668374eef3 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -20,6 +20,9 @@ + + #include "exec/confidential-guest-support.h" + ++#define TYPE_SEV_COMMON "sev-common" ++#define TYPE_SEV_GUEST "sev-guest" ++ + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 + #define SEV_POLICY_ES 0x4 +-- +2.39.3 + diff --git a/kvm-i386-sev-Introduce-sev-snp-guest-object.patch b/kvm-i386-sev-Introduce-sev-snp-guest-object.patch new file mode 100644 index 0000000..b347bf6 --- /dev/null +++ b/kvm-i386-sev-Introduce-sev-snp-guest-object.patch @@ -0,0 +1,530 @@ +From 900859fd3445b9a71f1a9a8befda17f0c33f3923 Mon Sep 17 00:00:00 2001 +From: Brijesh Singh +Date: Thu, 30 May 2024 06:16:19 -0500 +Subject: [PATCH 059/100] i386/sev: Introduce 'sev-snp-guest' object + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [59/91] 3e585113d209176c2b97ad5e4fe943f19dfdcaeb (bonzini/rhel-qemu-kvm) + +SEV-SNP support relies on a different set of 
properties/state than the +existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' +object, which can be used to configure an SEV-SNP guest. For example, +a default-configured SEV-SNP guest with no additional information +passed in for use with attestation: + + -object sev-snp-guest,id=sev0 + +or a fully-specified SEV-SNP guest where all spec-defined binary +blobs are passed in as base64-encoded strings: + + -object sev-snp-guest,id=sev0, \ + policy=0x30000, \ + init-flags=0, \ + id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ + id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ + author-key-enabled=on, \ + host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ + guest-visible-workarounds=AA==, \ + +See the QAPI schema updates included in this patch for more usage +details. + +In some cases these blobs may be up to 4096 characters, but this is +generally well below the default limit for linux hosts where +command-line sizes are defined by the sysconf-configurable ARG_MAX +value, which defaults to 2097152 characters for Ubuntu hosts, for +example. 
+ +Signed-off-by: Brijesh Singh +Co-developed-by: Michael Roth +Acked-by: Markus Armbruster (for QAPI schema) +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 7b34df44260b391e33bc3acf1ced30019d9aadf1) +Signed-off-by: Paolo Bonzini +--- + docs/system/i386/amd-memory-encryption.rst | 70 +++++- + qapi/qom.json | 58 +++++ + target/i386/sev.c | 253 +++++++++++++++++++++ + target/i386/sev.h | 1 + + 4 files changed, 380 insertions(+), 2 deletions(-) + +diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst +index e9bc142bc1..748f5094ba 100644 +--- a/docs/system/i386/amd-memory-encryption.rst ++++ b/docs/system/i386/amd-memory-encryption.rst +@@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs + are about to occur. This allows the guest to selectively share information with + the hypervisor to satisfy the requested function. + +-Launching +---------- ++Launching (SEV and SEV-ES) ++-------------------------- + + Boot images (such as bios) must be encrypted before a guest can be booted. The + ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, +@@ -161,6 +161,72 @@ The value of GCTX.LD is + If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for + ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. + ++Launching (SEV-SNP) ++------------------- ++Boot images (such as bios) must be encrypted before a guest can be booted. The ++``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ++``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These ++three commands communicate with SEV-SNP firmware to generate a fresh memory ++encryption key for the VM, encrypt the boot images for a successful launch. 
For ++more details on the SEV-SNP firmware interfaces used by these commands please ++see the SEV-SNP Firmware ABI. ++ ++``SNP_LAUNCH_START`` is called first to create a cryptographic launch context ++within the firmware. To create this context, the guest owner must provide a ++guest policy and other parameters as described in the SEV-SNP firmware ++specification. The launch parameters should be specified as described in the ++QAPI schema for the sev-snp-guest object. ++ ++The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------+-------+----------+-------------------------------------------------+ ++| key | type | default | meaning | +++---------------------------+-------------------------------------------------+ ++| policy | hex | 0x30000 | a 64-bit guest policy | +++---------------------------+-------------------------------------------------+ ++| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| ++| | | | for guest OS visible | ++| | | | workarounds. | +++---------------------------+-------------------------------------------------+ ++ ++``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context ++created via the ``SNP_LAUNCH_START`` command. If required, this command can be ++called multiple times to encrypt different memory regions. The command also ++calculates the measurement of the memory contents as it encrypts. ++ ++``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while ++finalizing the launch the firmware can perform checks on the launch digest ++computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must ++supply the id block, authentication blob and host data that should be included ++in the attestation report. See the SEV-SNP spec for further details. 
++ ++The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured ++by the corresponding parameters documented in the QAPI schema for the ++'sev-snp-guest' object. ++ +++--------------------+-------+----------+-------------------------------------+ ++| key | type | default | meaning | +++--------------------+-------+----------+-------------------------------------+ ++| id-block | string| none | base64 encoded ID block | +++--------------------+-------+----------+-------------------------------------+ ++| id-auth | string| none | base64 encoded authentication | ++| | | | information | +++--------------------+-------+----------+-------------------------------------+ ++| author-key-enabled | bool | 0 | auth block contains author key | +++--------------------+-------+----------+-------------------------------------+ ++| host-data | string| none | host provided data | +++--------------------+-------+----------+-------------------------------------+ ++ ++To launch a SEV-SNP guest (additional parameters are documented in the QAPI ++schema for the 'sev-snp-guest' object):: ++ ++ # ${QEMU} \ ++ -machine ...,confidential-guest-support=sev0 \ ++ -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 ++ ++ + Debugging + --------- + +diff --git a/qapi/qom.json b/qapi/qom.json +index 056b38f491..8bd299265e 100644 +--- a/qapi/qom.json ++++ b/qapi/qom.json +@@ -929,6 +929,62 @@ + '*handle': 'uint32', + '*legacy-vm-type': 'bool' } } + ++## ++# @SevSnpGuestProperties: ++# ++# Properties for sev-snp-guest objects. Most of these are direct ++# arguments for the KVM_SNP_* interfaces documented in the Linux ++# kernel source under ++# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn ++# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands ++# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). ++# ++# More usage information is also available in the QEMU source tree ++# under docs/system/i386/amd-memory-encryption.rst. 
++# ++# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as ++# defined in the SEV-SNP firmware ABI (default: 0x30000) ++# ++# @guest-visible-workarounds: 16-byte, base64-encoded blob to report ++# hypervisor-defined workarounds, corresponding to the 'GOSVW' ++# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP ++# firmware ABI (default: all-zero) ++# ++# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' ++# structure for the SNP_LAUNCH_FINISH command defined in the ++# SEV-SNP firmware ABI (default: all-zero) ++# ++# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID ++# Authentication Information Structure' for the SNP_LAUNCH_FINISH ++# command defined in the SEV-SNP firmware ABI (default: all-zero) ++# ++# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' ++# field defined in the SEV-SNP firmware ABI (default: false) ++# ++# @host-data: 32-byte, base64-encoded, user-defined blob to provide to ++# the guest, as documented for the 'HOST_DATA' parameter of the ++# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: ++# all-zero) ++# ++# @vcek-disabled: Guests are by default allowed to choose between VLEK ++# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip ++# Endorsement Key) when requesting attestation reports from ++# firmware. Set this to true to disable the use of VCEK. 
++# (default: false) (since: 9.1) ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestProperties', ++ 'base': 'SevCommonProperties', ++ 'data': { ++ '*policy': 'uint64', ++ '*guest-visible-workarounds': 'str', ++ '*id-block': 'str', ++ '*id-auth': 'str', ++ '*author-key-enabled': 'bool', ++ '*host-data': 'str', ++ '*vcek-disabled': 'bool' } } ++ + ## + # @ThreadContextProperties: + # +@@ -1007,6 +1063,7 @@ + { 'name': 'secret_keyring', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest', ++ 'sev-snp-guest', + 'thread-context', + 's390-pv-guest', + 'throttle-group', +@@ -1077,6 +1134,7 @@ + 'secret_keyring': { 'type': 'SecretKeyringProperties', + 'if': 'CONFIG_SECRET_KEYRING' }, + 'sev-guest': 'SevGuestProperties', ++ 'sev-snp-guest': 'SevSnpGuestProperties', + 'thread-context': 'ThreadContextProperties', + 'throttle-group': 'ThrottleGroupProperties', + 'tls-creds-anon': 'TlsCredsAnonProperties', +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 28a018ed83..a81b3228d4 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -42,6 +42,7 @@ + + OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) ++OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + + struct SevCommonState { + X86ConfidentialGuest parent_obj; +@@ -96,8 +97,22 @@ struct SevGuestState { + bool legacy_vm_type; + }; + ++struct SevSnpGuestState { ++ SevCommonState parent_obj; ++ ++ /* configuration parameters */ ++ char *guest_visible_workarounds; ++ char *id_block; ++ char *id_auth; ++ char *host_data; ++ ++ struct kvm_sev_snp_launch_start kvm_start_conf; ++ struct kvm_sev_snp_launch_finish kvm_finish_conf; ++}; ++ + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ + #define DEFAULT_SEV_DEVICE "/dev/sev" ++#define DEFAULT_SEV_SNP_POLICY 0x30000 + + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" + typedef struct __attribute__((__packed__)) SevInfoBlock { +@@ -1500,11 
+1515,249 @@ static const TypeInfo sev_guest_info = { + .class_init = sev_guest_class_init, + }; + ++static void ++sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static void ++sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ visit_type_uint64(v, name, ++ (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, ++ errp); ++} ++ ++static char * ++sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) ++{ ++ return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); ++} ++ ++static void ++sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, ++ Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->guest_visible_workarounds); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->guest_visible_workarounds = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, ++ -1, &len, errp); ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(start->gosvw)) { ++ error_setg(errp, "parameter length of %lu exceeds max of %lu", ++ len, sizeof(start->gosvw)); ++ return; ++ } ++ ++ memcpy(start->gosvw, blob, len); ++} ++ ++static char * ++sev_snp_guest_get_id_block(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_block); ++} ++ ++static void ++sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_block); ++ 
g_free((guchar *)finish->id_block_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_block = g_strdup(value); ++ ++ finish->id_block_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); ++ ++ if (!finish->id_block_uaddr) { ++ return; ++ } ++ ++ if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { ++ error_setg(errp, "parameter length of %lu not equal to %u", ++ len, KVM_SEV_SNP_ID_BLOCK_SIZE); ++ return; ++ } ++ ++ finish->id_block_en = (len) ? 1 : 0; ++} ++ ++static char * ++sev_snp_guest_get_id_auth(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->id_auth); ++} ++ ++static void ++sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ gsize len; ++ ++ g_free(sev_snp_guest->id_auth); ++ g_free((guchar *)finish->id_auth_uaddr); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->id_auth = g_strdup(value); ++ ++ finish->id_auth_uaddr = ++ (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); ++ ++ if (!finish->id_auth_uaddr) { ++ return; ++ } ++ ++ if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { ++ error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", ++ len, KVM_SEV_SNP_ID_AUTH_SIZE); ++ return; ++ } ++} ++ ++static bool ++sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.auth_key_en; ++} ++ ++static void ++sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.auth_key_en = value; ++} ++ ++static bool ++sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) ++{ ++ 
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; ++} ++ ++static void ++sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ sev_snp_guest->kvm_finish_conf.vcek_disabled = value; ++} ++ ++static char * ++sev_snp_guest_get_host_data(Object *obj, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ return g_strdup(sev_snp_guest->host_data); ++} ++ ++static void ++sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; ++ g_autofree guchar *blob; ++ gsize len; ++ ++ g_free(sev_snp_guest->host_data); ++ ++ /* store the base64 str so we don't need to re-encode in getter */ ++ sev_snp_guest->host_data = g_strdup(value); ++ ++ blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); ++ ++ if (!blob) { ++ return; ++ } ++ ++ if (len != sizeof(finish->host_data)) { ++ error_setg(errp, "parameter length of %lu not equal to %lu", ++ len, sizeof(finish->host_data)); ++ return; ++ } ++ ++ memcpy(finish->host_data, blob, len); ++} ++ ++static void ++sev_snp_guest_class_init(ObjectClass *oc, void *data) ++{ ++ object_class_property_add(oc, "policy", "uint64", ++ sev_snp_guest_get_policy, ++ sev_snp_guest_set_policy, NULL, NULL); ++ object_class_property_add_str(oc, "guest-visible-workarounds", ++ sev_snp_guest_get_guest_visible_workarounds, ++ sev_snp_guest_set_guest_visible_workarounds); ++ object_class_property_add_str(oc, "id-block", ++ sev_snp_guest_get_id_block, ++ sev_snp_guest_set_id_block); ++ object_class_property_add_str(oc, "id-auth", ++ sev_snp_guest_get_id_auth, ++ sev_snp_guest_set_id_auth); ++ object_class_property_add_bool(oc, "author-key-enabled", ++ sev_snp_guest_get_author_key_enabled, ++ 
sev_snp_guest_set_author_key_enabled); ++ object_class_property_add_bool(oc, "vcek-required", ++ sev_snp_guest_get_vcek_disabled, ++ sev_snp_guest_set_vcek_disabled); ++ object_class_property_add_str(oc, "host-data", ++ sev_snp_guest_get_host_data, ++ sev_snp_guest_set_host_data); ++} ++ ++static void ++sev_snp_guest_instance_init(Object *obj) ++{ ++ SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); ++ ++ /* default init/start/finish params for kvm */ ++ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; ++} ++ ++/* guest info specific to sev-snp */ ++static const TypeInfo sev_snp_guest_info = { ++ .parent = TYPE_SEV_COMMON, ++ .name = TYPE_SEV_SNP_GUEST, ++ .instance_size = sizeof(SevSnpGuestState), ++ .class_init = sev_snp_guest_class_init, ++ .instance_init = sev_snp_guest_instance_init, ++}; ++ + static void + sev_register_types(void) + { + type_register_static(&sev_common_info); + type_register_static(&sev_guest_info); ++ type_register_static(&sev_snp_guest_info); + } + + type_init(sev_register_types); +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 668374eef3..bedc667eeb 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -22,6 +22,7 @@ + + #define TYPE_SEV_COMMON "sev-common" + #define TYPE_SEV_GUEST "sev-guest" ++#define TYPE_SEV_SNP_GUEST "sev-snp-guest" + + #define SEV_POLICY_NODBG 0x1 + #define SEV_POLICY_NOKS 0x2 +-- +2.39.3 + diff --git a/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch b/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch new file mode 100644 index 0000000..265da66 --- /dev/null +++ b/kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch @@ -0,0 +1,85 @@ +From be37914ae54c8aebc218cf41b37bc0ea1563daae Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 12:51:44 +0200 +Subject: [PATCH 074/100] i386/sev: Invoke launch_updata_data() for SEV class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth 
+RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [74/91] f1b588a9ffecd6944a78186d88a6be3849698710 (bonzini/rhel-qemu-kvm) + +Add launch_update_data() in SevCommonStateClass and +invoke as sev_launch_update_data() for SEV object. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 9861405a8f845133b7984322c2df0c43a45553c3) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7b5c4b4874..8834cf9441 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -74,6 +74,7 @@ struct SevCommonStateClass { + /* public */ + int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); ++ int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + }; + +@@ -929,7 +930,7 @@ out: + } + + static int +-sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) ++sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) + { + int ret, fw_error; + struct kvm_sev_launch_update_data update; +@@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) + update.uaddr = (uintptr_t)addr; + update.len = len; + trace_kvm_sev_launch_update_data(addr, len); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + &update, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", +@@ -1487,6 +1488,7 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); ++ SevCommonStateClass *klass = 
SEV_COMMON_GET_CLASS(sev_common); + + if (!sev_common) { + return 0; +@@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +- int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); ++ int ret; ++ ++ ret = klass->launch_update_data(sev_common, gpa, ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; +@@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; ++ klass->launch_update_data = sev_launch_update_data; + klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; + +-- +2.39.3 + diff --git a/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch b/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch new file mode 100644 index 0000000..f28004d --- /dev/null +++ b/kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch @@ -0,0 +1,55 @@ +From 32899eb4fa5143b795b107de4857adce2cf1d434 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:38 -0500 +Subject: [PATCH 075/100] i386/sev: Invoke launch_updata_data() for SNP class + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [75/91] 3520af5847f8dddb6d7fe7ad5feb308230f387b9 (bonzini/rhel-qemu-kvm) + +Invoke as sev_snp_launch_update_data() for SNP object. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0765d136eba400ad1cb7cae18438bb10eace64dc) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 8834cf9441..eaf5fc6c6b 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, + return 0; + } + ++static int ++sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, ++ uint8_t *ptr, uint64_t len) ++{ ++ int ret = snp_launch_update_data(gpa, ptr, len, ++ KVM_SEV_SNP_PAGE_TYPE_NORMAL); ++ return ret; ++} ++ + static int + sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +@@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + + klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; ++ klass->launch_update_data = sev_snp_launch_update_data; + klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; + +-- +2.39.3 + diff --git a/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch b/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch new file mode 100644 index 0000000..e38615b --- /dev/null +++ b/kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch @@ -0,0 +1,47 @@ +From fa6076291eb45255bc2fe523399d7d0647fc5570 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:10 -0500 +Subject: [PATCH 085/100] i386/sev: Move SEV_COMMON null check before + dereferencing + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [85/91] e8d2bfd077766a5e7777b9337d0e77146f883224 (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546886. 
+ +Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 48779faef3c8e2fe70bd8285bffa731bd76dc844) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 7c9df621de..f18432f58e 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1529,11 +1529,12 @@ int + sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) + { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); ++ SevCommonStateClass *klass; + + if (!sev_common) { + return 0; + } ++ klass = SEV_COMMON_GET_CLASS(sev_common); + + /* if SEV is in update state then encrypt the data else do nothing */ + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { +-- +2.39.3 + diff --git a/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch b/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch new file mode 100644 index 0000000..250a723 --- /dev/null +++ b/kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch @@ -0,0 +1,88 @@ +From 4d96ca893126d4c17c9fe03c76973b1d4a414f21 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:18 -0500 +Subject: [PATCH 058/100] i386/sev: Move sev_launch_finish to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [58/91] 7865710d320a6df7038ef7016d350aa9cdcea326 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch finish is handled compared to the existing +sev-guest object. 
Move sev_launch_finish() to a class method to make it +easier to implement SNP-specific launch update functionality later. + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bce615a14aec07cab0488e5a242f6a91e641efcb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b2aa0d6f99..28a018ed83 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -71,6 +71,7 @@ struct SevCommonStateClass { + + /* public */ + int (*launch_start)(SevCommonState *sev_common); ++ void (*launch_finish)(SevCommonState *sev_common); + }; + + /** +@@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = { + }; + + static void +-sev_launch_finish(SevGuestState *sev_guest) ++sev_launch_finish(SevCommonState *sev_common) + { + int ret, error; + + trace_kvm_sev_launch_finish(); +- ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, ++ ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); + if (ret) { + error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", +@@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest) + exit(1); + } + +- sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); ++ sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, +@@ -826,10 +827,11 @@ static void + sev_vm_state_change(void *opaque, bool running, RunState state) + { + SevCommonState *sev_common = opaque; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); + + if (running) { + if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { +- sev_launch_finish(SEV_GUEST(sev_common)); ++ klass->launch_finish(sev_common); + } + } + } +@@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + 
klass->launch_start = sev_launch_start; ++ klass->launch_finish = sev_launch_finish; + + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, +-- +2.39.3 + diff --git a/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch b/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch new file mode 100644 index 0000000..12824ec --- /dev/null +++ b/kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch @@ -0,0 +1,91 @@ +From a170ba2c7dbf2775eb9047779d3643a2a81bb372 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:17 -0500 +Subject: [PATCH 057/100] i386/sev: Move sev_launch_update to separate class + method + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [57/91] 4f31e7afaec6f2c2a7c06cda4d7d27d4037e53e0 (bonzini/rhel-qemu-kvm) + +When sev-snp-guest objects are introduced there will be a number of +differences in how the launch data is handled compared to the existing +sev-guest object. Move sev_launch_start() to a class method to make it +easier to implement SNP-specific launch update functionality later. 
+ +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6600f1ac0c81cbe67faf048ea07f78542dea925f) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 33e606eea0..b2aa0d6f99 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -69,6 +69,8 @@ struct SevCommonState { + struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + ++ /* public */ ++ int (*launch_start)(SevCommonState *sev_common); + }; + + /** +@@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) + } + + static int +-sev_launch_start(SevGuestState *sev_guest) ++sev_launch_start(SevCommonState *sev_common) + { + gsize sz; + int ret = 1; + int fw_error, rc; ++ SevGuestState *sev_guest = SEV_GUEST(sev_common); + struct kvm_sev_launch_start start = { + .handle = sev_guest->handle, .policy = sev_guest->policy + }; + guchar *session = NULL, *dh_cert = NULL; +- SevCommonState *sev_common = SEV_COMMON(sev_guest); + + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { +@@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; ++ SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + + ret = ram_block_discard_disable(true); + if (ret) { +@@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + goto err; + } + +- sev_launch_start(SEV_GUEST(sev_common)); ++ ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + goto err; +@@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) + static void + 
sev_guest_class_init(ObjectClass *oc, void *data) + { ++ SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); ++ ++ klass->launch_start = sev_launch_start; ++ + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); +-- +2.39.3 + diff --git a/kvm-i386-sev-Reorder-struct-declarations.patch b/kvm-i386-sev-Reorder-struct-declarations.patch new file mode 100644 index 0000000..746317d --- /dev/null +++ b/kvm-i386-sev-Reorder-struct-declarations.patch @@ -0,0 +1,134 @@ +From d009fa2cebebd1da80f4f2f5d0c4fffb87e02afc Mon Sep 17 00:00:00 2001 +From: Dov Murik +Date: Thu, 30 May 2024 06:16:34 -0500 +Subject: [PATCH 079/100] i386/sev: Reorder struct declarations + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [79/91] 1274d4620e88dda99ec10173ca5e3cd4184c8fb6 (bonzini/rhel-qemu-kvm) + +Move the declaration of PaddedSevHashTable before SevSnpGuest so +we can add a new such field to the latter. + +No functional change intended. 
+ +Signed-off-by: Dov Murik +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cc483bf911931f405dea682c74a3d8b9b6c54369) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ + 1 file changed, 42 insertions(+), 42 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 73f9406715..3fce4c08eb 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) + OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) + OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) + ++/* hard code sha256 digest size */ ++#define HASH_SIZE 32 ++ ++typedef struct QEMU_PACKED SevHashTableEntry { ++ QemuUUID guid; ++ uint16_t len; ++ uint8_t hash[HASH_SIZE]; ++} SevHashTableEntry; ++ ++typedef struct QEMU_PACKED SevHashTable { ++ QemuUUID guid; ++ uint16_t len; ++ SevHashTableEntry cmdline; ++ SevHashTableEntry initrd; ++ SevHashTableEntry kernel; ++} SevHashTable; ++ ++/* ++ * Data encrypted by sev_encrypt_flash() must be padded to a multiple of ++ * 16 bytes. 
++ */ ++typedef struct QEMU_PACKED PaddedSevHashTable { ++ SevHashTable ht; ++ uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; ++} PaddedSevHashTable; ++ ++QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); ++ ++#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" ++typedef struct __attribute__((__packed__)) SevInfoBlock { ++ /* SEV-ES Reset Vector Address */ ++ uint32_t reset_addr; ++} SevInfoBlock; ++ ++#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" ++typedef struct QEMU_PACKED SevHashTableDescriptor { ++ /* SEV hash table area guest address */ ++ uint32_t base; ++ /* SEV hash table area size (in bytes) */ ++ uint32_t size; ++} SevHashTableDescriptor; ++ + struct SevCommonState { + X86ConfidentialGuest parent_obj; + +@@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData { + + static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + +-#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +-typedef struct __attribute__((__packed__)) SevInfoBlock { +- /* SEV-ES Reset Vector Address */ +- uint32_t reset_addr; +-} SevInfoBlock; +- +-#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +-typedef struct QEMU_PACKED SevHashTableDescriptor { +- /* SEV hash table area guest address */ +- uint32_t base; +- /* SEV hash table area size (in bytes) */ +- uint32_t size; +-} SevHashTableDescriptor; +- +-/* hard code sha256 digest size */ +-#define HASH_SIZE 32 +- +-typedef struct QEMU_PACKED SevHashTableEntry { +- QemuUUID guid; +- uint16_t len; +- uint8_t hash[HASH_SIZE]; +-} SevHashTableEntry; +- +-typedef struct QEMU_PACKED SevHashTable { +- QemuUUID guid; +- uint16_t len; +- SevHashTableEntry cmdline; +- SevHashTableEntry initrd; +- SevHashTableEntry kernel; +-} SevHashTable; +- +-/* +- * Data encrypted by sev_encrypt_flash() must be padded to a multiple of +- * 16 bytes. 
+- */ +-typedef struct QEMU_PACKED PaddedSevHashTable { +- SevHashTable ht; +- uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +-} PaddedSevHashTable; +- +-QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); +- + static Error *sev_mig_blocker; + + static const char *const sev_fw_errlist[] = { +-- +2.39.3 + diff --git a/kvm-i386-sev-Replace-error_report-with-error_setg.patch b/kvm-i386-sev-Replace-error_report-with-error_setg.patch new file mode 100644 index 0000000..ba66cde --- /dev/null +++ b/kvm-i386-sev-Replace-error_report-with-error_setg.patch @@ -0,0 +1,46 @@ +From 80c1d78e31b2567d1c610c8939b75d159ff6ea27 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:13 -0500 +Subject: [PATCH 055/100] i386/sev: Replace error_report with error_setg + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [55/91] 1e15fc2458687e564af9fa5022c29e79ddc8edfd (bonzini/rhel-qemu-kvm) + +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index d30b68c11e..67ed32e5ea 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { +- error_report("%s: SEV-ES guests require in-kernel irqchip support", +- __func__); ++ error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" ++ "support", __func__); + goto err; + } + + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { +- error_report("%s: guest policy requires SEV-ES, but " ++ error_setg(errp, 
"%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; +-- +2.39.3 + diff --git a/kvm-i386-sev-Return-when-sev_common-is-null.patch b/kvm-i386-sev-Return-when-sev_common-is-null.patch new file mode 100644 index 0000000..6fc68aa --- /dev/null +++ b/kvm-i386-sev-Return-when-sev_common-is-null.patch @@ -0,0 +1,40 @@ +From 88da6d01b1de2b92adb5c47c6d482876a054705f Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:11 -0500 +Subject: [PATCH 086/100] i386/sev: Return when sev_common is null + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [86/91] 02ce4a6a51ce9fd961f417c13db0a760673591ba (bonzini/rhel-qemu-kvm) + +Fixes Coverity CID 1546885. + +Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cd7093a7a168a823d07671348996f049d45e8f67) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index f18432f58e..c40562dce3 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp) + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); ++ return NULL; + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", +-- +2.39.3 + diff --git a/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch b/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch new file mode 100644 index 0000000..8548e22 --- /dev/null +++ b/kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch @@ -0,0 +1,47 @@ +From 
c7649ac1b958dc48de50f32b1ad80d84b17945a8 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:29 -0500 +Subject: [PATCH 069/100] i386/sev: Set CPU state to protected once SNP guest + payload is finalized + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [69/91] 09280f987a186511ec7d62c3f340b2148e8556d7 (bonzini/rhel-qemu-kvm) + +Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the +vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU +state afterward will only be acting on the initial data and so are +effectively no-ops. + +Set the vCPU state to protected at this point so that QEMU doesn't +continue trying to re-sync vCPU data during guest runtime. + +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ef2e592ca7..e84e4395a5 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) + exit(1); + } + ++ kvm_mark_guest_state_protected(); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ +-- +2.39.3 + diff --git a/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch b/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..05ccb0a --- /dev/null +++ b/kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,268 @@ +From 5540bb5ca052531563df1ade68995e268ae65224 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:36 -0500 +Subject: [PATCH 012/100] i386/sev: Switch to use
confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [12/91] 6f5f8d1b818826f7ee4b6ae527963ef23c97f531 (bonzini/rhel-qemu-kvm) + +Use confidential_guest_kvm_init() instead of calling SEV +specific sev_kvm_init(). This allows the introduction of multiple +confidential-guest-support subclasses for different x86 vendors. + +As a bonus, stubs are not needed anymore since there is no +direct call from target/i386/kvm/kvm.c to SEV code. + +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 637c95b37b106c2eeba313e0abb38ec12e918a59) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 10 +-- + target/i386/kvm/meson.build | 2 - + target/i386/kvm/sev-stub.c | 21 ------ + target/i386/sev.c | 127 ++++++++++++++++++------------------ + target/i386/sev.h | 2 - + 5 files changed, 69 insertions(+), 93 deletions(-) + delete mode 100644 target/i386/kvm/sev-stub.c + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 5f30b649a0..e271652620 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -2543,10 +2543,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * mechanisms are supported in future (e.g. TDX), they'll need + * their own initialization either here or elsewhere. 
+ */ +- ret = sev_kvm_init(ms->cgs, &local_err); +- if (ret < 0) { +- error_report_err(local_err); +- return ret; ++ if (ms->cgs) { ++ ret = confidential_guest_kvm_init(ms->cgs, &local_err); ++ if (ret < 0) { ++ error_report_err(local_err); ++ return ret; ++ } + } + + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); +diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build +index 84d9143e60..e7850981e6 100644 +--- a/target/i386/kvm/meson.build ++++ b/target/i386/kvm/meson.build +@@ -7,8 +7,6 @@ i386_kvm_ss.add(files( + + i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) + +-i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) +- + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) + + i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) +diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c +deleted file mode 100644 +index 1be5341e8a..0000000000 +--- a/target/i386/kvm/sev-stub.c ++++ /dev/null +@@ -1,21 +0,0 @@ +-/* +- * QEMU SEV stub +- * +- * Copyright Advanced Micro Devices 2018 +- * +- * Authors: +- * Brijesh Singh +- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. 
+- * +- */ +- +-#include "qemu/osdep.h" +-#include "sev.h" +- +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- /* If we get here, cgs must be some non-SEV thing */ +- return 0; +-} +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 72930ff0dc..b8f79d34d1 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -353,63 +353,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) + sev->kernel_hashes = value; + } + +-static void +-sev_guest_class_init(ObjectClass *oc, void *data) +-{ +- object_class_property_add_str(oc, "sev-device", +- sev_guest_get_sev_device, +- sev_guest_set_sev_device); +- object_class_property_set_description(oc, "sev-device", +- "SEV device to use"); +- object_class_property_add_str(oc, "dh-cert-file", +- sev_guest_get_dh_cert_file, +- sev_guest_set_dh_cert_file); +- object_class_property_set_description(oc, "dh-cert-file", +- "guest owners DH certificate (encoded with base64)"); +- object_class_property_add_str(oc, "session-file", +- sev_guest_get_session_file, +- sev_guest_set_session_file); +- object_class_property_set_description(oc, "session-file", +- "guest owners session parameters (encoded with base64)"); +- object_class_property_add_bool(oc, "kernel-hashes", +- sev_guest_get_kernel_hashes, +- sev_guest_set_kernel_hashes); +- object_class_property_set_description(oc, "kernel-hashes", +- "add kernel hashes to guest firmware for measured Linux boot"); +-} +- +-static void +-sev_guest_instance_init(Object *obj) +-{ +- SevGuestState *sev = SEV_GUEST(obj); +- +- sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); +- sev->policy = DEFAULT_GUEST_POLICY; +- object_property_add_uint32_ptr(obj, "policy", &sev->policy, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "handle", &sev->handle, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, +- OBJ_PROP_FLAG_READWRITE); +- object_property_add_uint32_ptr(obj, "reduced-phys-bits", 
+- &sev->reduced_phys_bits, +- OBJ_PROP_FLAG_READWRITE); +-} +- +-/* sev guest info */ +-static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, +- .name = TYPE_SEV_GUEST, +- .instance_size = sizeof(SevGuestState), +- .instance_finalize = sev_guest_finalize, +- .class_init = sev_guest_class_init, +- .instance_init = sev_guest_instance_init, +- .interfaces = (InterfaceInfo[]) { +- { TYPE_USER_CREATABLE }, +- { } +- } +-}; +- + bool + sev_enabled(void) + { +@@ -906,20 +849,15 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { +- SevGuestState *sev +- = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); ++ SevGuestState *sev = SEV_GUEST(cgs); + char *devname; + int ret, fw_error, cmd; + uint32_t ebx; + uint32_t host_cbitpos; + struct sev_user_data_status status = {}; + +- if (!sev) { +- return 0; +- } +- + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); +@@ -1384,6 +1322,67 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) + return ret; + } + ++static void ++sev_guest_class_init(ObjectClass *oc, void *data) ++{ ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = sev_kvm_init; ++ ++ object_class_property_add_str(oc, "sev-device", ++ sev_guest_get_sev_device, ++ sev_guest_set_sev_device); ++ object_class_property_set_description(oc, "sev-device", ++ "SEV device to use"); ++ object_class_property_add_str(oc, "dh-cert-file", ++ sev_guest_get_dh_cert_file, ++ sev_guest_set_dh_cert_file); ++ object_class_property_set_description(oc, "dh-cert-file", ++ "guest owners DH certificate (encoded with base64)"); ++ object_class_property_add_str(oc, "session-file", ++ sev_guest_get_session_file, ++ sev_guest_set_session_file); ++ 
object_class_property_set_description(oc, "session-file", ++ "guest owners session parameters (encoded with base64)"); ++ object_class_property_add_bool(oc, "kernel-hashes", ++ sev_guest_get_kernel_hashes, ++ sev_guest_set_kernel_hashes); ++ object_class_property_set_description(oc, "kernel-hashes", ++ "add kernel hashes to guest firmware for measured Linux boot"); ++} ++ ++static void ++sev_guest_instance_init(Object *obj) ++{ ++ SevGuestState *sev = SEV_GUEST(obj); ++ ++ sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); ++ sev->policy = DEFAULT_GUEST_POLICY; ++ object_property_add_uint32_ptr(obj, "policy", &sev->policy, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "handle", &sev->handle, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, ++ OBJ_PROP_FLAG_READWRITE); ++ object_property_add_uint32_ptr(obj, "reduced-phys-bits", ++ &sev->reduced_phys_bits, ++ OBJ_PROP_FLAG_READWRITE); ++} ++ ++/* sev guest info */ ++static const TypeInfo sev_guest_info = { ++ .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .name = TYPE_SEV_GUEST, ++ .instance_size = sizeof(SevGuestState), ++ .instance_finalize = sev_guest_finalize, ++ .class_init = sev_guest_class_init, ++ .instance_init = sev_guest_instance_init, ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_USER_CREATABLE }, ++ { } ++ } ++}; ++ + static void + sev_register_types(void) + { +diff --git a/target/i386/sev.h b/target/i386/sev.h +index e7499c95b1..9e10d09539 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -57,6 +57,4 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, + int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); + void sev_es_set_reset_vector(CPUState *cpu); + +-int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +- + #endif +-- +2.39.3 + diff --git a/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch b/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch new file mode 
100644 index 0000000..27852d5 --- /dev/null +++ b/kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch @@ -0,0 +1,240 @@ +From a870e7c31d9605baea4741d82521612b6164c99b Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Thu, 30 May 2024 06:16:26 -0500 +Subject: [PATCH 066/100] i386/sev: Update query-sev QAPI format to handle + SEV-SNP + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [66/91] a19b3e226e857f3995176e7d2ef1ce2e4329a885 (bonzini/rhel-qemu-kvm) + +Most of the current 'query-sev' command is relevant to both legacy +SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: + + - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and + the meaning of the bit positions has changed + - 'handle' is not relevant to SEV-SNP + +To address this, this patch adds a new 'sev-type' field that can be +used as a discriminator to select between SEV and SEV-SNP-specific +fields/formats without breaking compatibility for existing management +tools (so long as management tools that add support for launching +SEV-SNP guest update their handling of query-sev appropriately). + +The corresponding HMP command has also been fixed up similarly. 
+ +Signed-off-by: Michael Roth +Co-developed-by: Pankaj Gupta +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 59d3740cb4ac0f010ce35877572904f6297284b4) +Signed-off-by: Paolo Bonzini +--- + qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- + target/i386/sev.c | 55 +++++++++++++++++++++------------ + target/i386/sev.h | 3 ++ + 3 files changed, 96 insertions(+), 34 deletions(-) + +diff --git a/qapi/misc-target.json b/qapi/misc-target.json +index 4e0a6492a9..2d7d4d89bd 100644 +--- a/qapi/misc-target.json ++++ b/qapi/misc-target.json +@@ -47,6 +47,50 @@ + 'send-update', 'receive-update' ], + 'if': 'TARGET_I386' } + ++## ++# @SevGuestType: ++# ++# An enumeration indicating the type of SEV guest being run. ++# ++# @sev: The guest is a legacy SEV or SEV-ES guest. ++# ++# @sev-snp: The guest is an SEV-SNP guest. ++# ++# Since: 6.2 ++## ++{ 'enum': 'SevGuestType', ++ 'data': [ 'sev', 'sev-snp' ], ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevGuestInfo: ++# ++# Information specific to legacy SEV/SEV-ES guests. ++# ++# @policy: SEV policy value ++# ++# @handle: SEV firmware handle ++# ++# Since: 2.12 ++## ++{ 'struct': 'SevGuestInfo', ++ 'data': { 'policy': 'uint32', ++ 'handle': 'uint32' }, ++ 'if': 'TARGET_I386' } ++ ++## ++# @SevSnpGuestInfo: ++# ++# Information specific to SEV-SNP guests.
++# ++# @snp-policy: SEV-SNP policy value ++# ++# Since: 9.1 ++## ++{ 'struct': 'SevSnpGuestInfo', ++ 'data': { 'snp-policy': 'uint64' }, ++ 'if': 'TARGET_I386' } ++ + ## + # @SevInfo: + # +@@ -60,25 +104,25 @@ + # + # @build-id: SEV FW build id + # +-# @policy: SEV policy value +-# + # @state: SEV guest state + # +-# @handle: SEV firmware handle ++# @sev-type: Type of SEV guest being run + # + # Since: 2.12 + ## +-{ 'struct': 'SevInfo', +- 'data': { 'enabled': 'bool', +- 'api-major': 'uint8', +- 'api-minor' : 'uint8', +- 'build-id' : 'uint8', +- 'policy' : 'uint32', +- 'state' : 'SevState', +- 'handle' : 'uint32' +- }, +- 'if': 'TARGET_I386' +-} ++{ 'union': 'SevInfo', ++ 'base': { 'enabled': 'bool', ++ 'api-major': 'uint8', ++ 'api-minor' : 'uint8', ++ 'build-id' : 'uint8', ++ 'state' : 'SevState', ++ 'sev-type' : 'SevGuestType' }, ++ 'discriminator': 'sev-type', ++ 'data': { ++ 'sev': 'SevGuestInfo', ++ 'sev-snp': 'SevSnpGuestInfo' }, ++ 'if': 'TARGET_I386' } ++ + + ## + # @query-sev: +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 072cc4f853..43d1c48bd9 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void) + { + SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); +- SevGuestState *sev_guest = +- (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), +- TYPE_SEV_GUEST); + + info = g_new0(SevInfo, 1); + info->enabled = sev_enabled(); + + if (info->enabled) { +- if (sev_guest) { +- info->handle = sev_guest->handle; +- } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; +- /* we only report the lower 32-bits of policy for SNP, ok for now... 
*/ +- info->policy = +- (uint32_t)object_property_get_uint(OBJECT(sev_common), +- "policy", NULL); ++ ++ if (sev_snp_enabled()) { ++ info->sev_type = SEV_GUEST_TYPE_SEV_SNP; ++ info->u.sev_snp.snp_policy = ++ object_property_get_uint(OBJECT(sev_common), "policy", NULL); ++ } else { ++ info->sev_type = SEV_GUEST_TYPE_SEV; ++ info->u.sev.handle = SEV_GUEST(sev_common)->handle; ++ info->u.sev.policy = ++ (uint32_t)object_property_get_uint(OBJECT(sev_common), ++ "policy", NULL); ++ } + } + + return info; +@@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) + { + SevInfo *info = sev_get_info(); + +- if (info && info->enabled) { +- monitor_printf(mon, "handle: %d\n", info->handle); +- monitor_printf(mon, "state: %s\n", SevState_str(info->state)); +- monitor_printf(mon, "build: %d\n", info->build_id); +- monitor_printf(mon, "api version: %d.%d\n", +- info->api_major, info->api_minor); ++ if (!info || !info->enabled) { ++ monitor_printf(mon, "SEV is not enabled\n"); ++ goto out; ++ } ++ ++ monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); ++ monitor_printf(mon, "state: %s\n", SevState_str(info->state)); ++ monitor_printf(mon, "build: %d\n", info->build_id); ++ monitor_printf(mon, "api version: %d.%d\n", info->api_major, ++ info->api_minor); ++ ++ if (sev_snp_enabled()) { + monitor_printf(mon, "debug: %s\n", +- info->policy & SEV_POLICY_NODBG ? "off" : "on"); +- monitor_printf(mon, "key-sharing: %s\n", +- info->policy & SEV_POLICY_NOKS ? "off" : "on"); ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" ++ : "off"); ++ monitor_printf(mon, "SMT allowed: %s\n", ++ info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" ++ : "off"); + } else { +- monitor_printf(mon, "SEV is not enabled\n"); ++ monitor_printf(mon, "handle: %d\n", info->u.sev.handle); ++ monitor_printf(mon, "debug: %s\n", ++ info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); ++ monitor_printf(mon, "key-sharing: %s\n", ++ info->u.sev.policy & SEV_POLICY_NOKS ? 
"off" : "on"); + } + ++out: + qapi_free_SevInfo(info); + } + +diff --git a/target/i386/sev.h b/target/i386/sev.h +index 94295ee74f..5dc4767b1e 100644 +--- a/target/i386/sev.h ++++ b/target/i386/sev.h +@@ -31,6 +31,9 @@ + #define SEV_POLICY_DOMAIN 0x10 + #define SEV_POLICY_SEV 0x20 + ++#define SEV_SNP_POLICY_SMT 0x10000 ++#define SEV_SNP_POLICY_DBG 0x80000 ++ + typedef struct SevKernelLoaderContext { + char *setup_data; + size_t setup_size; +-- +2.39.3 + diff --git a/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch b/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch new file mode 100644 index 0000000..56f9f6f --- /dev/null +++ b/kvm-i386-sev-fix-unreachable-code-coverity-issue.patch @@ -0,0 +1,51 @@ +From 98057e3adafa052b21a4fe5ef22835d30df3e644 Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Fri, 7 Jun 2024 13:36:09 -0500 +Subject: [PATCH 084/100] i386/sev: fix unreachable code coverity issue + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [84/91] dc7bf28f491bf675b22a98ea593fba72d8bc415a (bonzini/rhel-qemu-kvm) + +Set 'finish->id_block_en' early, so that it is properly reset. + +Fixes coverity CID 1546887. 
+ +Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object") +Signed-off-by: Pankaj Gupta +Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 004c667ac1..7c9df621de 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2165,6 +2165,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + ++ finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + +@@ -2184,7 +2185,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) + return; + } + +- finish->id_block_en = (len) ? 1 : 0; ++ finish->id_block_en = 1; + } + + static char * +-- +2.39.3 + diff --git a/kvm-introduce-pc_rhel_9_5_compat.patch b/kvm-introduce-pc_rhel_9_5_compat.patch new file mode 100644 index 0000000..9a17dda --- /dev/null +++ b/kvm-introduce-pc_rhel_9_5_compat.patch @@ -0,0 +1,81 @@ +From deae6c3b57c3919946a5ce1613e667a3240cf158 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 15 Apr 2024 12:45:09 +0200 +Subject: [PATCH 001/100] introduce pc_rhel_9_5_compat + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [1/91] cfd402fa5080eddba7c954e81ed79f9a1dd654cf (bonzini/rhel-qemu-kvm) + +Allow undoing backported changes that impact guest ABI. 
+ +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 4 ++++ + hw/i386/pc_piix.c | 2 ++ + hw/i386/pc_q35.c | 2 ++ + include/hw/i386/pc.h | 3 +++ + 4 files changed, 11 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 4a154c1a9a..648762d908 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -348,6 +348,10 @@ GlobalProperty pc_rhel_compat[] = { + }; + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + ++GlobalProperty pc_rhel_9_5_compat[] = { ++}; ++const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); ++ + GlobalProperty pc_rhel_9_3_compat[] = { + /* pc_rhel_9_3_compat from pc_compat_8_0 */ + { "virtio-mem", "unplugged-inaccessible", "auto" }, +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index 6b260682eb..bef3e8b73e 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -1015,6 +1015,8 @@ static void pc_machine_rhel760_options(MachineClass *m) + object_class_property_set_description(oc, "x-south-bridge", + "Use a different south bridge than PIIX3"); + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_4, +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 2b54944c0f..9adcdadce8 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -734,6 +734,8 @@ static void pc_q35_machine_rhel940_options(MachineClass *m) + pcmc->smbios_stream_product = "RHEL"; + pcmc->smbios_stream_version = "9.4.0"; + ++ compat_props_add(m->compat_props, pc_rhel_9_5_compat, ++ pc_rhel_9_5_compat_len); + compat_props_add(m->compat_props, hw_compat_rhel_9_5, + hw_compat_rhel_9_5_len); + } +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index a984c951ad..87420783ab 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -294,6 +294,9 @@ extern const size_t pc_compat_2_0_len; + extern GlobalProperty pc_rhel_compat[]; + extern const size_t 
pc_rhel_compat_len; + ++extern GlobalProperty pc_rhel_9_5_compat[]; ++extern const size_t pc_rhel_9_5_compat_len; ++ + extern GlobalProperty pc_rhel_9_3_compat[]; + extern const size_t pc_rhel_9_3_compat_len; + +-- +2.39.3 + diff --git a/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch b/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch new file mode 100644 index 0000000..10e98a7 --- /dev/null +++ b/kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch @@ -0,0 +1,153 @@ +From 120157257ac239050779fdddc9abb56bd39958b3 Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:05 -0500 +Subject: [PATCH 029/100] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [29/91] 9a08c8699f632cd046a6307e33bd053a7cc7db46 (bonzini/rhel-qemu-kvm) + +Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. + +With KVM_SET_USER_MEMORY_REGION2, QEMU can set up a memory region that is +backed both by hva-based shared memory and guest memfd based private +memory. 
+ +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-10-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit ce5a983233b4ca94ced88c9581014346509b5c71) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- + accel/kvm/trace-events | 2 +- + include/sysemu/kvm_int.h | 2 ++ + 3 files changed, 41 insertions(+), 9 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index a7b9a127dd..5ef55e4dd7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -284,35 +284,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, + static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) + { + KVMState *s = kvm_state; +- struct kvm_userspace_memory_region mem; ++ struct kvm_userspace_memory_region2 mem; + int ret; + + mem.slot = slot->slot | (kml->as_id << 16); + mem.guest_phys_addr = slot->start_addr; + mem.userspace_addr = (unsigned long)slot->ram; + mem.flags = slot->flags; ++ mem.guest_memfd = slot->guest_memfd; ++ mem.guest_memfd_offset = slot->guest_memfd_offset; + + if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { + /* Set the slot size to 0 before setting the slot to the desired + * value. This is needed based on KVM commit 75d61fbc. 
*/ + mem.memory_size = 0; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + if (ret < 0) { + goto err; + } + } + mem.memory_size = slot->memory_size; +- ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ if (kvm_guest_memfd_supported) { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); ++ } else { ++ ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); ++ } + slot->old_flags = mem.flags; + err: + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, +- mem.userspace_addr, ret); ++ mem.userspace_addr, mem.guest_memfd, ++ mem.guest_memfd_offset, ret); + if (ret < 0) { +- error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," +- " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +- __func__, mem.slot, slot->start_addr, +- (uint64_t)mem.memory_size, strerror(errno)); ++ if (kvm_guest_memfd_supported) { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 "," ++ " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," ++ " guest_memfd_offset=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, mem.flags, ++ mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, ++ strerror(errno)); ++ } else { ++ error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," ++ " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", ++ __func__, mem.slot, slot->start_addr, ++ (uint64_t)mem.memory_size, strerror(errno)); ++ } + } + return ret; + } +@@ -467,6 +490,10 @@ static int kvm_mem_flags(MemoryRegion *mr) + if (readonly && kvm_readonly_mem_allowed) { + flags |= KVM_MEM_READONLY; + } ++ if (memory_region_has_guest_memfd(mr)) { ++ assert(kvm_guest_memfd_supported); ++ flags |= KVM_MEM_GUEST_MEMFD; ++ } + return flags; + } + +@@ -1394,6 
+1421,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram_start_offset = ram_start_offset; + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); ++ mem->guest_memfd = mr->ram_block->guest_memfd; ++ mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; ++ + kvm_slot_init_dirty_bitmap(mem); + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index 9f599abc17..e8c52cb9e7 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 3496be7997..a5a3fee411 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -30,6 +30,8 @@ typedef struct KVMSlot + int as_id; + /* Cache 
of the offset in ram address space */ + ram_addr_t ram_start_offset; ++ int guest_memfd; ++ hwaddr guest_memfd_offset; + } KVMSlot; + + typedef struct KVMMemoryUpdate { +-- +2.39.3 + diff --git a/kvm-kvm-Introduce-support-for-memory_attributes.patch b/kvm-kvm-Introduce-support-for-memory_attributes.patch new file mode 100644 index 0000000..1f043a9 --- /dev/null +++ b/kvm-kvm-Introduce-support-for-memory_attributes.patch @@ -0,0 +1,103 @@ +From 37e6c98987bb2d4be7ce1fdda4475cd0266271c3 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:06 -0500 +Subject: [PATCH 027/100] kvm: Introduce support for memory_attributes + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [27/91] 1b4428289949478f7390196ae4b098c5e6f36bb0 (bonzini/rhel-qemu-kvm) + +Introduce the helper functions to set the attributes of a range of +memory to private or shared. + +This is necessary to notify KVM of the private/shared attribute of each +gpa range. KVM needs the information to decide whether the GPA needs to be +mapped to hva-based shared memory or guest_memfd based private memory. 
+ +Signed-off-by: Xiaoyao Li +Message-ID: <20240320083945.991426-11-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0811baed49010a9b651b8029ab6b9828b09a884f) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ + include/sysemu/kvm.h | 4 ++++ + 2 files changed, 36 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 9bd235c969..272e945f52 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -91,6 +91,7 @@ bool kvm_msi_use_devid; + static bool kvm_has_guest_debug; + static int kvm_sstep_flags; + static bool kvm_immediate_exit; ++static uint64_t kvm_supported_memory_attributes; + static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { +@@ -1266,6 +1267,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) ++{ ++ struct kvm_memory_attributes attrs; ++ int r; ++ ++ assert((attr & kvm_supported_memory_attributes) == attr); ++ attrs.attributes = attr; ++ attrs.address = start; ++ attrs.size = size; ++ attrs.flags = 0; ++ ++ r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); ++ if (r) { ++ error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " ++ "with attr 0x%" PRIx64 " error '%s'", ++ start, size, attr, strerror(errno)); ++ } ++ return r; ++} ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); ++} ++ ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) ++{ ++ return kvm_set_memory_attributes(start, size, 0); ++} ++ + /* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) +@@ -2387,6 +2418,7 @@ static int kvm_init(MachineState *ms) + goto err; + } 
+ ++ kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); + s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 54f4d83a37..f114ff6986 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -536,4 +536,8 @@ void kvm_mark_guest_state_protected(void); + * reported for the VM. + */ + bool kvm_hwpoisoned_mem(void); ++ ++int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); ++int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); ++ + #endif +-- +2.39.3 + diff --git a/kvm-kvm-add-support-for-guest-physical-bits.patch b/kvm-kvm-add-support-for-guest-physical-bits.patch new file mode 100644 index 0000000..97b94eb --- /dev/null +++ b/kvm-kvm-add-support-for-guest-physical-bits.patch @@ -0,0 +1,116 @@ +From 31cc494d69449811f4d995326479372da7c1241e Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:35 +0100 +Subject: [PATCH 003/100] kvm: add support for guest physical bits + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [3/91] abb1ba3a584152d8efabd8255b86afe609f8ffbd (bonzini/rhel-qemu-kvm) + +Query kvm for supported guest physical address bits, in cpuid +function 80000008, eax[23:16]. Usually this is identical to host +physical address bits. With NPT or EPT being used this might be +restricted to 48 (max 4-level paging address space size) even if +the host cpu supports more physical address bits. + +When set pass this to the guest, using cpuid too. Guest firmware +can use this to figure how big the usable guest physical address +space is, so PCI bar mapping are actually reachable. 
+ +Signed-off-by: Gerd Hoffmann +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <20240318155336.156197-2-kraxel@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0d08c423688edcca857f88dab20f1fc56de2b281) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm-cpu.c | 50 ++++++++++++++++++++++++++++++++------- + 1 file changed, 42 insertions(+), 8 deletions(-) + +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index b91af5051f..7ef94c681f 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -18,10 +18,32 @@ + #include "kvm_i386.h" + #include "hw/core/accel-cpu.h" + ++static void kvm_set_guest_phys_bits(CPUState *cs) ++{ ++ X86CPU *cpu = X86_CPU(cs); ++ uint32_t eax, guest_phys_bits; ++ ++ eax = kvm_arch_get_supported_cpuid(cs->kvm_state, 0x80000008, 0, R_EAX); ++ guest_phys_bits = (eax >> 16) & 0xff; ++ if (!guest_phys_bits) { ++ return; ++ } ++ cpu->guest_phys_bits = guest_phys_bits; ++ if (cpu->guest_phys_bits > cpu->phys_bits) { ++ cpu->guest_phys_bits = cpu->phys_bits; ++ } ++ ++ if (cpu->host_phys_bits && cpu->host_phys_bits_limit && ++ cpu->guest_phys_bits > cpu->host_phys_bits_limit) { ++ cpu->guest_phys_bits = cpu->host_phys_bits_limit; ++ } ++} ++ + static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + { + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; ++ bool ret; + + /* + * The realize order is important, since x86_cpu_realize() checks if +@@ -32,13 +54,15 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + * + * realize order: + * +- * x86_cpu_realize(): +- * -> x86_cpu_expand_features() +- * -> cpu_exec_realizefn(): +- * -> accel_cpu_common_realize() +- * kvm_cpu_realizefn() -> host_cpu_realizefn() +- * -> cpu_common_realizefn() +- * -> check/update ucode_rev, phys_bits, mwait ++ * x86_cpu_realizefn(): ++ * x86_cpu_expand_features() ++ * cpu_exec_realizefn(): ++ * accel_cpu_common_realize() ++ * kvm_cpu_realizefn() ++ * host_cpu_realizefn() ++ * 
kvm_set_guest_phys_bits() ++ * check/update ucode_rev, phys_bits, guest_phys_bits, mwait ++ * cpu_common_realizefn() (via xcc->parent_realize) + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { +@@ -50,7 +74,17 @@ static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) + MSR_IA32_UCODE_REV); + } + } +- return host_cpu_realizefn(cs, errp); ++ ret = host_cpu_realizefn(cs, errp); ++ if (!ret) { ++ return ret; ++ } ++ ++ if ((env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) && ++ cpu->guest_phys_bits == -1) { ++ kvm_set_guest_phys_bits(cs); ++ } ++ ++ return true; + } + + static bool lmce_supported(void) +-- +2.39.3 + diff --git a/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch b/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch new file mode 100644 index 0000000..9baa06f --- /dev/null +++ b/kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch @@ -0,0 +1,194 @@ +From 33cc1b469689ee2bb7c4f745189472c74a0a98ab Mon Sep 17 00:00:00 2001 +From: Chao Peng +Date: Wed, 20 Mar 2024 03:39:08 -0500 +Subject: [PATCH 034/100] kvm: handle KVM_EXIT_MEMORY_FAULT + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [34/91] 59c672f6b19a3afcb61878775eb6425c6fdea6d5 (bonzini/rhel-qemu-kvm) + +Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory +conversion on the RAMBlock to turn the memory into desired attribute, +switching between private and shared. + +Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when +KVM_EXIT_MEMORY_FAULT happens. + +Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has +guest_memfd memory backend. + +Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is +added. + +When page is converted from shared to private, the original shared +memory can be discarded via ram_block_discard_range(). 
Note, shared +memory can be discarded only when it's not back'ed by hugetlb because +hugetlb is supposed to be pre-allocated and no need for discarding. + +Signed-off-by: Chao Peng +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li + +Message-ID: <20240320083945.991426-13-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c15e5684071d93174e446be318f49d8d59b15d6d) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++----- + accel/kvm/trace-events | 2 + + include/sysemu/kvm.h | 2 + + 3 files changed, 92 insertions(+), 10 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 3f99efc8cc..09164e346c 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2900,6 +2900,69 @@ static void kvm_eat_signals(CPUState *cpu) + } while (sigismember(&chkset, SIG_IPI)); + } + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) ++{ ++ MemoryRegionSection section; ++ ram_addr_t offset; ++ MemoryRegion *mr; ++ RAMBlock *rb; ++ void *addr; ++ int ret = -1; ++ ++ trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); ++ ++ if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || ++ !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { ++ return -1; ++ } ++ ++ if (!size) { ++ return -1; ++ } ++ ++ section = memory_region_find(get_system_memory(), start, size); ++ mr = section.mr; ++ if (!mr) { ++ return -1; ++ } ++ ++ if (!memory_region_has_guest_memfd(mr)) { ++ error_report("Converting non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ goto out_unref; ++ } ++ ++ if (to_private) { ++ ret = kvm_set_memory_attributes_private(start, size); ++ } else { ++ ret = kvm_set_memory_attributes_shared(start, size); ++ } ++ if (ret) { ++ goto out_unref; ++ } ++ ++ addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; ++ rb = qemu_ram_block_from_host(addr, false, &offset); ++ ++ if (to_private) { ++ if (rb->page_size != qemu_real_host_page_size()) { ++ /* ++ * shared memory is backed by hugetlb, which is supposed to be ++ * pre-allocated and doesn't need to be discarded ++ */ ++ goto out_unref; ++ } ++ ret = ram_block_discard_range(rb, offset, size); ++ } else { ++ ret = ram_block_discard_guest_memfd_range(rb, offset, size); ++ } ++ ++out_unref: ++ memory_region_unref(mr); ++ return ret; ++} ++ + int kvm_cpu_exec(CPUState *cpu) + { + struct kvm_run *run = cpu->kvm_run; +@@ -2967,18 +3030,20 @@ int kvm_cpu_exec(CPUState *cpu) + ret = EXCP_INTERRUPT; + break; + } +- fprintf(stderr, "error: kvm run failed %s\n", +- strerror(-run_ret)); ++ if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { ++ fprintf(stderr, "error: kvm run failed %s\n", ++ strerror(-run_ret)); + #ifdef TARGET_PPC +- if (run_ret == -EBUSY) { +- fprintf(stderr, +- "This is probably because your SMT is enabled.\n" +- "VCPU can only run on primary threads with all " +- "secondary threads offline.\n"); +- } ++ if (run_ret == -EBUSY) { ++ fprintf(stderr, ++ "This is probably because your SMT is enabled.\n" ++ "VCPU can only run on primary threads with all " ++ "secondary threads offline.\n"); ++ } + #endif +- ret = -1; +- break; ++ ret = -1; ++ break; ++ } + } + + trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); +@@ -3061,6 +3126,19 @@ int kvm_cpu_exec(CPUState *cpu) + break; + } + break; ++ case KVM_EXIT_MEMORY_FAULT: ++ trace_kvm_memory_fault(run->memory_fault.gpa, ++ run->memory_fault.size, ++ run->memory_fault.flags); ++ if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) 
{ ++ error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, ++ (uint64_t)run->memory_fault.flags); ++ ret = -1; ++ break; ++ } ++ ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, ++ run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); ++ break; + default: + ret = kvm_arch_handle_exit(cpu, run); + break; +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index e8c52cb9e7..681ccb667d 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -31,3 +31,5 @@ kvm_cpu_exec(void) "" + kvm_interrupt_exit_request(void) "" + kvm_io_window_exit(void) "" + kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32 ++kvm_convert_memory(uint64_t start, uint64_t size, const char *msg) "start 0x%" PRIx64 " size 0x%" PRIx64 " %s" ++kvm_memory_fault(uint64_t start, uint64_t size, uint64_t flags) "start 0x%" PRIx64 " size 0x%" PRIx64 " flags 0x%" PRIx64 +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 9e4ab7ae89..74f23dff9c 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -542,4 +542,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); + int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + ++int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); ++ + #endif +-- +2.39.3 + diff --git a/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch b/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch new file mode 100644 index 0000000..8f9756b --- /dev/null +++ b/kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch @@ -0,0 +1,56 @@ +From f9dc55dd179bb534d589af371c5c2a7886bd461e Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:11 -0500 +Subject: [PATCH 030/100] kvm/memory: Make memory type private by default if it + has guest memfd backend + +RH-Author: Paolo Bonzini +RH-MergeRequest: 
245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [30/91] 5e21edf844b5629ee32c4075843b028561b97ae2 (bonzini/rhel-qemu-kvm) + +KVM side leaves the memory to shared by default, which may incur the +overhead of paging conversion on the first visit of each page. Because +the expectation is that page is likely to private for the VMs that +require private memory (has guest memfd). + +Explicitly set the memory to private when memory region has valid +guest memfd backend. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-16-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5ef55e4dd7..3f99efc8cc 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1431,6 +1431,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + strerror(-err)); + abort(); + } ++ ++ if (memory_region_has_guest_memfd(mr)) { ++ err = kvm_set_memory_attributes_private(start_addr, slot_size); ++ if (err) { ++ error_report("%s: failed to set memory attribute private: %s", ++ __func__, strerror(-err)); ++ exit(1); ++ } ++ } ++ + start_addr += slot_size; + ram_start_offset += slot_size; + ram += slot_size; +-- +2.39.3 + diff --git a/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch b/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch new file mode 100644 index 0000000..7b578b5 --- /dev/null +++ b/kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch @@ -0,0 +1,61 @@ +From aeaa7061139202448d466b7e18682081f9cd2097 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:54 -0500 +Subject: [PATCH 035/100] kvm/tdx: Don't complain when converting vMMIO region + to shared + 
+RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [35/91] c42870771d7af5badc2e10d42be9b5620d72f95d (bonzini/rhel-qemu-kvm) + +Because vMMIO region needs to be shared region, guest TD may explicitly +convert such region from private to shared. Don't complain such +conversion. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-34-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c5d9425ef4da9f43fc0903905ad415456d1ab843) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 09164e346c..6efaff90a7 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2927,9 +2927,22 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + } + + if (!memory_region_has_guest_memfd(mr)) { +- error_report("Converting non guest_memfd backed memory region " +- "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", +- start, size, to_private ? "private" : "shared"); ++ /* ++ * Because vMMIO region must be shared, guest TD may convert vMMIO ++ * region to shared explicitly. Don't complain such case. See ++ * memory_region_type() for checking if the region is MMIO region. ++ */ ++ if (!to_private && ++ !memory_region_is_ram(mr) && ++ !memory_region_is_ram_device(mr) && ++ !memory_region_is_rom(mr) && ++ !memory_region_is_romd(mr)) { ++ ret = 0; ++ } else { ++ error_report("Convert non guest_memfd backed memory region " ++ "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", ++ start, size, to_private ? 
"private" : "shared"); ++ } + goto out_unref; + } + +-- +2.39.3 + diff --git a/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch b/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch new file mode 100644 index 0000000..c0f2bc6 --- /dev/null +++ b/kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch @@ -0,0 +1,62 @@ +From 2b2dfff3e383c99d0f759a8c12659d1a0ce50e8e Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Thu, 29 Feb 2024 01:36:55 -0500 +Subject: [PATCH 036/100] kvm/tdx: Ignore memory conversion to shared of + unassigned region + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [36/91] 84515b9dcfc2e07b272bb2477acf6430e9d33f28 (bonzini/rhel-qemu-kvm) + +TDX requires vMMIO region to be shared. For KVM, MMIO region is the region +which kvm memslot isn't assigned to (except in-kernel emulation). +qemu has the memory region for vMMIO at each device level. + +While OVMF issues MapGPA(to-shared) conservatively on 32bit PCI MMIO +region, qemu doesn't find corresponding vMMIO region because it's before +PCI device allocation and memory_region_find() finds the device region, not +PCI bus region. It's safe to ignore MapGPA(to-shared) because when guest +accesses those region they use GPA with shared bit set for vMMIO. Ignore +memory conversion request of non-assigned region to shared and return +success. Otherwise OVMF is confused and panics there. 
+ +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-35-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 565f4768bb9cf840b2f8cca41483bb91aa3196a3) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6efaff90a7..f6268855b4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2923,6 +2923,18 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { ++ /* ++ * Ignore converting non-assigned region to shared. ++ * ++ * TDX requires vMMIO region to be shared to inject #VE to guest. ++ * OVMF issues conservatively MapGPA(shared) on 32bit PCI MMIO region, ++ * and vIO-APIC 0xFEC00000 4K page. ++ * OVMF assigns 32bit PCI MMIO region to ++ * [top of low memory: typically 2GB=0xC000000, 0xFC00000) ++ */ ++ if (!to_private) { ++ return 0; ++ } + return -1; + } + +-- +2.39.3 + diff --git a/kvm-linux-headers-Update-to-current-kvm-next.patch b/kvm-linux-headers-Update-to-current-kvm-next.patch new file mode 100644 index 0000000..2fd35fd --- /dev/null +++ b/kvm-linux-headers-Update-to-current-kvm-next.patch @@ -0,0 +1,189 @@ +From c3e2bc3319882c16fa36eafc7a613073746cfc8b Mon Sep 17 00:00:00 2001 +From: Pankaj Gupta +Date: Thu, 30 May 2024 06:16:14 -0500 +Subject: [PATCH 052/100] linux-headers: Update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [52/91] df77e867072f60110b8387a54ba2db6226b35007 (bonzini/rhel-qemu-kvm) + +This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module +parameter to enable SEV-SNP", 2024-05-12). 
The SNP host patches will +be included in Linux 6.11, to be released next July. + +Also brings in a linux-headers/linux/vhost.h fix from v6.9-rc4. + +Co-developed-by: Michael Roth +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5f69e42da5b40a2213f4db70ca461f554abca686) +Signed-off-by: Paolo Bonzini +--- + linux-headers/asm-loongarch/kvm.h | 4 +++ + linux-headers/asm-riscv/kvm.h | 1 + + linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++- + linux-headers/linux/vhost.h | 15 ++++----- + 4 files changed, 64 insertions(+), 8 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 109785922c..f9abef3823 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -17,6 +17,8 @@ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 ++ + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -72,6 +74,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index b1c503c295..e878e7cc39 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, ++ KVM_RISCV_ISA_EXT_SSCOFPMF, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-x86/kvm.h 
b/linux-headers/asm-x86/kvm.h +index 31c95c2dfe..1c8f918234 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -695,6 +695,11 @@ enum sev_cmd_id { + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + ++ /* SNP-specific commands */ ++ KVM_SEV_SNP_LAUNCH_START = 100, ++ KVM_SEV_SNP_LAUNCH_UPDATE, ++ KVM_SEV_SNP_LAUNCH_FINISH, ++ + KVM_SEV_NR_MAX, + }; + +@@ -709,7 +714,9 @@ struct kvm_sev_cmd { + struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; +- __u32 pad[9]; ++ __u16 ghcb_version; ++ __u16 pad1; ++ __u32 pad2[8]; + }; + + struct kvm_sev_launch_start { +@@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data { + __u32 pad2; + }; + ++struct kvm_sev_snp_launch_start { ++ __u64 policy; ++ __u8 gosvw[16]; ++ __u16 flags; ++ __u8 pad0[6]; ++ __u64 pad1[4]; ++}; ++ ++/* Kept in sync with firmware values for simplicity. */ ++#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 ++#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 ++#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 ++#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 ++#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 ++ ++struct kvm_sev_snp_launch_update { ++ __u64 gfn_start; ++ __u64 uaddr; ++ __u64 len; ++ __u8 type; ++ __u8 pad0; ++ __u16 flags; ++ __u32 pad1; ++ __u64 pad2[4]; ++}; ++ ++#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 ++#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 ++#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 ++ ++struct kvm_sev_snp_launch_finish { ++ __u64 id_block_uaddr; ++ __u64 id_auth_uaddr; ++ __u8 id_block_en; ++ __u8 auth_key_en; ++ __u8 vcek_disabled; ++ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; ++ __u8 pad0[3]; ++ __u16 flags; ++ __u64 pad1[4]; ++}; ++ + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) + #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +@@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd { + #define KVM_X86_SW_PROTECTED_VM 1 + #define KVM_X86_SEV_VM 2 + #define KVM_X86_SEV_ES_VM 3 ++#define KVM_X86_SNP_VM 4 + + #endif /* _ASM_X86_KVM_H */ 
+diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index bea6973906..b95dd84eef 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -179,12 +179,6 @@ + /* Get the config size */ + #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + +-/* Get the count of all virtqueues */ +-#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +- +-/* Get the number of virtqueue groups. */ +-#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) +- + /* Get the number of address spaces. */ + #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +@@ -228,10 +222,17 @@ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) + ++ ++/* Get the count of all virtqueues */ ++#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) ++ ++/* Get the number of virtqueue groups. */ ++#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) ++ + /* Get the queue size of a specific virtqueue. 
+ * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +-#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/kvm-linux-headers-update-to-current-kvm-next.patch b/kvm-linux-headers-update-to-current-kvm-next.patch new file mode 100644 index 0000000..4c3dd73 --- /dev/null +++ b/kvm-linux-headers-update-to-current-kvm-next.patch @@ -0,0 +1,2471 @@ +From 530296e1669c9730f261a269d5b911ea56dfcce7 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 23 Apr 2024 11:46:47 +0200 +Subject: [PATCH 017/100] linux-headers: update to current kvm/next + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [17/91] 5660d4967f10a84802de16c24540e95095eaffd5 (bonzini/rhel-qemu-kvm) + +Signed-off-by: Paolo Bonzini +(cherry picked from commit ab0c7fb22b56523f24d6e127cd4d10ecff67bf85) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 8 - + include/standard-headers/asm-x86/bootparam.h | 17 +- + include/standard-headers/asm-x86/kvm_para.h | 3 +- + include/standard-headers/asm-x86/setup_data.h | 83 +++ + include/standard-headers/linux/ethtool.h | 48 ++ + include/standard-headers/linux/fuse.h | 39 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_gpu.h | 2 + + include/standard-headers/linux/virtio_pci.h | 10 +- + include/standard-headers/linux/virtio_snd.h | 154 ++++ + linux-headers/asm-arm64/kvm.h | 15 +- + linux-headers/asm-arm64/sve_context.h | 11 + + linux-headers/asm-generic/bitsperlong.h | 4 + + linux-headers/asm-loongarch/kvm.h | 2 - + linux-headers/asm-mips/kvm.h | 2 - + linux-headers/asm-powerpc/kvm.h | 45 +- + linux-headers/asm-riscv/kvm.h | 3 +- + linux-headers/asm-s390/kvm.h | 315 +++++++- + 
linux-headers/asm-x86/kvm.h | 328 ++++++++- + linux-headers/linux/bits.h | 15 + + linux-headers/linux/kvm.h | 689 +----------------- + linux-headers/linux/psp-sev.h | 59 ++ + linux-headers/linux/vhost.h | 7 + + 23 files changed, 1120 insertions(+), 740 deletions(-) + create mode 100644 include/standard-headers/asm-x86/setup_data.h + create mode 100644 linux-headers/linux/bits.h + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index ffbda48917..84a4801977 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -679,14 +679,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) + return dev; + } + +-struct setup_data { +- uint64_t next; +- uint32_t type; +- uint32_t len; +- uint8_t data[]; +-} __attribute__((packed)); +- +- + /* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM +diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h +index 0b06d2bff1..b582a105c0 100644 +--- a/include/standard-headers/asm-x86/bootparam.h ++++ b/include/standard-headers/asm-x86/bootparam.h +@@ -2,21 +2,7 @@ + #ifndef _ASM_X86_BOOTPARAM_H + #define _ASM_X86_BOOTPARAM_H + +-/* setup_data/setup_indirect types */ +-#define SETUP_NONE 0 +-#define SETUP_E820_EXT 1 +-#define SETUP_DTB 2 +-#define SETUP_PCI 3 +-#define SETUP_EFI 4 +-#define SETUP_APPLE_PROPERTIES 5 +-#define SETUP_JAILHOUSE 6 +-#define SETUP_CC_BLOB 7 +-#define SETUP_IMA 8 +-#define SETUP_RNG_SEED 9 +-#define SETUP_ENUM_MAX SETUP_RNG_SEED +- +-#define SETUP_INDIRECT (1<<31) +-#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++#include "standard-headers/asm-x86/setup_data.h" + + /* ram_size flags */ + #define RAMDISK_IMAGE_START_MASK 0x07FF +@@ -38,6 +24,7 @@ + #define XLF_EFI_KEXEC (1<<4) + #define XLF_5LEVEL (1<<5) + #define XLF_5LEVEL_ENABLED (1<<6) ++#define XLF_MEM_ENCRYPTION (1<<7) + + + #endif /* _ASM_X86_BOOTPARAM_H */ +diff --git a/include/standard-headers/asm-x86/kvm_para.h 
b/include/standard-headers/asm-x86/kvm_para.h +index f0235e58a1..9a011d20f0 100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -92,7 +92,7 @@ struct kvm_clock_pairing { + #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + + /* MSR_KVM_ASYNC_PF_INT */ +-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) ++#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) + + /* MSR_KVM_MIGRATION_CONTROL */ + #define KVM_MIGRATION_READY (1 << 0) +@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { + uint32_t token; + + uint8_t pad[56]; +- uint32_t enabled; + }; + + #define KVM_PV_EOI_BIT 0 +diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h +new file mode 100644 +index 0000000000..09355f54c5 +--- /dev/null ++++ b/include/standard-headers/asm-x86/setup_data.h +@@ -0,0 +1,83 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _ASM_X86_SETUP_DATA_H ++#define _ASM_X86_SETUP_DATA_H ++ ++/* setup_data/setup_indirect types */ ++#define SETUP_NONE 0 ++#define SETUP_E820_EXT 1 ++#define SETUP_DTB 2 ++#define SETUP_PCI 3 ++#define SETUP_EFI 4 ++#define SETUP_APPLE_PROPERTIES 5 ++#define SETUP_JAILHOUSE 6 ++#define SETUP_CC_BLOB 7 ++#define SETUP_IMA 8 ++#define SETUP_RNG_SEED 9 ++#define SETUP_ENUM_MAX SETUP_RNG_SEED ++ ++#define SETUP_INDIRECT (1<<31) ++#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) ++ ++#ifndef __ASSEMBLY__ ++ ++#include "standard-headers/linux/types.h" ++ ++/* extensible setup data list node */ ++struct setup_data { ++ uint64_t next; ++ uint32_t type; ++ uint32_t len; ++ uint8_t data[]; ++}; ++ ++/* extensible setup indirect data node */ ++struct setup_indirect { ++ uint32_t type; ++ uint32_t reserved; /* Reserved, must be set to zero. 
*/ ++ uint64_t len; ++ uint64_t addr; ++}; ++ ++/* ++ * The E820 memory region entry of the boot protocol ABI: ++ */ ++struct boot_e820_entry { ++ uint64_t addr; ++ uint64_t size; ++ uint32_t type; ++} QEMU_PACKED; ++ ++/* ++ * The boot loader is passing platform information via this Jailhouse-specific ++ * setup data structure. ++ */ ++struct jailhouse_setup_data { ++ struct { ++ uint16_t version; ++ uint16_t compatible_version; ++ } QEMU_PACKED hdr; ++ struct { ++ uint16_t pm_timer_address; ++ uint16_t num_cpus; ++ uint64_t pci_mmconfig_base; ++ uint32_t tsc_khz; ++ uint32_t apic_khz; ++ uint8_t standard_ioapic; ++ uint8_t cpu_ids[255]; ++ } QEMU_PACKED v1; ++ struct { ++ uint32_t flags; ++ } QEMU_PACKED v2; ++} QEMU_PACKED; ++ ++/* ++ * IMA buffer setup data information from the previous kernel during kexec ++ */ ++struct ima_setup_data { ++ uint64_t addr; ++ uint64_t size; ++} QEMU_PACKED; ++ ++#endif /* __ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_SETUP_DATA_H */ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index dfb54eff6f..01503784d2 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define IPV4_FLOW 0x10 /* hash only */ + #define IPV6_FLOW 0x11 /* hash only */ + #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ ++ ++/* Used for GTP-U IPv4 and IPv6. ++ * The format of GTP packets only includes ++ * elements such as TEID and GTP version. ++ * It is primarily intended for data communication of the UE. ++ */ ++#define GTPU_V4_FLOW 0x13 /* hash only */ ++#define GTPU_V6_FLOW 0x14 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * The format of these GTP packets does not include TEID. ++ * Primarily expected to be used for communication ++ * to create sessions for UE data communication, ++ * commonly referred to as CSR (Create Session Request). 
++ */ ++#define GTPC_V4_FLOW 0x15 /* hash only */ ++#define GTPC_V6_FLOW 0x16 /* hash only */ ++ ++/* Use for GTP-C IPv4 and v6. ++ * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. ++ * After session creation, it becomes this packet. ++ * This is mainly used for requests to realize UE handover. ++ */ ++#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ ++#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ ++ ++/* Use for GTP-U and extended headers for the PSC (PDU Session Container). ++ * The format of these GTP packets includes TEID and QFI. ++ * In 5G communication using UPF (User Plane Function), ++ * data communication with this extended header is performed. ++ */ ++#define GTPU_EH_V4_FLOW 0x19 /* hash only */ ++#define GTPU_EH_V6_FLOW 0x1a /* hash only */ ++ ++/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. ++ * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by ++ * UL/DL included in the PSC. ++ * There are differences in the data included based on Downlink/Uplink, ++ * and can be used to distinguish packets. ++ * The functions described so far are useful when you want to ++ * handle communication from the mobile network in UPF, PGW, etc. 
++ */ ++#define GTPU_UL_V4_FLOW 0x1b /* hash only */ ++#define GTPU_UL_V6_FLOW 0x1c /* hash only */ ++#define GTPU_DL_V4_FLOW 0x1d /* hash only */ ++#define GTPU_DL_V6_FLOW 0x1e /* hash only */ ++ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ + #define FLOW_EXT 0x80000000 + #define FLOW_MAC_EXT 0x40000000 +@@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define RXH_IP_DST (1 << 5) + #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ + #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ ++#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ + #define RXH_DISCARD (1 << 31) + + #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index fc0dcd10ae..bac9dbc49f 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -211,6 +211,12 @@ + * 7.39 + * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures ++ * ++ * 7.40 ++ * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag ++ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag ++ * - add FUSE_NO_EXPORT_SUPPORT init flag ++ * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + */ + + #ifndef _LINUX_FUSE_H +@@ -242,7 +248,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 39 ++#define FUSE_KERNEL_MINOR_VERSION 40 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -349,6 +355,7 @@ struct fuse_file_lock { + * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode ++ * FOPEN_PASSTHROUGH: passthrough read/write io for this open file + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define 
FOPEN_KEEP_CACHE (1 << 1) +@@ -357,6 +364,7 @@ struct fuse_file_lock { + #define FOPEN_STREAM (1 << 4) + #define FOPEN_NOFLUSH (1 << 5) + #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) ++#define FOPEN_PASSTHROUGH (1 << 7) + + /** + * INIT request/reply flags +@@ -406,6 +414,9 @@ struct fuse_file_lock { + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation + * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. ++ * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support ++ * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit ++ * of the request ID indicates resend requests + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,6 +456,9 @@ struct fuse_file_lock { + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) + #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++#define FUSE_PASSTHROUGH (1ULL << 37) ++#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) ++#define FUSE_HAS_RESEND (1ULL << 39) + + /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ + #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +@@ -631,6 +645,7 @@ enum fuse_notify_code { + FUSE_NOTIFY_STORE = 4, + FUSE_NOTIFY_RETRIEVE = 5, + FUSE_NOTIFY_DELETE = 6, ++ FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_CODE_MAX, + }; + +@@ -757,7 +772,7 @@ struct fuse_create_in { + struct fuse_open_out { + uint64_t fh; + uint32_t open_flags; +- uint32_t padding; ++ int32_t backing_id; + }; + + struct fuse_release_in { +@@ -873,7 +888,8 @@ struct fuse_init_out { + uint16_t max_pages; + uint16_t map_alignment; + uint32_t flags2; +- uint32_t unused[7]; ++ uint32_t max_stack_depth; ++ uint32_t unused[6]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -956,6 +972,14 @@ struct fuse_fallocate_in { + uint32_t padding; + }; + ++/** ++ * FUSE request unique ID flag ++ * ++ * Indicates whether this is a resend request. 
The receiver should handle this ++ * request accordingly. ++ */ ++#define FUSE_UNIQUE_RESEND (1ULL << 63) ++ + struct fuse_in_header { + uint32_t len; + uint32_t opcode; +@@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { + uint64_t dummy4; + }; + ++struct fuse_backing_map { ++ int32_t fd; ++ uint32_t flags; ++ uint64_t padding; ++}; ++ + /* Device ioctls: */ + #define FUSE_DEV_IOC_MAGIC 229 + #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) ++#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ ++ struct fuse_backing_map) ++#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) + + struct fuse_lseek_in { + uint64_t fh; +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index f6bab08540..2221b0c383 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -602,6 +602,7 @@ + + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ + #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ ++#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ + + #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ + #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 2da48d3d4c..2db643ed8f 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { + + #define VIRTIO_GPU_CAPSET_VIRGL 1 + #define VIRTIO_GPU_CAPSET_VIRGL2 2 ++/* 3 is reserved for gfxstream */ ++#define VIRTIO_GPU_CAPSET_VENUS 4 + + /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ + struct virtio_gpu_get_capset_info { +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index 3e2bc2c97e..4010216103 100644 +--- 
a/include/standard-headers/linux/virtio_pci.h ++++ b/include/standard-headers/linux/virtio_pci.h +@@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { + #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 + #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +-struct QEMU_PACKED virtio_admin_cmd_hdr { ++struct virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV +@@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { + uint64_t group_member_id; + }; + +-struct QEMU_PACKED virtio_admin_cmd_status { ++struct virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { ++struct virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; + }; + +-struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { ++struct virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. 
*/ + }; + +@@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { + + #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +-struct QEMU_PACKED virtio_admin_cmd_notify_info_data { ++struct virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; +diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h +index 1af96b9fc6..860f12e0a4 100644 +--- a/include/standard-headers/linux/virtio_snd.h ++++ b/include/standard-headers/linux/virtio_snd.h +@@ -7,6 +7,14 @@ + + #include "standard-headers/linux/virtio_types.h" + ++/******************************************************************************* ++ * FEATURE BITS ++ */ ++enum { ++ /* device supports control elements */ ++ VIRTIO_SND_F_CTLS = 0 ++}; ++ + /******************************************************************************* + * CONFIGURATION SPACE + */ +@@ -17,6 +25,8 @@ struct virtio_snd_config { + uint32_t streams; + /* # of available channel maps */ + uint32_t chmaps; ++ /* # of available control elements */ ++ uint32_t controls; + }; + + enum { +@@ -55,6 +65,15 @@ enum { + /* channel map control request types */ + VIRTIO_SND_R_CHMAP_INFO = 0x0200, + ++ /* control element request types */ ++ VIRTIO_SND_R_CTL_INFO = 0x0300, ++ VIRTIO_SND_R_CTL_ENUM_ITEMS, ++ VIRTIO_SND_R_CTL_READ, ++ VIRTIO_SND_R_CTL_WRITE, ++ VIRTIO_SND_R_CTL_TLV_READ, ++ VIRTIO_SND_R_CTL_TLV_WRITE, ++ VIRTIO_SND_R_CTL_TLV_COMMAND, ++ + /* jack event types */ + VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, + VIRTIO_SND_EVT_JACK_DISCONNECTED, +@@ -63,6 +82,9 @@ enum { + VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, + VIRTIO_SND_EVT_PCM_XRUN, + ++ /* control element event types */ ++ VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, ++ + /* common status codes */ + VIRTIO_SND_S_OK = 0x8000, + VIRTIO_SND_S_BAD_MSG, +@@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { + 
uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; + }; + ++/******************************************************************************* ++ * CONTROL ELEMENTS MESSAGES ++ */ ++struct virtio_snd_ctl_hdr { ++ /* VIRTIO_SND_R_CTL_XXX */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... virtio_snd_config::controls - 1 */ ++ uint32_t control_id; ++}; ++ ++/* supported roles for control elements */ ++enum { ++ VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, ++ VIRTIO_SND_CTL_ROLE_VOLUME, ++ VIRTIO_SND_CTL_ROLE_MUTE, ++ VIRTIO_SND_CTL_ROLE_GAIN ++}; ++ ++/* supported value types for control elements */ ++enum { ++ VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, ++ VIRTIO_SND_CTL_TYPE_INTEGER, ++ VIRTIO_SND_CTL_TYPE_INTEGER64, ++ VIRTIO_SND_CTL_TYPE_ENUMERATED, ++ VIRTIO_SND_CTL_TYPE_BYTES, ++ VIRTIO_SND_CTL_TYPE_IEC958 ++}; ++ ++/* supported access rights for control elements */ ++enum { ++ VIRTIO_SND_CTL_ACCESS_READ = 0, ++ VIRTIO_SND_CTL_ACCESS_WRITE, ++ VIRTIO_SND_CTL_ACCESS_VOLATILE, ++ VIRTIO_SND_CTL_ACCESS_INACTIVE, ++ VIRTIO_SND_CTL_ACCESS_TLV_READ, ++ VIRTIO_SND_CTL_ACCESS_TLV_WRITE, ++ VIRTIO_SND_CTL_ACCESS_TLV_COMMAND ++}; ++ ++struct virtio_snd_ctl_info { ++ /* common header */ ++ struct virtio_snd_info hdr; ++ /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ ++ uint32_t role; ++ /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ ++ uint32_t type; ++ /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ ++ uint32_t access; ++ /* # of members in the element value */ ++ uint32_t count; ++ /* index for an element with a non-unique name */ ++ uint32_t index; ++ /* name identifier string for the element */ ++ uint8_t name[44]; ++ /* additional information about the element's value */ ++ union { ++ /* VIRTIO_SND_CTL_TYPE_INTEGER */ ++ struct { ++ /* minimum supported value */ ++ uint32_t min; ++ /* maximum supported value */ ++ uint32_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint32_t step; ++ } integer; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ ++ struct { ++ /* minimum 
supported value */ ++ uint64_t min; ++ /* maximum supported value */ ++ uint64_t max; ++ /* fixed step size for value (0 = variable size) */ ++ uint64_t step; ++ } integer64; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ ++ struct { ++ /* # of options supported for value */ ++ uint32_t items; ++ } enumerated; ++ } value; ++}; ++ ++struct virtio_snd_ctl_enum_item { ++ /* option name */ ++ uint8_t item[64]; ++}; ++ ++struct virtio_snd_ctl_iec958 { ++ /* AES/IEC958 channel status bits */ ++ uint8_t status[24]; ++ /* AES/IEC958 subcode bits */ ++ uint8_t subcode[147]; ++ /* nothing */ ++ uint8_t pad; ++ /* AES/IEC958 subframe bits */ ++ uint8_t dig_subframe[4]; ++}; ++ ++struct virtio_snd_ctl_value { ++ union { ++ /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ ++ uint32_t integer[128]; ++ /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ ++ uint64_t integer64[64]; ++ /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ ++ uint32_t enumerated[128]; ++ /* VIRTIO_SND_CTL_TYPE_BYTES value */ ++ uint8_t bytes[512]; ++ /* VIRTIO_SND_CTL_TYPE_IEC958 value */ ++ struct virtio_snd_ctl_iec958 iec958; ++ } value; ++}; ++ ++/* supported event reason types */ ++enum { ++ /* element's value has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, ++ /* element's information has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_INFO, ++ /* element's metadata has changed */ ++ VIRTIO_SND_CTL_EVT_MASK_TLV ++}; ++ ++struct virtio_snd_ctl_event { ++ /* VIRTIO_SND_EVT_CTL_NOTIFY */ ++ struct virtio_snd_hdr hdr; ++ /* 0 ... 
virtio_snd_config::controls - 1 */ ++ uint16_t control_id; ++ /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ ++ uint16_t mask; ++}; ++ + #endif /* VIRTIO_SND_IF_H */ +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index c59ea55cd8..2af9931ae9 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -37,9 +37,7 @@ + #include + #include + +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + #define __KVM_HAVE_VCPU_EVENTS + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -76,11 +74,11 @@ struct kvm_regs { + + /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ + #define KVM_ARM_DEVICE_TYPE_SHIFT 0 +-#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ +- KVM_ARM_DEVICE_TYPE_SHIFT) ++#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ ++ KVM_ARM_DEVICE_TYPE_SHIFT) + #define KVM_ARM_DEVICE_ID_SHIFT 16 +-#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ +- KVM_ARM_DEVICE_ID_SHIFT) ++#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ ++ KVM_ARM_DEVICE_ID_SHIFT) + + /* Supported device IDs */ + #define KVM_ARM_DEVICE_VGIC_V2 0 +@@ -162,6 +160,11 @@ struct kvm_sync_regs { + __u64 device_irq_level; + }; + ++/* Bits for run->s.regs.device_irq_level */ ++#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) ++#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) ++#define KVM_ARM_DEV_PMU (1 << 2) ++ + /* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. +diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h +index 1d0e3e1d09..d1b1ec8cb1 100644 +--- a/linux-headers/asm-arm64/sve_context.h ++++ b/linux-headers/asm-arm64/sve_context.h +@@ -13,6 +13,17 @@ + + #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ + ++/* ++ * Yes, __SVE_VQ_MAX is 512 QUADWORDS. 
++ * ++ * To help ensure forward portability, this is much larger than the ++ * current maximum value defined by the SVE architecture. While arrays ++ * or static allocations can be sized based on this value, watch out! ++ * It will waste a surprisingly large amount of memory. ++ * ++ * Dynamic sizing based on the actual runtime vector length is likely to ++ * be preferable for most purposes. ++ */ + #define __SVE_VQ_MIN 1 + #define __SVE_VQ_MAX 512 + +diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h +index 75f320fa91..1fb4f0c9f2 100644 +--- a/linux-headers/asm-generic/bitsperlong.h ++++ b/linux-headers/asm-generic/bitsperlong.h +@@ -24,4 +24,8 @@ + #endif + #endif + ++#ifndef __BITS_PER_LONG_LONG ++#define __BITS_PER_LONG_LONG 64 ++#endif ++ + #endif /* __ASM_GENERIC_BITS_PER_LONG */ +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..109785922c 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -14,8 +14,6 @@ + * Some parts derived from the x86 version of this file. + */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + +diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h +index edcf717c43..9673dc9cb3 100644 +--- a/linux-headers/asm-mips/kvm.h ++++ b/linux-headers/asm-mips/kvm.h +@@ -20,8 +20,6 @@ + * Some parts derived from the x86 version of this file. 
+ */ + +-#define __KVM_HAVE_READONLY_MEM +- + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + + /* +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index 9f18fa090f..1691297a76 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -28,7 +28,6 @@ + #define __KVM_HAVE_PPC_SMT + #define __KVM_HAVE_IRQCHIP + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_GUEST_DEBUG + + /* Not always available, but if it is, this is the correct offset. */ + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +@@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { + #define KVM_XIVE_TIMA_PAGE_OFFSET 0 + #define KVM_XIVE_ESB_PAGE_OFFSET 4 + ++/* for KVM_PPC_GET_PVINFO */ ++ ++#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) ++ ++struct kvm_ppc_pvinfo { ++ /* out */ ++ __u32 flags; ++ __u32 hcall[4]; ++ __u8 pad[108]; ++}; ++ ++/* for KVM_PPC_GET_SMMU_INFO */ ++#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 ++ ++struct kvm_ppc_one_page_size { ++ __u32 page_shift; /* Page shift (or 0) */ ++ __u32 pte_enc; /* Encoding in the HPTE (>>12) */ ++}; ++ ++struct kvm_ppc_one_seg_page_size { ++ __u32 page_shift; /* Base page shift of segment (or 0) */ ++ __u32 slb_enc; /* SLB encoding for BookS */ ++ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 ++#define KVM_PPC_1T_SEGMENTS 0x00000002 ++#define KVM_PPC_NO_HASH 0x00000004 ++ ++struct kvm_ppc_smmu_info { ++ __u64 flags; ++ __u32 slb_size; ++ __u16 data_keys; /* # storage keys supported for data */ ++ __u16 instr_keys; /* # storage keys supported for instructions */ ++ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; ++}; ++ ++/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ ++struct kvm_ppc_resize_hpt { ++ __u64 flags; ++ __u32 shift; ++ __u32 pad; ++}; ++ + #endif /* __LINUX_KVM_POWERPC_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7499e88a94..b1c503c295 100644 +--- a/linux-headers/asm-riscv/kvm.h 
++++ b/linux-headers/asm-riscv/kvm.h +@@ -16,7 +16,6 @@ + #include + + #define __KVM_HAVE_IRQ_LINE +-#define __KVM_HAVE_READONLY_MEM + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + +@@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, ++ KVM_RISCV_ISA_EXT_ZTSO, ++ KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h +index 023a2763a9..684c4e1205 100644 +--- a/linux-headers/asm-s390/kvm.h ++++ b/linux-headers/asm-s390/kvm.h +@@ -12,7 +12,320 @@ + #include + + #define __KVM_S390 +-#define __KVM_HAVE_GUEST_DEBUG ++ ++struct kvm_s390_skeys { ++ __u64 start_gfn; ++ __u64 count; ++ __u64 skeydata_addr; ++ __u32 flags; ++ __u32 reserved[9]; ++}; ++ ++#define KVM_S390_CMMA_PEEK (1 << 0) ++ ++/** ++ * kvm_s390_cmma_log - Used for CMMA migration. ++ * ++ * Used both for input and output. ++ * ++ * @start_gfn: Guest page number to start from. ++ * @count: Size of the result buffer. ++ * @flags: Control operation mode via KVM_S390_CMMA_* flags ++ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty ++ * pages are still remaining. ++ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set ++ * in the PGSTE. ++ * @values: Pointer to the values buffer. ++ * ++ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
++ */ ++struct kvm_s390_cmma_log { ++ __u64 start_gfn; ++ __u32 count; ++ __u32 flags; ++ union { ++ __u64 remaining; ++ __u64 mask; ++ }; ++ __u64 values; ++}; ++ ++#define KVM_S390_RESET_POR 1 ++#define KVM_S390_RESET_CLEAR 2 ++#define KVM_S390_RESET_SUBSYSTEM 4 ++#define KVM_S390_RESET_CPU_INIT 8 ++#define KVM_S390_RESET_IPL 16 ++ ++/* for KVM_S390_MEM_OP */ ++struct kvm_s390_mem_op { ++ /* in */ ++ __u64 gaddr; /* the guest address */ ++ __u64 flags; /* flags */ ++ __u32 size; /* amount of bytes */ ++ __u32 op; /* type of operation */ ++ __u64 buf; /* buffer in userspace */ ++ union { ++ struct { ++ __u8 ar; /* the access register number */ ++ __u8 key; /* access key, ignored if flag unset */ ++ __u8 pad1[6]; /* ignored */ ++ __u64 old_addr; /* ignored if cmpxchg flag unset */ ++ }; ++ __u32 sida_offset; /* offset into the sida */ ++ __u8 reserved[32]; /* ignored */ ++ }; ++}; ++/* types for kvm_s390_mem_op->op */ ++#define KVM_S390_MEMOP_LOGICAL_READ 0 ++#define KVM_S390_MEMOP_LOGICAL_WRITE 1 ++#define KVM_S390_MEMOP_SIDA_READ 2 ++#define KVM_S390_MEMOP_SIDA_WRITE 3 ++#define KVM_S390_MEMOP_ABSOLUTE_READ 4 ++#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 ++#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 ++ ++/* flags for kvm_s390_mem_op->flags */ ++#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) ++#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) ++#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) ++ ++/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ ++#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) ++#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) ++ ++struct kvm_s390_psw { ++ __u64 mask; ++ __u64 addr; ++}; ++ ++/* valid values for type in kvm_s390_interrupt */ ++#define KVM_S390_SIGP_STOP 0xfffe0000u ++#define KVM_S390_PROGRAM_INT 0xfffe0001u ++#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u ++#define KVM_S390_RESTART 0xfffe0003u ++#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u ++#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u ++#define 
KVM_S390_MCHK 0xfffe1000u ++#define KVM_S390_INT_CLOCK_COMP 0xffff1004u ++#define KVM_S390_INT_CPU_TIMER 0xffff1005u ++#define KVM_S390_INT_VIRTIO 0xffff2603u ++#define KVM_S390_INT_SERVICE 0xffff2401u ++#define KVM_S390_INT_EMERGENCY 0xffff1201u ++#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u ++/* Anything below 0xfffe0000u is taken by INT_IO */ ++#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ ++ (((schid)) | \ ++ ((ssid) << 16) | \ ++ ((cssid) << 18) | \ ++ ((ai) << 26)) ++#define KVM_S390_INT_IO_MIN 0x00000000u ++#define KVM_S390_INT_IO_MAX 0xfffdffffu ++#define KVM_S390_INT_IO_AI_MASK 0x04000000u ++ ++ ++struct kvm_s390_interrupt { ++ __u32 type; ++ __u32 parm; ++ __u64 parm64; ++}; ++ ++struct kvm_s390_io_info { ++ __u16 subchannel_id; ++ __u16 subchannel_nr; ++ __u32 io_int_parm; ++ __u32 io_int_word; ++}; ++ ++struct kvm_s390_ext_info { ++ __u32 ext_params; ++ __u32 pad; ++ __u64 ext_params2; ++}; ++ ++struct kvm_s390_pgm_info { ++ __u64 trans_exc_code; ++ __u64 mon_code; ++ __u64 per_address; ++ __u32 data_exc_code; ++ __u16 code; ++ __u16 mon_class_nr; ++ __u8 per_code; ++ __u8 per_atmid; ++ __u8 exc_access_id; ++ __u8 per_access_id; ++ __u8 op_access_id; ++#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 ++#define KVM_S390_PGM_FLAGS_ILC_0 0x02 ++#define KVM_S390_PGM_FLAGS_ILC_1 0x04 ++#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 ++#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 ++ __u8 flags; ++ __u8 pad[2]; ++}; ++ ++struct kvm_s390_prefix_info { ++ __u32 address; ++}; ++ ++struct kvm_s390_extcall_info { ++ __u16 code; ++}; ++ ++struct kvm_s390_emerg_info { ++ __u16 code; ++}; ++ ++#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 ++struct kvm_s390_stop_info { ++ __u32 flags; ++}; ++ ++struct kvm_s390_mchk_info { ++ __u64 cr14; ++ __u64 mcic; ++ __u64 failing_storage_address; ++ __u32 ext_damage_code; ++ __u32 pad; ++ __u8 fixed_logout[16]; ++}; ++ ++struct kvm_s390_irq { ++ __u64 type; ++ union { ++ struct kvm_s390_io_info io; ++ struct kvm_s390_ext_info ext; ++ struct 
kvm_s390_pgm_info pgm; ++ struct kvm_s390_emerg_info emerg; ++ struct kvm_s390_extcall_info extcall; ++ struct kvm_s390_prefix_info prefix; ++ struct kvm_s390_stop_info stop; ++ struct kvm_s390_mchk_info mchk; ++ char reserved[64]; ++ } u; ++}; ++ ++struct kvm_s390_irq_state { ++ __u64 buf; ++ __u32 flags; /* will stay unused for compatibility reasons */ ++ __u32 len; ++ __u32 reserved[4]; /* will stay unused for compatibility reasons */ ++}; ++ ++struct kvm_s390_ucas_mapping { ++ __u64 user_addr; ++ __u64 vcpu_addr; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_sec_parm { ++ __u64 origin; ++ __u64 length; ++}; ++ ++struct kvm_s390_pv_unp { ++ __u64 addr; ++ __u64 size; ++ __u64 tweak; ++}; ++ ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ ++enum pv_cmd_id { ++ KVM_PV_ENABLE, ++ KVM_PV_DISABLE, ++ KVM_PV_SET_SEC_PARMS, ++ KVM_PV_UNPACK, ++ KVM_PV_VERIFY, ++ KVM_PV_PREP_RESET, ++ KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, ++ KVM_PV_ASYNC_CLEANUP_PREPARE, ++ KVM_PV_ASYNC_CLEANUP_PERFORM, ++}; ++ ++struct kvm_pv_cmd { ++ __u32 cmd; /* Command to be executed */ ++ __u16 rc; 
/* Ultravisor return code */ ++ __u16 rrc; /* Ultravisor return reason code */ ++ __u64 data; /* Data or address */ ++ __u32 flags; /* flags for future extensions. Must be 0 for now */ ++ __u32 reserved[3]; ++}; ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) + + /* Device control API: s390-specific devices */ + #define KVM_DEV_FLIC_GET_ALL_IRQS 1 +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 003fb74534..31c95c2dfe 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -7,6 +7,8 @@ + * + */ + ++#include ++#include + #include + #include + #include +@@ -40,7 +42,6 @@ + #define __KVM_HAVE_IRQ_LINE + #define __KVM_HAVE_MSI + #define __KVM_HAVE_USER_NMI +-#define __KVM_HAVE_GUEST_DEBUG + #define __KVM_HAVE_MSIX + #define __KVM_HAVE_MCE + #define __KVM_HAVE_PIT_STATE2 +@@ -49,7 +50,6 @@ + #define __KVM_HAVE_DEBUGREGS + #define __KVM_HAVE_XSAVE + #define __KVM_HAVE_XCRS +-#define __KVM_HAVE_READONLY_MEM + + /* Architectural interrupt line count. 
*/ + #define KVM_NR_INTERRUPTS 256 +@@ -455,8 +455,13 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + +-/* attributes for system fd (group 0) */ +-#define KVM_X86_XCOMP_GUEST_SUPP 0 ++/* vendor-independent attributes for system fd (group 0) */ ++#define KVM_X86_GRP_SYSTEM 0 ++# define KVM_X86_XCOMP_GUEST_SUPP 0 ++ ++/* vendor-specific groups and attributes for system fd */ ++#define KVM_X86_GRP_SEV 1 ++# define KVM_X86_SEV_VMSA_FEATURES 0 + + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +@@ -524,9 +529,310 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + +-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) ++#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) + #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + ++/* for KVM_CAP_MCE */ ++struct kvm_x86_mce { ++ __u64 status; ++ __u64 addr; ++ __u64 misc; ++ __u64 mcg_status; ++ __u8 bank; ++ __u8 pad1[7]; ++ __u64 pad2[3]; ++}; ++ ++/* for KVM_CAP_XEN_HVM */ ++#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) ++#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) ++#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) ++#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) ++#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) ++ ++struct kvm_xen_hvm_config { ++ __u32 flags; ++ __u32 msr; ++ __u64 blob_addr_32; ++ __u64 blob_addr_64; ++ __u8 blob_size_32; ++ __u8 blob_size_64; ++ __u8 pad2[30]; ++}; ++ ++struct kvm_xen_hvm_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u8 long_mode; ++ __u8 vector; ++ __u8 runstate_update_flag; ++ union { ++ __u64 gfn; ++#define KVM_XEN_INVALID_GFN ((__u64)-1) ++ __u64 hva; ++ } shared_info; ++ struct { ++ __u32 send_port; ++ 
__u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ ++ __u32 flags; ++#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) ++#define KVM_XEN_EVTCHN_UPDATE (1 << 1) ++#define KVM_XEN_EVTCHN_RESET (1 << 2) ++ /* ++ * Events sent by the guest are either looped back to ++ * the guest itself (potentially on a different port#) ++ * or signalled via an eventfd. ++ */ ++ union { ++ struct { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++ } port; ++ struct { ++ __u32 port; /* Zero for eventfd */ ++ __s32 fd; ++ } eventfd; ++ __u32 padding[4]; ++ } deliver; ++ } evtchn; ++ __u32 xen_version; ++ __u64 pad[8]; ++ } u; ++}; ++ ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 ++#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 ++#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ ++#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 ++ ++struct kvm_xen_vcpu_attr { ++ __u16 type; ++ __u16 pad[3]; ++ union { ++ __u64 gpa; ++#define KVM_XEN_INVALID_GPA ((__u64)-1) ++ __u64 hva; ++ __u64 pad[8]; ++ struct { ++ __u64 state; ++ __u64 state_entry_time; ++ __u64 time_running; ++ __u64 time_runnable; ++ __u64 time_blocked; ++ __u64 time_offline; ++ } runstate; ++ __u32 vcpu_id; ++ struct { ++ __u32 port; ++ __u32 priority; ++ __u64 expires_ns; ++ } timer; ++ __u8 vector; ++ } u; ++}; ++ ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 ++#define 
KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 ++#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 ++#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 ++#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 ++/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ ++#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 ++ ++/* Secure Encrypted Virtualization command */ ++enum sev_cmd_id { ++ /* Guest initialization commands */ ++ KVM_SEV_INIT = 0, ++ KVM_SEV_ES_INIT, ++ /* Guest launch commands */ ++ KVM_SEV_LAUNCH_START, ++ KVM_SEV_LAUNCH_UPDATE_DATA, ++ KVM_SEV_LAUNCH_UPDATE_VMSA, ++ KVM_SEV_LAUNCH_SECRET, ++ KVM_SEV_LAUNCH_MEASURE, ++ KVM_SEV_LAUNCH_FINISH, ++ /* Guest migration commands (outgoing) */ ++ KVM_SEV_SEND_START, ++ KVM_SEV_SEND_UPDATE_DATA, ++ KVM_SEV_SEND_UPDATE_VMSA, ++ KVM_SEV_SEND_FINISH, ++ /* Guest migration commands (incoming) */ ++ KVM_SEV_RECEIVE_START, ++ KVM_SEV_RECEIVE_UPDATE_DATA, ++ KVM_SEV_RECEIVE_UPDATE_VMSA, ++ KVM_SEV_RECEIVE_FINISH, ++ /* Guest status and debug commands */ ++ KVM_SEV_GUEST_STATUS, ++ KVM_SEV_DBG_DECRYPT, ++ KVM_SEV_DBG_ENCRYPT, ++ /* Guest certificates commands */ ++ KVM_SEV_CERT_EXPORT, ++ /* Attestation report */ ++ KVM_SEV_GET_ATTESTATION_REPORT, ++ /* Guest Migration Extension */ ++ KVM_SEV_SEND_CANCEL, ++ ++ /* Second time is the charm; improved versions of the above ioctls. 
*/ ++ KVM_SEV_INIT2, ++ ++ KVM_SEV_NR_MAX, ++}; ++ ++struct kvm_sev_cmd { ++ __u32 id; ++ __u32 pad0; ++ __u64 data; ++ __u32 error; ++ __u32 sev_fd; ++}; ++ ++struct kvm_sev_init { ++ __u64 vmsa_features; ++ __u32 flags; ++ __u32 pad[9]; ++}; ++ ++struct kvm_sev_launch_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 dh_uaddr; ++ __u32 dh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_launch_update_data { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++ ++struct kvm_sev_launch_secret { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_launch_measure { ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_guest_status { ++ __u32 handle; ++ __u32 policy; ++ __u32 state; ++}; ++ ++struct kvm_sev_dbg { ++ __u64 src_uaddr; ++ __u64 dst_uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_attestation_report { ++ __u8 mnonce[16]; ++ __u64 uaddr; ++ __u32 len; ++ __u32 pad0; ++}; ++ ++struct kvm_sev_send_start { ++ __u32 policy; ++ __u32 pad0; ++ __u64 pdh_cert_uaddr; ++ __u32 pdh_cert_len; ++ __u32 pad1; ++ __u64 plat_certs_uaddr; ++ __u32 plat_certs_len; ++ __u32 pad2; ++ __u64 amd_certs_uaddr; ++ __u32 amd_certs_len; ++ __u32 pad3; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad4; ++}; ++ ++struct kvm_sev_send_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; ++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++struct kvm_sev_receive_start { ++ __u32 handle; ++ __u32 policy; ++ __u64 pdh_uaddr; ++ __u32 pdh_len; ++ __u32 pad0; ++ __u64 session_uaddr; ++ __u32 session_len; ++ __u32 pad1; ++}; ++ ++struct kvm_sev_receive_update_data { ++ __u64 hdr_uaddr; ++ __u32 hdr_len; ++ __u32 pad0; ++ __u64 guest_uaddr; ++ __u32 guest_len; ++ __u32 pad1; 
++ __u64 trans_uaddr; ++ __u32 trans_len; ++ __u32 pad2; ++}; ++ ++#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) ++#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) ++ ++struct kvm_hyperv_eventfd { ++ __u32 conn_id; ++ __s32 fd; ++ __u32 flags; ++ __u32 padding[3]; ++}; ++ ++#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff ++#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) ++ + /* + * Masked event layout. + * Bits Description +@@ -547,10 +853,10 @@ struct kvm_pmu_event_filter { + ((__u64)(!!(exclude)) << 55)) + + #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ +- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) ++ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) ++#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) ++#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) + #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ +@@ -558,9 +864,11 @@ struct kvm_pmu_event_filter { + #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ + + /* x86-specific KVM_EXIT_HYPERCALL flags. */ +-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) ++#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) + + #define KVM_X86_DEFAULT_VM 0 + #define KVM_X86_SW_PROTECTED_VM 1 ++#define KVM_X86_SEV_VM 2 ++#define KVM_X86_SEV_ES_VM 3 + + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h +new file mode 100644 +index 0000000000..d9897771be +--- /dev/null ++++ b/linux-headers/linux/bits.h +@@ -0,0 +1,15 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* bits.h: Macros for dealing with bitmasks. 
*/ ++ ++#ifndef _LINUX_BITS_H ++#define _LINUX_BITS_H ++ ++#define __GENMASK(h, l) \ ++ (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ ++ (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) ++ ++#define __GENMASK_ULL(h, l) \ ++ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ ++ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) ++ ++#endif /* _LINUX_BITS_H */ +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 17839229b2..038731cdef 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -16,6 +16,11 @@ + + #define KVM_API_VERSION 12 + ++/* ++ * Backwards-compatible definitions. ++ */ ++#define __KVM_HAVE_GUEST_DEBUG ++ + /* for KVM_SET_USER_MEMORY_REGION */ + struct kvm_userspace_memory_region { + __u32 slot; +@@ -85,43 +90,6 @@ struct kvm_pit_config { + + #define KVM_PIT_SPEAKER_DUMMY 1 + +-struct kvm_s390_skeys { +- __u64 start_gfn; +- __u64 count; +- __u64 skeydata_addr; +- __u32 flags; +- __u32 reserved[9]; +-}; +- +-#define KVM_S390_CMMA_PEEK (1 << 0) +- +-/** +- * kvm_s390_cmma_log - Used for CMMA migration. +- * +- * Used both for input and output. +- * +- * @start_gfn: Guest page number to start from. +- * @count: Size of the result buffer. +- * @flags: Control operation mode via KVM_S390_CMMA_* flags +- * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty +- * pages are still remaining. +- * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set +- * in the PGSTE. +- * @values: Pointer to the values buffer. +- * +- * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. 
+- */ +-struct kvm_s390_cmma_log { +- __u64 start_gfn; +- __u32 count; +- __u32 flags; +- union { +- __u64 remaining; +- __u64 mask; +- }; +- __u64 values; +-}; +- + struct kvm_hyperv_exit { + #define KVM_EXIT_HYPERV_SYNIC 1 + #define KVM_EXIT_HYPERV_HCALL 2 +@@ -313,11 +281,6 @@ struct kvm_run { + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +-#define KVM_S390_RESET_POR 1 +-#define KVM_S390_RESET_CLEAR 2 +-#define KVM_S390_RESET_SUBSYSTEM 4 +-#define KVM_S390_RESET_CPU_INIT 8 +-#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { +@@ -532,43 +495,6 @@ struct kvm_translation { + __u8 pad[5]; + }; + +-/* for KVM_S390_MEM_OP */ +-struct kvm_s390_mem_op { +- /* in */ +- __u64 gaddr; /* the guest address */ +- __u64 flags; /* flags */ +- __u32 size; /* amount of bytes */ +- __u32 op; /* type of operation */ +- __u64 buf; /* buffer in userspace */ +- union { +- struct { +- __u8 ar; /* the access register number */ +- __u8 key; /* access key, ignored if flag unset */ +- __u8 pad1[6]; /* ignored */ +- __u64 old_addr; /* ignored if cmpxchg flag unset */ +- }; +- __u32 sida_offset; /* offset into the sida */ +- __u8 reserved[32]; /* ignored */ +- }; +-}; +-/* types for kvm_s390_mem_op->op */ +-#define KVM_S390_MEMOP_LOGICAL_READ 0 +-#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +-#define KVM_S390_MEMOP_SIDA_READ 2 +-#define KVM_S390_MEMOP_SIDA_WRITE 3 +-#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +-#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +-#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 +- +-/* flags for kvm_s390_mem_op->flags */ +-#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +-#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +-#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +- +-/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +-#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +-#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) +- + /* for KVM_INTERRUPT */ + struct kvm_interrupt { + /* in */ 
+@@ -633,124 +559,6 @@ struct kvm_mp_state { + __u32 mp_state; + }; + +-struct kvm_s390_psw { +- __u64 mask; +- __u64 addr; +-}; +- +-/* valid values for type in kvm_s390_interrupt */ +-#define KVM_S390_SIGP_STOP 0xfffe0000u +-#define KVM_S390_PROGRAM_INT 0xfffe0001u +-#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +-#define KVM_S390_RESTART 0xfffe0003u +-#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +-#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +-#define KVM_S390_MCHK 0xfffe1000u +-#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +-#define KVM_S390_INT_CPU_TIMER 0xffff1005u +-#define KVM_S390_INT_VIRTIO 0xffff2603u +-#define KVM_S390_INT_SERVICE 0xffff2401u +-#define KVM_S390_INT_EMERGENCY 0xffff1201u +-#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +-/* Anything below 0xfffe0000u is taken by INT_IO */ +-#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ +- (((schid)) | \ +- ((ssid) << 16) | \ +- ((cssid) << 18) | \ +- ((ai) << 26)) +-#define KVM_S390_INT_IO_MIN 0x00000000u +-#define KVM_S390_INT_IO_MAX 0xfffdffffu +-#define KVM_S390_INT_IO_AI_MASK 0x04000000u +- +- +-struct kvm_s390_interrupt { +- __u32 type; +- __u32 parm; +- __u64 parm64; +-}; +- +-struct kvm_s390_io_info { +- __u16 subchannel_id; +- __u16 subchannel_nr; +- __u32 io_int_parm; +- __u32 io_int_word; +-}; +- +-struct kvm_s390_ext_info { +- __u32 ext_params; +- __u32 pad; +- __u64 ext_params2; +-}; +- +-struct kvm_s390_pgm_info { +- __u64 trans_exc_code; +- __u64 mon_code; +- __u64 per_address; +- __u32 data_exc_code; +- __u16 code; +- __u16 mon_class_nr; +- __u8 per_code; +- __u8 per_atmid; +- __u8 exc_access_id; +- __u8 per_access_id; +- __u8 op_access_id; +-#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +-#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +-#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +-#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +-#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 +- __u8 flags; +- __u8 pad[2]; +-}; +- +-struct kvm_s390_prefix_info { +- __u32 address; +-}; +- +-struct kvm_s390_extcall_info { +- __u16 code; 
+-}; +- +-struct kvm_s390_emerg_info { +- __u16 code; +-}; +- +-#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +-struct kvm_s390_stop_info { +- __u32 flags; +-}; +- +-struct kvm_s390_mchk_info { +- __u64 cr14; +- __u64 mcic; +- __u64 failing_storage_address; +- __u32 ext_damage_code; +- __u32 pad; +- __u8 fixed_logout[16]; +-}; +- +-struct kvm_s390_irq { +- __u64 type; +- union { +- struct kvm_s390_io_info io; +- struct kvm_s390_ext_info ext; +- struct kvm_s390_pgm_info pgm; +- struct kvm_s390_emerg_info emerg; +- struct kvm_s390_extcall_info extcall; +- struct kvm_s390_prefix_info prefix; +- struct kvm_s390_stop_info stop; +- struct kvm_s390_mchk_info mchk; +- char reserved[64]; +- } u; +-}; +- +-struct kvm_s390_irq_state { +- __u64 buf; +- __u32 flags; /* will stay unused for compatibility reasons */ +- __u32 len; +- __u32 reserved[4]; /* will stay unused for compatibility reasons */ +-}; +- + /* for KVM_SET_GUEST_DEBUG */ + + #define KVM_GUESTDBG_ENABLE 0x00000001 +@@ -806,50 +614,6 @@ struct kvm_enable_cap { + __u8 pad[64]; + }; + +-/* for KVM_PPC_GET_PVINFO */ +- +-#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) +- +-struct kvm_ppc_pvinfo { +- /* out */ +- __u32 flags; +- __u32 hcall[4]; +- __u8 pad[108]; +-}; +- +-/* for KVM_PPC_GET_SMMU_INFO */ +-#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 +- +-struct kvm_ppc_one_page_size { +- __u32 page_shift; /* Page shift (or 0) */ +- __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +-}; +- +-struct kvm_ppc_one_seg_page_size { +- __u32 page_shift; /* Base page shift of segment (or 0) */ +- __u32 slb_enc; /* SLB encoding for BookS */ +- struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +-#define KVM_PPC_1T_SEGMENTS 0x00000002 +-#define KVM_PPC_NO_HASH 0x00000004 +- +-struct kvm_ppc_smmu_info { +- __u64 flags; +- __u32 slb_size; +- __u16 data_keys; /* # storage keys supported for data */ +- __u16 instr_keys; /* # storage keys supported for instructions */ +- struct 
kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +-}; +- +-/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +-struct kvm_ppc_resize_hpt { +- __u64 flags; +- __u32 shift; +- __u32 pad; +-}; +- + #define KVMIO 0xAE + + /* machine type bits, to be used as argument to KVM_CREATE_VM */ +@@ -919,9 +683,7 @@ struct kvm_ppc_resize_hpt { + /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ + #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 + #define KVM_CAP_USER_NMI 22 +-#ifdef __KVM_HAVE_GUEST_DEBUG + #define KVM_CAP_SET_GUEST_DEBUG 23 +-#endif + #ifdef __KVM_HAVE_PIT + #define KVM_CAP_REINJECT_CONTROL 24 + #endif +@@ -1152,8 +914,6 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_GUEST_MEMFD 234 + #define KVM_CAP_VM_TYPES 235 + +-#ifdef KVM_CAP_IRQ_ROUTING +- + struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +@@ -1218,42 +978,6 @@ struct kvm_irq_routing { + struct kvm_irq_routing_entry entries[]; + }; + +-#endif +- +-#ifdef KVM_CAP_MCE +-/* x86 MCE */ +-struct kvm_x86_mce { +- __u64 status; +- __u64 addr; +- __u64 misc; +- __u64 mcg_status; +- __u8 bank; +- __u8 pad1[7]; +- __u64 pad2[3]; +-}; +-#endif +- +-#ifdef KVM_CAP_XEN_HVM +-#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +-#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +-#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +-#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +-#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +-#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +- +-struct kvm_xen_hvm_config { +- __u32 flags; +- __u32 msr; +- __u64 blob_addr_32; +- __u64 blob_addr_64; +- __u8 blob_size_32; +- __u8 blob_size_64; +- __u8 pad2[30]; +-}; +-#endif +- + #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) + /* + * Available with KVM_CAP_IRQFD_RESAMPLE +@@ -1438,11 +1162,6 @@ struct kvm_vfio_spapr_tce { + struct kvm_userspace_memory_region2) + + /* enable ucontrol for s390 */ +-struct 
kvm_s390_ucas_mapping { +- __u64 user_addr; +- __u64 vcpu_addr; +- __u64 length; +-}; + #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) + #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) + #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) +@@ -1637,89 +1356,6 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +-struct kvm_s390_pv_sec_parm { +- __u64 origin; +- __u64 length; +-}; +- +-struct kvm_s390_pv_unp { +- __u64 addr; +- __u64 size; +- __u64 tweak; +-}; +- +-enum pv_cmd_dmp_id { +- KVM_PV_DUMP_INIT, +- KVM_PV_DUMP_CONFIG_STOR_STATE, +- KVM_PV_DUMP_COMPLETE, +- KVM_PV_DUMP_CPU, +-}; +- +-struct kvm_s390_pv_dmp { +- __u64 subcmd; +- __u64 buff_addr; +- __u64 buff_len; +- __u64 gaddr; /* For dump storage state */ +- __u64 reserved[4]; +-}; +- +-enum pv_cmd_info_id { +- KVM_PV_INFO_VM, +- KVM_PV_INFO_DUMP, +-}; +- +-struct kvm_s390_pv_info_dump { +- __u64 dump_cpu_buffer_len; +- __u64 dump_config_mem_buffer_per_1m; +- __u64 dump_config_finalize_len; +-}; +- +-struct kvm_s390_pv_info_vm { +- __u64 inst_calls_list[4]; +- __u64 max_cpus; +- __u64 max_guests; +- __u64 max_guest_addr; +- __u64 feature_indication; +-}; +- +-struct kvm_s390_pv_info_header { +- __u32 id; +- __u32 len_max; +- __u32 len_written; +- __u32 reserved; +-}; +- +-struct kvm_s390_pv_info { +- struct kvm_s390_pv_info_header header; +- union { +- struct kvm_s390_pv_info_dump dump; +- struct kvm_s390_pv_info_vm vm; +- }; +-}; +- +-enum pv_cmd_id { +- KVM_PV_ENABLE, +- KVM_PV_DISABLE, +- KVM_PV_SET_SEC_PARMS, +- KVM_PV_UNPACK, +- KVM_PV_VERIFY, +- KVM_PV_PREP_RESET, +- KVM_PV_UNSHARE_ALL, +- KVM_PV_INFO, +- KVM_PV_DUMP, +- KVM_PV_ASYNC_CLEANUP_PREPARE, +- KVM_PV_ASYNC_CLEANUP_PERFORM, +-}; +- +-struct kvm_pv_cmd { +- __u32 cmd; /* Command to be executed */ +- __u16 rc; /* Ultravisor return code */ +- __u16 rrc; /* Ultravisor return reason code */ +- __u64 data; /* 
Data or address */ +- __u32 flags; /* flags for future extensions. Must be 0 for now */ +- __u32 reserved[3]; +-}; +- + /* Available with KVM_CAP_S390_PROTECTED */ + #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + +@@ -1733,58 +1369,6 @@ struct kvm_pv_cmd { + #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) + #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +-struct kvm_xen_hvm_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u8 long_mode; +- __u8 vector; +- __u8 runstate_update_flag; +- struct { +- __u64 gfn; +-#define KVM_XEN_INVALID_GFN ((__u64)-1) +- } shared_info; +- struct { +- __u32 send_port; +- __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ +- __u32 flags; +-#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +-#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +-#define KVM_XEN_EVTCHN_RESET (1 << 2) +- /* +- * Events sent by the guest are either looped back to +- * the guest itself (potentially on a different port#) +- * or signalled via an eventfd. 
+- */ +- union { +- struct { +- __u32 port; +- __u32 vcpu; +- __u32 priority; +- } port; +- struct { +- __u32 port; /* Zero for eventfd */ +- __s32 fd; +- } eventfd; +- __u32 padding[4]; +- } deliver; +- } evtchn; +- __u32 xen_version; +- __u64 pad[8]; +- } u; +-}; +- +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +-#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +-#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +-#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +-#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +- + /* Per-vCPU Xen attributes */ + #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) + #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) +@@ -1795,242 +1379,6 @@ struct kvm_xen_hvm_attr { + #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) + #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +-struct kvm_xen_vcpu_attr { +- __u16 type; +- __u16 pad[3]; +- union { +- __u64 gpa; +-#define KVM_XEN_INVALID_GPA ((__u64)-1) +- __u64 pad[8]; +- struct { +- __u64 state; +- __u64 state_entry_time; +- __u64 time_running; +- __u64 time_runnable; +- __u64 time_blocked; +- __u64 time_offline; +- } runstate; +- __u32 vcpu_id; +- struct { +- __u32 port; +- __u32 priority; +- __u64 expires_ns; +- } timer; +- __u8 vector; +- } u; +-}; +- +-/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +-#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +-/* Available with KVM_CAP_XEN_HVM / 
KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +-#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +-#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +-#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +- +-/* Secure Encrypted Virtualization command */ +-enum sev_cmd_id { +- /* Guest initialization commands */ +- KVM_SEV_INIT = 0, +- KVM_SEV_ES_INIT, +- /* Guest launch commands */ +- KVM_SEV_LAUNCH_START, +- KVM_SEV_LAUNCH_UPDATE_DATA, +- KVM_SEV_LAUNCH_UPDATE_VMSA, +- KVM_SEV_LAUNCH_SECRET, +- KVM_SEV_LAUNCH_MEASURE, +- KVM_SEV_LAUNCH_FINISH, +- /* Guest migration commands (outgoing) */ +- KVM_SEV_SEND_START, +- KVM_SEV_SEND_UPDATE_DATA, +- KVM_SEV_SEND_UPDATE_VMSA, +- KVM_SEV_SEND_FINISH, +- /* Guest migration commands (incoming) */ +- KVM_SEV_RECEIVE_START, +- KVM_SEV_RECEIVE_UPDATE_DATA, +- KVM_SEV_RECEIVE_UPDATE_VMSA, +- KVM_SEV_RECEIVE_FINISH, +- /* Guest status and debug commands */ +- KVM_SEV_GUEST_STATUS, +- KVM_SEV_DBG_DECRYPT, +- KVM_SEV_DBG_ENCRYPT, +- /* Guest certificates commands */ +- KVM_SEV_CERT_EXPORT, +- /* Attestation report */ +- KVM_SEV_GET_ATTESTATION_REPORT, +- /* Guest Migration Extension */ +- KVM_SEV_SEND_CANCEL, +- +- KVM_SEV_NR_MAX, +-}; +- +-struct kvm_sev_cmd { +- __u32 id; +- __u64 data; +- __u32 error; +- __u32 sev_fd; +-}; +- +-struct kvm_sev_launch_start { +- __u32 handle; +- __u32 policy; +- __u64 dh_uaddr; +- __u32 dh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_launch_update_data { +- __u64 uaddr; +- __u32 len; +-}; +- +- +-struct kvm_sev_launch_secret { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_launch_measure { +- __u64 uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_guest_status { +- __u32 handle; +- __u32 policy; +- __u32 state; +-}; +- +-struct kvm_sev_dbg { +- __u64 src_uaddr; +- __u64 dst_uaddr; +- __u32 len; +-}; +- +-struct kvm_sev_attestation_report { +- __u8 mnonce[16]; +- __u64 uaddr; +- __u32 len; +-}; +- 
+-struct kvm_sev_send_start { +- __u32 policy; +- __u64 pdh_cert_uaddr; +- __u32 pdh_cert_len; +- __u64 plat_certs_uaddr; +- __u32 plat_certs_len; +- __u64 amd_certs_uaddr; +- __u32 amd_certs_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_send_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-struct kvm_sev_receive_start { +- __u32 handle; +- __u32 policy; +- __u64 pdh_uaddr; +- __u32 pdh_len; +- __u64 session_uaddr; +- __u32 session_len; +-}; +- +-struct kvm_sev_receive_update_data { +- __u64 hdr_uaddr; +- __u32 hdr_len; +- __u64 guest_uaddr; +- __u32 guest_len; +- __u64 trans_uaddr; +- __u32 trans_len; +-}; +- +-#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +-#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +-#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) +- +-struct kvm_assigned_pci_dev { +- __u32 assigned_dev_id; +- __u32 busnr; +- __u32 devfn; +- __u32 flags; +- __u32 segnr; +- union { +- __u32 reserved[11]; +- }; +-}; +- +-#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +-#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +-#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) +- +-#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +-#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +-#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) +- +-#define KVM_DEV_IRQ_HOST_MASK 0x00ff +-#define KVM_DEV_IRQ_GUEST_MASK 0xff00 +- +-struct kvm_assigned_irq { +- __u32 assigned_dev_id; +- __u32 host_irq; /* ignored (legacy field) */ +- __u32 guest_irq; +- __u32 flags; +- union { +- __u32 reserved[12]; +- }; +-}; +- +-struct kvm_assigned_msix_nr { +- __u32 assigned_dev_id; +- __u16 entry_nr; +- __u16 padding; +-}; +- +-#define KVM_MAX_MSIX_PER_DEV 256 +-struct kvm_assigned_msix_entry { +- __u32 assigned_dev_id; +- __u32 gsi; +- __u16 entry; /* The index of entry in the MSI-X table */ +- __u16 padding[3]; +-}; +- +-#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) +- +-/* 
Available with KVM_CAP_ARM_USER_IRQ */ +- +-/* Bits for run->s.regs.device_irq_level */ +-#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +-#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +-#define KVM_ARM_DEV_PMU (1 << 2) +- +-struct kvm_hyperv_eventfd { +- __u32 conn_id; +- __s32 fd; +- __u32 flags; +- __u32 padding[3]; +-}; +- +-#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +-#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) +- + #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) + #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + +@@ -2176,33 +1524,6 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_S390_ZPCI_OP */ + #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) + +-struct kvm_s390_zpci_op { +- /* in */ +- __u32 fh; /* target device */ +- __u8 op; /* operation to perform */ +- __u8 pad[3]; +- union { +- /* for KVM_S390_ZPCIOP_REG_AEN */ +- struct { +- __u64 ibv; /* Guest addr of interrupt bit vector */ +- __u64 sb; /* Guest addr of summary bit */ +- __u32 flags; +- __u32 noi; /* Number of interrupts */ +- __u8 isc; /* Guest interrupt subclass */ +- __u8 sbo; /* Offset of guest summary bit vector */ +- __u16 pad; +- } reg_aen; +- __u64 reserved[8]; +- } u; +-}; +- +-/* types for kvm_s390_zpci_op->op */ +-#define KVM_S390_ZPCIOP_REG_AEN 0 +-#define KVM_S390_ZPCIOP_DEREG_AEN 1 +- +-/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +-#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) +- + /* Available with KVM_CAP_MEMORY_ATTRIBUTES */ + #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index bcb21339ee..c3046c6bff 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -28,6 +28,9 @@ enum { + SEV_PEK_CERT_IMPORT, + SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ + SEV_GET_ID2, ++ SNP_PLATFORM_STATUS, ++ SNP_COMMIT, ++ SNP_SET_CONFIG, + + SEV_MAX, + }; +@@ -69,6 +72,12 @@ typedef enum { + SEV_RET_RESOURCE_LIMIT, + 
SEV_RET_SECURE_DATA_INVALID, + SEV_RET_INVALID_KEY = 0x27, ++ SEV_RET_INVALID_PAGE_SIZE, ++ SEV_RET_INVALID_PAGE_STATE, ++ SEV_RET_INVALID_MDATA_ENTRY, ++ SEV_RET_INVALID_PAGE_OWNER, ++ SEV_RET_INVALID_PAGE_AEAD_OFLOW, ++ SEV_RET_RMP_INIT_REQUIRED, + SEV_RET_MAX, + } sev_ret_code; + +@@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { + __u32 length; /* In/Out */ + } __attribute__((packed)); + ++/** ++ * struct sev_user_data_snp_status - SNP status ++ * ++ * @api_major: API major version ++ * @api_minor: API minor version ++ * @state: current platform state ++ * @is_rmp_initialized: whether RMP is initialized or not ++ * @rsvd: reserved ++ * @build_id: firmware build id for the API version ++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded ++ * @rsvd1: reserved ++ * @guest_count: the number of guest currently managed by the firmware ++ * @current_tcb_version: current TCB version ++ * @reported_tcb_version: reported TCB version ++ */ ++struct sev_user_data_snp_status { ++ __u8 api_major; /* Out */ ++ __u8 api_minor; /* Out */ ++ __u8 state; /* Out */ ++ __u8 is_rmp_initialized:1; /* Out */ ++ __u8 rsvd:7; ++ __u32 build_id; /* Out */ ++ __u32 mask_chip_id:1; /* Out */ ++ __u32 mask_chip_key:1; /* Out */ ++ __u32 vlek_en:1; /* Out */ ++ __u32 rsvd1:29; ++ __u32 guest_count; /* Out */ ++ __u64 current_tcb_version; /* Out */ ++ __u64 reported_tcb_version; /* Out */ ++} __attribute__((packed)); ++ ++/** ++ * struct sev_user_data_snp_config - system wide configuration value for SNP. ++ * ++ * @reported_tcb: the TCB version to report in the guest attestation report. 
++ * @mask_chip_id: whether chip id is present in attestation reports or not ++ * @mask_chip_key: whether attestation reports are signed or not ++ * @rsvd: reserved ++ * @rsvd1: reserved ++ */ ++struct sev_user_data_snp_config { ++ __u64 reported_tcb ; /* In */ ++ __u32 mask_chip_id:1; /* In */ ++ __u32 mask_chip_key:1; /* In */ ++ __u32 rsvd:30; /* In */ ++ __u8 rsvd1[52]; ++} __attribute__((packed)); ++ + /** + * struct sev_issue_cmd - SEV ioctl parameters + * +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..bea6973906 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -227,4 +227,11 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* Get the queue size of a specific virtqueue. ++ * userspace set the vring index in vhost_vring_state.index ++ * kernel set the queue size in vhost_vring_state.num ++ */ ++#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ ++ struct vhost_vring_state) + #endif +-- +2.39.3 + diff --git a/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch b/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch new file mode 100644 index 0000000..6524d6a --- /dev/null +++ b/kvm-machine-allow-early-use-of-machine_require_guest_mem.patch @@ -0,0 +1,71 @@ +From 9f485c8df885bcd1ff6c5692463c6168bfec07fb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 31 May 2024 13:29:53 +0200 +Subject: [PATCH 054/100] machine: allow early use of + machine_require_guest_memfd + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [54/91] fd8c1a6624d5f27268215c8aa70dfc9d37bdb981 (bonzini/rhel-qemu-kvm) + +Ask the ConfidentialGuestSupport object whether to use guest_memfd +for KVM-backend private memory. 
This bool can be set in instance_init +(or user_complete) so that it is available when the machine is created. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765) +Signed-off-by: Paolo Bonzini +--- + hw/core/machine.c | 2 +- + include/exec/confidential-guest-support.h | 5 +++++ + include/hw/boards.h | 1 - + 3 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 07b994e136..2055e0d312 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -1482,7 +1482,7 @@ bool machine_mem_merge(MachineState *machine) + + bool machine_require_guest_memfd(MachineState *machine) + { +- return machine->require_guest_memfd; ++ return machine->cgs && machine->cgs->require_guest_memfd; + } + + static char *cpu_slot_to_string(const CPUArchId *cpu) +diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h +index e5b188cffb..02dc4e518f 100644 +--- a/include/exec/confidential-guest-support.h ++++ b/include/exec/confidential-guest-support.h +@@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + struct ConfidentialGuestSupport { + Object parent; + ++ /* ++ * True if the machine should use guest_memfd for RAM. 
++ */ ++ bool require_guest_memfd; ++ + /* + * ready: flag set by CGS initialization code once it's ready to + * start executing instructions in a potentially-secure +diff --git a/include/hw/boards.h b/include/hw/boards.h +index 815a1c4b26..0d1f9533ef 100644 +--- a/include/hw/boards.h ++++ b/include/hw/boards.h +@@ -373,7 +373,6 @@ struct MachineState { + char *dt_compatible; + bool dump_guest_core; + bool mem_merge; +- bool require_guest_memfd; + bool usb; + bool usb_disabled; + char *firmware; +-- +2.39.3 + diff --git a/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch b/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch new file mode 100644 index 0000000..538e82d --- /dev/null +++ b/kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch @@ -0,0 +1,85 @@ +From 331c58d87dde8b4757e1d1e09d9b16bac2952d22 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 30 May 2024 06:16:15 -0500 +Subject: [PATCH 081/100] memory: Introduce + memory_region_init_ram_guest_memfd() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [81/91] d5b0898d791f3f90d1acda0230f96ca9bf5be5e4 (bonzini/rhel-qemu-kvm) + +Introduce memory_region_init_ram_guest_memfd() to allocate private +guest memfd on the MemoryRegion initialization. It's for the use case of +TDVF, which must be private on TDX case. 
+ +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Signed-off-by: Pankaj Gupta +Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a0aa6db7ce72a08703774107185e639e73e7754c) +Signed-off-by: Paolo Bonzini +--- + include/exec/memory.h | 6 ++++++ + system/memory.c | 24 ++++++++++++++++++++++++ + 2 files changed, 30 insertions(+) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 679a847685..1e351f6fc8 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1603,6 +1603,12 @@ bool memory_region_init_ram(MemoryRegion *mr, + uint64_t size, + Error **errp); + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp); ++ + /** + * memory_region_init_rom: Initialize a ROM memory region. + * +diff --git a/system/memory.c b/system/memory.c +index c756950c0c..b09065eef3 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3606,6 +3606,30 @@ bool memory_region_init_ram(MemoryRegion *mr, + return true; + } + ++bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, ++ Object *owner, ++ const char *name, ++ uint64_t size, ++ Error **errp) ++{ ++ DeviceState *owner_dev; ++ ++ if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, ++ RAM_GUEST_MEMFD, errp)) { ++ return false; ++ } ++ /* This will assert if owner is neither NULL nor a DeviceState. ++ * We only want the owner here for the purposes of defining a ++ * unique name for migration. TODO: Ideally we should implement ++ * a naming scheme for Objects which are not DeviceStates, in ++ * which case we can relax this restriction. 
++ */ ++ owner_dev = DEVICE(owner); ++ vmstate_register_ram(mr, owner_dev); ++ ++ return true; ++} ++ + bool memory_region_init_rom(MemoryRegion *mr, + Object *owner, + const char *name, +-- +2.39.3 + diff --git a/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch b/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch new file mode 100644 index 0000000..5459a51 --- /dev/null +++ b/kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch @@ -0,0 +1,184 @@ +From f76d73f62555ad73081558c1f56bcb832fbb8c35 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Tue, 6 Aug 2024 13:53:00 -0500 +Subject: [PATCH 098/100] nbd/server: CVE-2024-7409: Cap default + max-connections to 100 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] 1fb3b8cd9781a66bba2f4a6bee2b320e96de86aa (redhat/centos-stream/src/qemu-kvm) + +Allowing an unlimited number of clients to any web service is a recipe +for a rudimentary denial of service attack: the client merely needs to +open lots of sockets without closing them, until qemu no longer has +any more fds available to allocate. + +For qemu-nbd, we default to allowing only 1 connection unless more are +explicitly asked for (-e or --shared); this was historically picked as +a nice default (without an explicit -t, a non-persistent qemu-nbd goes +away after a client disconnects, without needing any additional +follow-up commands), and we are not going to change that interface now +(besides, someday we want to point people towards qemu-storage-daemon +instead of qemu-nbd). 
+ +But for qemu proper, and the newer qemu-storage-daemon, the QMP +nbd-server-start command has historically had a default of unlimited +number of connections, in part because unlike qemu-nbd it is +inherently persistent until nbd-server-stop. Allowing multiple client +sockets is particularly useful for clients that can take advantage of +MULTI_CONN (creating parallel sockets to increase throughput), +although known clients that do so (such as libnbd's nbdcopy) typically +use only 8 or 16 connections (the benefits of scaling diminish once +more sockets are competing for kernel attention). Picking a number +large enough for typical use cases, but not unlimited, makes it +slightly harder for a malicious client to perform a denial of service +merely by opening lots of connections without progressing through the +handshake. + +This change does not eliminate CVE-2024-7409 on its own, but reduces +the chance for fd exhaustion or unlimited memory usage as an attack +surface. On the other hand, by itself, it makes it more obvious that +with a finite limit, we have the problem of an unauthenticated client +holding 100 fds opened as a way to block out a legitimate client from +being able to connect; thus, later patches will further add timeouts +to reject clients that are not making progress. + +This is an INTENTIONAL change in behavior, and will break any client +of nbd-server-start that was not passing an explicit max-connections +parameter, yet expects more than 100 simultaneous connections. 
We are +not aware of any such client (as stated above, most clients aware of +MULTI_CONN get by just fine on 8 or 16 connections, and probably cope +with later connections failing by relying on the earlier connections; +libvirt has not yet been passing max-connections, but generally +creates NBD servers with the intent for a single client for the sake +of live storage migration; meanwhile, the KubeSAN project anticipates +a large cluster sharing multiple clients [up to 8 per node, and up to +100 nodes in a cluster], but it currently uses qemu-nbd with an +explicit --shared=0 rather than qemu-storage-daemon with +nbd-server-start). + +We considered using a deprecation period (declare that omitting +max-connections is deprecated, and make it mandatory in 3 releases - +then we don't need to pick an arbitrary default); that has zero risk +of breaking any apps that accidentally depended on more than 100 +connections, and where such breakage might not be noticed under unit +testing but only under the larger loads of production usage. But it +does not close the denial-of-service hole until far into the future, +and requires all apps to change to add the parameter even if 100 was +good enough. It also has a drawback that any app (like libvirt) that +is accidentally relying on an unlimited default should seriously +consider their own CVE now, at which point they are going to change to +pass explicit max-connections sooner than waiting for 3 qemu releases. +Finally, if our changed default breaks an app, that app can always +pass in an explicit max-connections with a larger value. + +It is also intentional that the HMP interface to nbd-server-start is +not changed to expose max-connections (any client needing to fine-tune +things should be using QMP). + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-12-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[ericb: Expand commit message to summarize Dan's argument for why we +break corner-case back-compat behavior without a deprecation period] +Signed-off-by: Eric Blake + +(cherry picked from commit c8a76dbd90c2f48df89b75bef74917f90a59b623) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + block/monitor/block-hmp-cmds.c | 3 ++- + blockdev-nbd.c | 8 ++++++++ + include/block/nbd.h | 7 +++++++ + qapi/block-export.json | 4 ++-- + 4 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c +index d954bec6f1..bdf2eb50b6 100644 +--- a/block/monitor/block-hmp-cmds.c ++++ b/block/monitor/block-hmp-cmds.c +@@ -402,7 +402,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict *qdict) + goto exit; + } + +- nbd_server_start(addr, NULL, NULL, 0, &local_err); ++ nbd_server_start(addr, NULL, NULL, NBD_DEFAULT_MAX_CONNECTIONS, ++ &local_err); + qapi_free_SocketAddress(addr); + if (local_err != NULL) { + goto exit; +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 267a1de903..24ba5382db 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -170,6 +170,10 @@ void nbd_server_start(SocketAddress *addr, const char *tls_creds, + + void nbd_server_start_options(NbdServerOptions *arg, Error **errp) + { ++ if (!arg->has_max_connections) { ++ arg->max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(arg->addr, arg->tls_creds, arg->tls_authz, + arg->max_connections, errp); + } +@@ -182,6 +186,10 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr, + { + SocketAddress *addr_flat = socket_address_flatten(addr); + ++ if (!has_max_connections) { ++ max_connections = NBD_DEFAULT_MAX_CONNECTIONS; ++ } ++ + nbd_server_start(addr_flat, tls_creds, tls_authz, max_connections, errp); + qapi_free_SocketAddress(addr_flat); + } +diff --git a/include/block/nbd.h b/include/block/nbd.h +index 1d4d65922d..d4f8b21aec 100644 +--- a/include/block/nbd.h ++++ 
b/include/block/nbd.h +@@ -39,6 +39,13 @@ extern const BlockExportDriver blk_exp_nbd; + */ + #define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 + ++/* ++ * NBD_DEFAULT_MAX_CONNECTIONS: Number of client sockets to allow at ++ * once; must be large enough to allow a MULTI_CONN-aware client like ++ * nbdcopy to create its typical number of 8-16 sockets. ++ */ ++#define NBD_DEFAULT_MAX_CONNECTIONS 100 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +diff --git a/qapi/block-export.json b/qapi/block-export.json +index 3919a2d5b9..f45e4fd481 100644 +--- a/qapi/block-export.json ++++ b/qapi/block-export.json +@@ -28,7 +28,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0) ++# default: 100) + # + # Since: 4.2 + ## +@@ -63,7 +63,7 @@ + # @max-connections: The maximum number of connections to allow at the + # same time, 0 for unlimited. Setting this to 1 also stops the + # server from advertising multiple client support (since 5.2; +-# default: 0). ++# default: 100). 
+ # + # Errors: + # - if the server is already running +-- +2.39.3 + diff --git a/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch b/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch new file mode 100644 index 0000000..2ba16e5 --- /dev/null +++ b/kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch @@ -0,0 +1,173 @@ +From 6522c68268f00c9c5665f8f98cf6ed1984124cf3 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 12:23:13 -0500 +Subject: [PATCH 100/100] nbd/server: CVE-2024-7409: Close stray clients at + server-stop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] c00bb5a7e73446e9f071ef83e4f1576f73a17059 (redhat/centos-stream/src/qemu-kvm) + +A malicious client can attempt to connect to an NBD server, and then +intentionally delay progress in the handshake, including if it does +not know the TLS secrets. Although the previous two patches reduce +this behavior by capping the default max-connections parameter and +killing slow clients, they did not eliminate the possibility of a +client waiting to close the socket until after the QMP nbd-server-stop +command is executed, at which point qemu would SEGV when trying to +dereference the NULL nbd_server global which is no longer present. +This amounts to a denial of service attack. Worse, if another NBD +server is started before the malicious client disconnects, I cannot +rule out additional adverse effects when the old client interferes +with the connection count of the new server (although the most likely +is a crash due to an assertion failure when checking +nbd_server->connections > 0). + +For environments without this patch, the CVE can be mitigated by +ensuring (such as via a firewall) that only trusted clients can +connect to an NBD server. 
Note that using frameworks like libvirt +that ensure that TLS is used and that nbd-server-stop is not executed +while any trusted clients are still connected will only help if there +is also no possibility for an untrusted client to open a connection +but then stall on the NBD handshake. + +Given the previous patches, it would be possible to guarantee that no +clients remain connected by having nbd-server-stop sleep for longer +than the default handshake deadline before finally freeing the global +nbd_server object, but that could make QMP non-responsive for a long +time. So instead, this patch fixes the problem by tracking all client +sockets opened while the server is running, and forcefully closing any +such sockets remaining without a completed handshake at the time of +nbd-server-stop, then waiting until the coroutines servicing those +sockets notice the state change. nbd-server-stop now has a second +AIO_WAIT_WHILE_UNLOCKED (the first is indirectly through the +blk_exp_close_all_type() that disconnects all clients that completed +handshakes), but forced socket shutdown is enough to progress the +coroutines and quickly tear down all clients before the server is +freed, thus finally fixing the CVE. + +This patch relies heavily on the fact that nbd/server.c guarantees +that it only calls nbd_blockdev_client_closed() from the main loop +(see the assertion in nbd_client_put() and the hoops used in +nbd_client_put_nonzero() to achieve that); if we did not have that +guarantee, we would also need a mutex protecting our accesses of the +list of connections to survive re-entrancy from independent iothreads. 
+ +Although I did not actually try to test old builds, it looks like this +problem has existed since at least commit 862172f45c (v2.12.0, 2017) - +even back when that patch started using a QIONetListener to handle +listening on multiple sockets, nbd_server_free() was already unaware +that the nbd_blockdev_client_closed callback can be reached later by a +client thread that has not completed handshakes (and therefore the +client's socket never got added to the list closed in +nbd_export_close_all), despite that patch intentionally tearing down +the QIONetListener to prevent new clients. + +Reported-by: Alexander Ivanov +Fixes: CVE-2024-7409 +CC: qemu-stable@nongnu.org +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-14-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé + +(cherry picked from commit 3e7ef738c8462c45043a1d39f702a0990406a3b3) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 35 ++++++++++++++++++++++++++++++++++- + 1 file changed, 34 insertions(+), 1 deletion(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 24ba5382db..f73409ae49 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -21,12 +21,18 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + ++typedef struct NBDConn { ++ QIOChannelSocket *cioc; ++ QLIST_ENTRY(NBDConn) next; ++} NBDConn; ++ + typedef struct NBDServerData { + QIONetListener *listener; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; + uint32_t max_connections; + uint32_t connections; ++ QLIST_HEAD(, NBDConn) conns; + } NBDServerData; + + static NBDServerData *nbd_server; +@@ -51,6 +57,14 @@ int nbd_server_max_connections(void) + + static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + { ++ NBDConn *conn = nbd_client_owner(client); ++ ++ assert(qemu_in_main_thread() && nbd_server); ++ ++ object_unref(OBJECT(conn->cioc)); ++ QLIST_REMOVE(conn, next); ++ g_free(conn); ++ + nbd_client_put(client); + 
assert(nbd_server->connections > 0); + nbd_server->connections--; +@@ -60,14 +74,20 @@ static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) + static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + gpointer opaque) + { ++ NBDConn *conn = g_new0(NBDConn, 1); ++ ++ assert(qemu_in_main_thread() && nbd_server); + nbd_server->connections++; ++ object_ref(OBJECT(cioc)); ++ conn->cioc = cioc; ++ QLIST_INSERT_HEAD(&nbd_server->conns, conn, next); + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); + /* TODO - expose handshake timeout as QMP option */ + nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, + nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed, NULL); ++ nbd_blockdev_client_closed, conn); + } + + static void nbd_update_server_watch(NBDServerData *s) +@@ -81,12 +101,25 @@ static void nbd_update_server_watch(NBDServerData *s) + + static void nbd_server_free(NBDServerData *server) + { ++ NBDConn *conn, *tmp; ++ + if (!server) { + return; + } + ++ /* ++ * Forcefully close the listener socket, and any clients that have ++ * not yet disconnected on their own. 
++ */ + qio_net_listener_disconnect(server->listener); + object_unref(OBJECT(server->listener)); ++ QLIST_FOREACH_SAFE(conn, &server->conns, next, tmp) { ++ qio_channel_shutdown(QIO_CHANNEL(conn->cioc), QIO_CHANNEL_SHUTDOWN_BOTH, ++ NULL); ++ } ++ ++ AIO_WAIT_WHILE_UNLOCKED(NULL, server->connections > 0); ++ + if (server->tlscreds) { + object_unref(OBJECT(server->tlscreds)); + } +-- +2.39.3 + diff --git a/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch b/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch new file mode 100644 index 0000000..e1755c2 --- /dev/null +++ b/kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch @@ -0,0 +1,135 @@ +From ca30846351f1136d15f55717a5534ad927f7cf52 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Thu, 8 Aug 2024 16:05:08 -0500 +Subject: [PATCH 099/100] nbd/server: CVE-2024-7409: Drop non-negotiating + clients +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] 8008a1067766951d9752bcc41c2127a07fce934d (redhat/centos-stream/src/qemu-kvm) + +A client that opens a socket but does not negotiate is merely hogging +qemu's resources (an open fd and a small amount of memory); and a +malicious client that can access the port where NBD is listening can +attempt a denial of service attack by intentionally opening and +abandoning lots of unfinished connections. The previous patch put a +default bound on the number of such ongoing connections, but once that +limit is hit, no more clients can connect (including legitimate ones). +The solution is to insist that clients complete handshake within a +reasonable time limit, defaulting to 10 seconds. 
A client that has +not successfully completed NBD_OPT_GO by then (including the case of +where the client didn't know TLS credentials to even reach the point +of NBD_OPT_GO) is wasting our time and does not deserve to stay +connected. Later patches will allow fine-tuning the limit away from +the default value (including disabling it for doing integration +testing of the handshake process itself). + +Note that this patch in isolation actually makes it more likely to see +qemu SEGV after nbd-server-stop, as any client socket still connected +when the server shuts down will now be closed after 10 seconds rather +than at the client's whims. That will be addressed in the next patch. + +For a demo of this patch in action: +$ qemu-nbd -f raw -r -t -e 10 file & +$ nbdsh --opt-mode -c ' +H = list() +for i in range(20): + print(i) + H.insert(i, nbd.NBD()) + H[i].set_opt_mode(True) + H[i].connect_uri("nbd://localhost") +' +$ kill $! + +where later connections get to start progressing once earlier ones are +forcefully dropped for taking too long, rather than hanging. + +Suggested-by: Daniel P. Berrangé +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-13-eblake@redhat.com> +Reviewed-by: Daniel P. 
Berrangé +[eblake: rebase to changes earlier in series, reduce scope of timer] +Signed-off-by: Eric Blake + +(cherry picked from commit b9b72cb3ce15b693148bd09cef7e50110566d8a0) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + nbd/server.c | 28 +++++++++++++++++++++++++++- + nbd/trace-events | 1 + + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index e50012499f..39285cc971 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3186,22 +3186,48 @@ static void nbd_client_receive_next_request(NBDClient *client) + } + } + ++static void nbd_handshake_timer_cb(void *opaque) ++{ ++ QIOChannel *ioc = opaque; ++ ++ trace_nbd_handshake_timer_cb(); ++ qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); ++} ++ + static coroutine_fn void nbd_co_client_start(void *opaque) + { + NBDClient *client = opaque; + Error *local_err = NULL; ++ QEMUTimer *handshake_timer = NULL; + + qemu_co_mutex_init(&client->send_lock); + +- /* TODO - utilize client->handshake_max_secs */ ++ /* ++ * Create a timer to bound the time spent in negotiation. If the ++ * timer expires, it is likely nbd_negotiate will fail because the ++ * socket was shutdown. 
++ */ ++ if (client->handshake_max_secs > 0) { ++ handshake_timer = aio_timer_new(qemu_get_aio_context(), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ nbd_handshake_timer_cb, ++ client->sioc); ++ timer_mod(handshake_timer, ++ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ client->handshake_max_secs * NANOSECONDS_PER_SECOND); ++ } ++ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); + } ++ timer_free(handshake_timer); + client_close(client, false); + return; + } + ++ timer_free(handshake_timer); + WITH_QEMU_LOCK_GUARD(&client->lock) { + nbd_client_receive_next_request(client); + } +diff --git a/nbd/trace-events b/nbd/trace-events +index 00ae3216a1..cbd0a4ab7e 100644 +--- a/nbd/trace-events ++++ b/nbd/trace-events +@@ -76,6 +76,7 @@ nbd_co_receive_request_payload_received(uint64_t cookie, uint64_t len) "Payload + nbd_co_receive_ext_payload_compliance(uint64_t from, uint64_t len) "client sent non-compliant write without payload flag: from=0x%" PRIx64 ", len=0x%" PRIx64 + nbd_co_receive_align_compliance(const char *op, uint64_t from, uint64_t len, uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" PRIx64 ", len=0x%" PRIx64 ", align=0x%" PRIx32 + nbd_trip(void) "Reading request" ++nbd_handshake_timer_cb(void) "client took too long to negotiate" + + # client-connection.c + nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64 +-- +2.39.3 + diff --git a/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch b/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch new file mode 100644 index 0000000..6b4c670 --- /dev/null +++ b/kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch @@ -0,0 +1,175 @@ +From 70acef52a99e5114699f5fa58de5f0b5c031b880 Mon Sep 17 00:00:00 2001 +From: Eric Blake +Date: Wed, 7 Aug 2024 08:50:01 -0500 +Subject: [PATCH 097/100] nbd/server: Plumb in new args to nbd_client_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Eric Blake +RH-MergeRequest: 262: nbd/server: fix CVE-2024-7409 (qemu crash on nbd-server-stop) +RH-Jira: RHEL-52617 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 7614e294e1f5b7861386950ae994bea166d19950 (redhat/centos-stream/src/qemu-kvm) + +Upcoming patches to fix a CVE need to track an opaque pointer passed +in by the owner of a client object, as well as request for a time +limit on how fast negotiation must complete. Prepare for that by +changing the signature of nbd_client_new() and adding an accessor to +get at the opaque pointer, although for now the two servers +(qemu-nbd.c and blockdev-nbd.c) do not change behavior even though +they pass in a new default timeout value. + +Suggested-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Eric Blake +Message-ID: <20240807174943.771624-11-eblake@redhat.com> +Reviewed-by: Daniel P. Berrangé +[eblake: s/LIMIT/MAX_SECS/ as suggested by Dan] +Signed-off-by: Eric Blake + +(cherry picked from commit fb1c2aaa981e0a2fa6362c9985f1296b74f055ac) +Jira: https://issues.redhat.com/browse/RHEL-52617 +Signed-off-by: Eric Blake +--- + blockdev-nbd.c | 6 ++++-- + include/block/nbd.h | 11 ++++++++++- + nbd/server.c | 20 +++++++++++++++++--- + qemu-nbd.c | 4 +++- + 4 files changed, 34 insertions(+), 7 deletions(-) + +diff --git a/blockdev-nbd.c b/blockdev-nbd.c +index 213012435f..267a1de903 100644 +--- a/blockdev-nbd.c ++++ b/blockdev-nbd.c +@@ -64,8 +64,10 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + nbd_update_server_watch(nbd_server); + + qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server"); +- nbd_client_new(cioc, nbd_server->tlscreds, nbd_server->tlsauthz, +- nbd_blockdev_client_closed); ++ /* TODO - expose handshake timeout as QMP option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ nbd_server->tlscreds, nbd_server->tlsauthz, ++ nbd_blockdev_client_closed, NULL); + } + + static void nbd_update_server_watch(NBDServerData *s) +diff --git a/include/block/nbd.h 
b/include/block/nbd.h +index 4e7bd6342f..1d4d65922d 100644 +--- a/include/block/nbd.h ++++ b/include/block/nbd.h +@@ -33,6 +33,12 @@ typedef struct NBDMetaContexts NBDMetaContexts; + + extern const BlockExportDriver blk_exp_nbd; + ++/* ++ * NBD_DEFAULT_HANDSHAKE_MAX_SECS: Number of seconds in which client must ++ * succeed at NBD_OPT_GO before being forcefully dropped as too slow. ++ */ ++#define NBD_DEFAULT_HANDSHAKE_MAX_SECS 10 ++ + /* Handshake phase structs - this struct is passed on the wire */ + + typedef struct NBDOption { +@@ -403,9 +409,12 @@ AioContext *nbd_export_aio_context(NBDExport *exp); + NBDExport *nbd_export_find(const char *name); + + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)); ++ void (*close_fn)(NBDClient *, bool), ++ void *owner); ++void *nbd_client_owner(NBDClient *client); + void nbd_client_get(NBDClient *client); + void nbd_client_put(NBDClient *client); + +diff --git a/nbd/server.c b/nbd/server.c +index 892797bb11..e50012499f 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -124,12 +124,14 @@ struct NBDMetaContexts { + struct NBDClient { + int refcount; /* atomic */ + void (*close_fn)(NBDClient *client, bool negotiated); ++ void *owner; + + QemuMutex lock; + + NBDExport *exp; + QCryptoTLSCreds *tlscreds; + char *tlsauthz; ++ uint32_t handshake_max_secs; + QIOChannelSocket *sioc; /* The underlying data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ + +@@ -3191,6 +3193,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + + qemu_co_mutex_init(&client->send_lock); + ++ /* TODO - utilize client->handshake_max_secs */ + if (nbd_negotiate(client, &local_err)) { + if (local_err) { + error_report_err(local_err); +@@ -3205,14 +3208,17 @@ static coroutine_fn void nbd_co_client_start(void *opaque) + } + + /* +- * Create a new client listener using the given channel @sioc. 
++ * Create a new client listener using the given channel @sioc and @owner. + * Begin servicing it in a coroutine. When the connection closes, call +- * @close_fn with an indication of whether the client completed negotiation. ++ * @close_fn with an indication of whether the client completed negotiation ++ * within @handshake_max_secs seconds (0 for unbounded). + */ + void nbd_client_new(QIOChannelSocket *sioc, ++ uint32_t handshake_max_secs, + QCryptoTLSCreds *tlscreds, + const char *tlsauthz, +- void (*close_fn)(NBDClient *, bool)) ++ void (*close_fn)(NBDClient *, bool), ++ void *owner) + { + NBDClient *client; + Coroutine *co; +@@ -3225,13 +3231,21 @@ void nbd_client_new(QIOChannelSocket *sioc, + object_ref(OBJECT(client->tlscreds)); + } + client->tlsauthz = g_strdup(tlsauthz); ++ client->handshake_max_secs = handshake_max_secs; + client->sioc = sioc; + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + object_ref(OBJECT(client->sioc)); + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + client->close_fn = close_fn; ++ client->owner = owner; + + co = qemu_coroutine_create(nbd_co_client_start, client); + qemu_coroutine_enter(co); + } ++ ++void * ++nbd_client_owner(NBDClient *client) ++{ ++ return client->owner; ++} +diff --git a/qemu-nbd.c b/qemu-nbd.c +index d7b3ccab21..48e2fa5858 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -390,7 +390,9 @@ static void nbd_accept(QIONetListener *listener, QIOChannelSocket *cioc, + + nb_fds++; + nbd_update_server_watch(); +- nbd_client_new(cioc, tlscreds, tlsauthz, nbd_client_closed); ++ /* TODO - expose handshake timeout as command line option */ ++ nbd_client_new(cioc, NBD_DEFAULT_HANDSHAKE_MAX_SECS, ++ tlscreds, tlsauthz, nbd_client_closed, NULL); + } + + static void nbd_update_server_watch(void) +-- +2.39.3 + diff --git a/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch b/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch new file mode 100644 index 0000000..2c21dde --- 
/dev/null +++ b/kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch @@ -0,0 +1,68 @@ +From c0a65c752cd83dea27cbeb34074d65fb2c5a6b59 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:13 -0500 +Subject: [PATCH 008/100] pci-host/q35: Move PAM initialization above SMRAM + initialization + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [8/91] 22a9221d4726e872aa0f0dc25ae9d823c0611547 (bonzini/rhel-qemu-kvm) + +In mch_realize(), process PAM initialization before SMRAM initialization so +that a later patch can skip all the SMRAM-related setup with a single check. + +Signed-off-by: Isaku Yamahata +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-18-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae) +Signed-off-by: Paolo Bonzini +--- + hw/pci-host/q35.c | 19 ++++++++++--------- + 1 file changed, 10 insertions(+), 9 deletions(-) + +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 0d7d4e3f08..98d4a7c253 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) + /* setup pci memory mapping */ + pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + ++ /* PAM */ ++ init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_BIOS_BASE, PAM_BIOS_SIZE); ++ for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { ++ init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, ++ mch->system_memory, mch->pci_address_space, ++ PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, 
MCH_HOST_BRIDGE_SMRAM_C_BASE, +@@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) + + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&mch->smram)); +- +- init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_BIOS_BASE, PAM_BIOS_SIZE); +- for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { +- init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, +- mch->system_memory, mch->pci_address_space, +- PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); +- } + } + + uint64_t mch_mcfg_base(void) +-- +2.39.3 + diff --git a/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch b/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch new file mode 100644 index 0000000..66e0423 --- /dev/null +++ b/kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch @@ -0,0 +1,83 @@ +From c70f6e7e3461e6562c0591079cc71068bf0f2ed8 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 20 Mar 2024 03:39:07 -0500 +Subject: [PATCH 033/100] physmem: Introduce + ram_block_discard_guest_memfd_range() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [33/91] b6169fa8d752d83977b18897be24f6ab9f3d3472 (bonzini/rhel-qemu-kvm) + +When a memory page is converted from private to shared, the original +private memory is backed by guest_memfd. Introduce +ram_block_discard_guest_memfd_range() for discarding memory in +guest_memfd. + +Based on a patch by Isaku Yamahata . 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by: David Hildenbrand +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-12-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335) +Signed-off-by: Paolo Bonzini +--- + include/exec/cpu-common.h | 2 ++ + system/physmem.c | 23 +++++++++++++++++++++++ + 2 files changed, 25 insertions(+) + +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 6346df17ce..6d5318895a 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -159,6 +159,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); + + int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); + int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length); + + #endif + +diff --git a/system/physmem.c b/system/physmem.c +index 5ebcf5be11..c3d04ca921 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -3721,6 +3721,29 @@ err: + return ret; + } + ++int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, ++ size_t length) ++{ ++ int ret = -1; ++ ++#ifdef CONFIG_FALLOCATE_PUNCH_HOLE ++ ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, ++ start, length); ++ ++ if (ret) { ++ ret = -errno; ++ error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++ } ++#else ++ ret = -ENOSYS; ++ error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", ++ __func__, rb->idstr, start, length, ret); ++#endif ++ ++ return ret; ++} ++ + bool ramblock_is_pmem(RAMBlock *rb) + { + return rb->flags & RAM_PMEM; +-- +2.39.3 + diff --git a/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch b/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch new file mode 100644 index 0000000..037442c --- /dev/null +++ 
b/kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch @@ -0,0 +1,140 @@ +From cfb109b393e019398a52f66a5ff0e9581c841335 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:37 -0500 +Subject: [PATCH 013/100] ppc/pef: switch to use + confidential_guest_kvm_init/reset() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [13/91] e25c498fc79a4c8e22ca41d9cbd06e40b4cf1f11 (bonzini/rhel-qemu-kvm) + +Use the unified interface to call confidential guest related kvm_init() +and kvm_reset(), to avoid exposing pef specific functions. + +As a bonus, pef.h goes away since there is no direct call from sPAPR +board code to PEF code anymore. + +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6) +Signed-off-by: Paolo Bonzini +--- + hw/ppc/pef.c | 9 ++++++--- + hw/ppc/spapr.c | 10 +++++++--- + include/hw/ppc/pef.h | 17 ----------------- + 3 files changed, 13 insertions(+), 23 deletions(-) + delete mode 100644 include/hw/ppc/pef.h + +diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c +index d28ed3ba73..47553348b1 100644 +--- a/hw/ppc/pef.c ++++ b/hw/ppc/pef.c +@@ -15,7 +15,6 @@ + #include "sysemu/kvm.h" + #include "migration/blocker.h" + #include "exec/confidential-guest-support.h" +-#include "hw/ppc/pef.h" + + #define TYPE_PEF_GUEST "pef-guest" + OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) +@@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) + #endif + } + +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + return kvmppc_svm_init(cgs, errp); + } + +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) 
++static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; +@@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + + static void pef_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = pef_kvm_init; ++ klass->kvm_reset = pef_kvm_reset; + } + + static void pef_guest_init(Object *obj) +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index a258d81846..6f6f0fd790 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -75,6 +75,7 @@ + #include "hw/virtio/vhost-scsi-common.h" + + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/usb.h" + #include "qemu/config-file.h" + #include "qemu/error-report.h" +@@ -87,7 +88,6 @@ + #include "hw/ppc/spapr_tpm_proxy.h" + #include "hw/ppc/spapr_nvdimm.h" + #include "hw/ppc/spapr_numa.h" +-#include "hw/ppc/pef.h" + + #include "monitor/monitor.h" + +@@ -1715,7 +1715,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) + qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); + } + +- pef_kvm_reset(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_reset(machine->cgs, &error_fatal); ++ } + spapr_caps_apply(spapr); + spapr_nested_reset(spapr); + if (spapr->svm_allowed) { +@@ -2844,7 +2846,9 @@ static void spapr_machine_init(MachineState *machine) + /* + * if Secure VM (PEF) support is configured, then initialize it + */ +- pef_kvm_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + msi_nonbroken = true; + +diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h +deleted file mode 100644 +index 707dbe524c..0000000000 +--- a/include/hw/ppc/pef.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* +- * PEF (Protected Execution Facility) for POWER support +- * +- * Copyright Red Hat. 
+- * +- * This work is licensed under the terms of the GNU GPL, version 2 or later. +- * See the COPYING file in the top-level directory. +- * +- */ +- +-#ifndef HW_PPC_PEF_H +-#define HW_PPC_PEF_H +- +-int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); +- +-#endif /* HW_PPC_PEF_H */ +-- +2.39.3 + diff --git a/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch b/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch new file mode 100644 index 0000000..c7f121b --- /dev/null +++ b/kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch @@ -0,0 +1,164 @@ +From 83bb32c25472b500738a54ac8f2ad0f5c496acf1 Mon Sep 17 00:00:00 2001 +From: Isaku Yamahata +Date: Wed, 20 Mar 2024 03:39:14 -0500 +Subject: [PATCH 009/100] q35: Introduce smm_ranges property for q35-pci-host + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [9/91] 931156772bfc2085e7241eecc56cf6eca3dac1fd (bonzini/rhel-qemu-kvm) + +Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, +etc... exist for the target platform. TDX doesn't support SMM and doesn't +play nice with QEMU modifying related guest memory ranges. 
+ +Signed-off-by: Isaku Yamahata +Co-developed-by: Sean Christopherson +Signed-off-by: Sean Christopherson +Signed-off-by: Xiaoyao Li +Signed-off-by: Michael Roth +Message-ID: <20240320083945.991426-19-michael.roth@amd.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc_q35.c | 2 ++ + hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ + include/hw/i386/pc.h | 1 + + include/hw/pci-host/q35.h | 1 + + 4 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 9adcdadce8..dedc86eec9 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -219,6 +219,8 @@ static void pc_q35_init(MachineState *machine) + x86ms->above_4g_mem_size, NULL); + object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); ++ object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, ++ x86_machine_is_smm_enabled(x86ms), NULL); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + /* pci */ +diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c +index 98d4a7c253..0b6cbaed7e 100644 +--- a/hw/pci-host/q35.c ++++ b/hw/pci-host/q35.c +@@ -179,6 +179,8 @@ static Property q35_host_props[] = { + mch.below_4g_mem_size, 0), + DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, + mch.above_4g_mem_size, 0), ++ DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, ++ mch.has_smm_ranges, true), + DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), + DEFINE_PROP_END_OF_LIST(), + }; +@@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) + /* mch's object_initialize resets the default value, set it again */ + qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, + Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); ++ + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", + q35_host_get_pci_hole_start, + NULL, NULL, NULL); +@@ -476,6 +479,10 @@ static void 
mch_write_config(PCIDevice *d, + mch_update_pciexbar(mch); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, + MCH_HOST_BRIDGE_SMRAM_SIZE)) { + mch_update_smram(mch); +@@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, + static void mch_update(MCHPCIState *mch) + { + mch_update_pciexbar(mch); ++ + mch_update_pam(mch); +- mch_update_smram(mch); +- mch_update_ext_tseg_mbytes(mch); +- mch_update_smbase_smram(mch); ++ if (mch->has_smm_ranges) { ++ mch_update_smram(mch); ++ mch_update_ext_tseg_mbytes(mch); ++ mch_update_smbase_smram(mch); ++ } + + /* + * pci hole goes from end-of-low-ram to io-apic. +@@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) + pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, + MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); + +- d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; +- d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; +- d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; +- d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; ++ if (mch->has_smm_ranges) { ++ d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; ++ d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; ++ d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; ++ d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + +- if (mch->ext_tseg_mbytes > 0) { +- pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, +- MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); +- } ++ if (mch->ext_tseg_mbytes > 0) { ++ pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, ++ MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); ++ } + +- d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; +- d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; ++ d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; ++ } + + mch_update(mch); + } +@@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) + 
PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + ++ if (!mch->has_smm_ranges) { ++ return; ++ } ++ + /* if *disabled* show SMRAM to all CPUs */ + memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", + mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, +diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h +index 87420783ab..467e7fb52f 100644 +--- a/include/hw/i386/pc.h ++++ b/include/hw/i386/pc.h +@@ -164,6 +164,7 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); + #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" + #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" + #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" ++#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" + + + void pc_pci_as_mapping_init(MemoryRegion *system_memory, +diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h +index bafcbe6752..22fadfa3ed 100644 +--- a/include/hw/pci-host/q35.h ++++ b/include/hw/pci-host/q35.h +@@ -50,6 +50,7 @@ struct MCHPCIState { + MemoryRegion tseg_blackhole, tseg_window; + MemoryRegion smbase_blackhole, smbase_window; + bool has_smram_at_smbase; ++ bool has_smm_ranges; + Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; +-- +2.39.3 + diff --git a/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch b/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch new file mode 100644 index 0000000..4f757bf --- /dev/null +++ b/kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch @@ -0,0 +1,67 @@ +From 4c93bec108f7e3918a2ef91b51cec477ade38cc3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 17:45:56 -0400 +Subject: [PATCH 018/100] runstate: skip initial CPU reset if reset is not + actually possible +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das 
+RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [18/91] ced267fdaadbf2072c1897223522457a006e6c81 (bonzini/rhel-qemu-kvm) + +Right now, the system reset is concluded by a call to +cpu_synchronize_all_post_reset() in order to sync any changes +that the machine reset callback applied to the CPU state. + +However, for VMs with encrypted state such as SEV-ES guests (currently +the only case of guests with non-resettable CPUs) this cannot be done, +because guest state has already been finalized by machine-init-done notifiers. +cpu_synchronize_all_post_reset() does nothing on these guests, and actually +we would like to make it fail if called once guest has been encrypted. +So, assume that boards that support non-resettable CPUs do not touch +CPU state and that all such setup is done before, at the time of +cpu_synchronize_all_post_init(). + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539) +Signed-off-by: Paolo Bonzini +--- + system/runstate.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/system/runstate.c b/system/runstate.c +index d6ab860eca..cb4905a40f 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -501,7 +501,20 @@ void qemu_system_reset(ShutdownCause reason) + default: + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } +- cpu_synchronize_all_post_reset(); ++ ++ /* ++ * Some boards use the machine reset callback to point CPUs to the firmware ++ * entry point. Assume that this is not the case for boards that support ++ * non-resettable CPUs (currently used only for confidential guests), in ++ * which case cpu_synchronize_all_post_init() is enough because ++ * it does _more_ than cpu_synchronize_all_post_reset(). 
++ */ ++ if (cpus_are_resettable()) { ++ cpu_synchronize_all_post_reset(); ++ } else { ++ assert(runstate_check(RUN_STATE_PRELAUNCH)); ++ } ++ + vm_set_suspended(false); + } + +-- +2.39.3 + diff --git a/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch b/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch new file mode 100644 index 0000000..bc6e4e3 --- /dev/null +++ b/kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch @@ -0,0 +1,109 @@ +From 4ebc58d4a7a3d4a20f20f1cd3f21082b80097fe2 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 Feb 2024 01:00:38 -0500 +Subject: [PATCH 014/100] s390: Switch to use confidential_guest_kvm_init() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [14/91] 8c9e09ec9976c00b41c02868fff034286a341468 (bonzini/rhel-qemu-kvm) + +Use unified confidential_guest_kvm_init() for consistency with +other architectures. 
+ +Signed-off-by: Xiaoyao Li +Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a14a2b0148e657cc526b7a75f2a1937628764e7a) +Signed-off-by: Paolo Bonzini +--- + hw/s390x/s390-virtio-ccw.c | 5 ++++- + target/s390x/kvm/pv.c | 10 +++++++++- + target/s390x/kvm/pv.h | 14 -------------- + 3 files changed, 13 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index 9ad54682c6..828ce6e87e 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -14,6 +14,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "exec/ram_addr.h" ++#include "exec/confidential-guest-support.h" + #include "hw/s390x/s390-virtio-hcall.h" + #include "hw/s390x/sclp.h" + #include "hw/s390x/s390_flic.h" +@@ -260,7 +261,9 @@ static void ccw_init(MachineState *machine) + s390_init_cpus(machine); + + /* Need CPU model to be determined before we can set up PV */ +- s390_pv_init(machine->cgs, &error_fatal); ++ if (machine->cgs) { ++ confidential_guest_kvm_init(machine->cgs, &error_fatal); ++ } + + s390_flic_init(); + +diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c +index 7ca7faec73..dde836d21a 100644 +--- a/target/s390x/kvm/pv.c ++++ b/target/s390x/kvm/pv.c +@@ -334,12 +334,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) + return s390_pv_check_cpus(errp); + } + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ++static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { + return 0; + } + ++ if (!kvm_enabled()) { ++ error_setg(errp, "Protected Virtualization requires KVM"); ++ return -1; ++ } ++ + if (!s390_has_feat(S390_FEAT_UNPACK)) { + error_setg(errp, + "CPU model does not support Protected Virtualization"); +@@ -364,6 +369,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, + + static void 
s390_pv_guest_class_init(ObjectClass *oc, void *data) + { ++ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ ++ klass->kvm_init = s390_pv_kvm_init; + } + + static void s390_pv_guest_init(Object *obj) +diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h +index 5877d28ff1..4b40817439 100644 +--- a/target/s390x/kvm/pv.h ++++ b/target/s390x/kvm/pv.h +@@ -80,18 +80,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, + static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + +-int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) +-{ +- if (!cgs) { +- return 0; +- } +- if (kvm_enabled()) { +- return s390_pv_kvm_init(cgs, errp); +- } +- +- error_setg(errp, "Protected Virtualization requires KVM"); +- return -1; +-} +- + #endif /* HW_S390_PV_H */ +-- +2.39.3 + diff --git a/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch b/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch new file mode 100644 index 0000000..2c0fb91 --- /dev/null +++ b/kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch @@ -0,0 +1,186 @@ +From 3d197f42afea6d0b176c2b26b772965692ffeab3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Alex=20Benn=C3=A9e?= +Date: Tue, 14 May 2024 18:42:44 +0100 +Subject: [PATCH 047/100] scripts/update-linux-header.sh: be more src tree + friendly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [47/91] c4165cc8bf79c3f96912e8210b3bb3565add288f (bonzini/rhel-qemu-kvm) + +Running "install_headers" in the Linux source tree is fairly +unfriendly as out-of-tree builds will start complaining about the +kernel source being non-pristine. 
As we have a temporary directory for +the install we should also do the build step here. So now we have: + + $tmpdir/ + $blddir/ + $hdrdir/ + +Reviewed-by: Pierrick Bouvier +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Alex Bennée +Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org> +(cherry picked from commit b51ddd937f11f76614d4b36d14d8778df242661c) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 80 +++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 39 deletions(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 5f20434d5c..4431ba4d54 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -27,6 +27,8 @@ + # types like "__u64". This work is done in the cp_portable function. + + tmpdir=$(mktemp -d) ++hdrdir="$tmpdir/headers" ++blddir="$tmpdir/build" + linux="$1" + output="$2" + +@@ -111,56 +113,56 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + +- make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install ++ make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + done + + if [ $arch = mips ]; then +- cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" +- cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n32.h" 
"$output/linux-headers/asm-mips/" ++ cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + fi + if [ $arch = powerpc ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + fi + + rm -rf "$output/include/standard-headers/asm-$arch" + mkdir -p "$output/include/standard-headers/asm-$arch" + if [ $arch = s390 ]; then +- cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" +- cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" ++ cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + fi + if [ $arch = arm ]; then +- cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" +- cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" ++ cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + fi + if [ $arch = arm64 ]; then +- cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" ++ cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + fi + if [ $arch = x86 ]; then +- cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" +- cp "$tmpdir/include/asm/unistd_64.h" 
"$output/linux-headers/asm-x86/" +- cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" ++ cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +- "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" +- cp_portable "$tmpdir/bootparam.h" \ ++ "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" ++ cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" +- cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ cp_portable "$hdrdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then +- cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" ++ cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done + arch= +@@ -170,13 +172,13 @@ mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ + vduse.h iommufd.h bits.h; do +- cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" ++ cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" + done + + rm -rf "$output/linux-headers/asm-generic" + mkdir -p "$output/linux-headers/asm-generic" + for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do +- cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" ++ cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + done + + if [ -L "$linux/source" ]; then +@@ -211,23 +213,23 @@ EOF + + rm -rf 
"$output/include/standard-headers/linux" + mkdir -p "$output/include/standard-headers/linux" +-for i in "$tmpdir"/include/linux/*virtio*.h \ +- "$tmpdir/include/linux/qemu_fw_cfg.h" \ +- "$tmpdir/include/linux/fuse.h" \ +- "$tmpdir/include/linux/input.h" \ +- "$tmpdir/include/linux/input-event-codes.h" \ +- "$tmpdir/include/linux/udmabuf.h" \ +- "$tmpdir/include/linux/pci_regs.h" \ +- "$tmpdir/include/linux/ethtool.h" \ +- "$tmpdir/include/linux/const.h" \ +- "$tmpdir/include/linux/kernel.h" \ +- "$tmpdir/include/linux/vhost_types.h" \ +- "$tmpdir/include/linux/sysinfo.h" \ +- "$tmpdir/include/misc/pvpanic.h"; do ++for i in "$hdrdir"/include/linux/*virtio*.h \ ++ "$hdrdir/include/linux/qemu_fw_cfg.h" \ ++ "$hdrdir/include/linux/fuse.h" \ ++ "$hdrdir/include/linux/input.h" \ ++ "$hdrdir/include/linux/input-event-codes.h" \ ++ "$hdrdir/include/linux/udmabuf.h" \ ++ "$hdrdir/include/linux/pci_regs.h" \ ++ "$hdrdir/include/linux/ethtool.h" \ ++ "$hdrdir/include/linux/const.h" \ ++ "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/vhost_types.h" \ ++ "$hdrdir/include/linux/sysinfo.h" \ ++ "$hdrdir/include/misc/pvpanic.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done + mkdir -p "$output/include/standard-headers/drm" +-cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ ++cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" + + rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" +-- +2.39.3 + diff --git a/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch b/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch new file mode 100644 index 0000000..eea324e --- /dev/null +++ b/kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch @@ -0,0 +1,38 @@ +From 5db9faee4d6efc9dbe010d2b745aba59d943d2ac Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Wed, 21 Feb 2024 10:51:38 -0600 +Subject: [PATCH 016/100] scripts/update-linux-headers: Add bits.h to file + 
imports + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [16/91] 150ee6376982bd5f471cb561f6760bf80d1211db (bonzini/rhel-qemu-kvm) + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b40b8eb609d3549ac14aab43849b20f5cba951c9) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index d48856f9e2..5f20434d5c 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -169,7 +169,7 @@ rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" + for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ + psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ +- vduse.h iommufd.h; do ++ vduse.h iommufd.h bits.h; do + cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + done + +-- +2.39.3 + diff --git a/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch b/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch new file mode 100644 index 0000000..86c1f9c --- /dev/null +++ b/kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch @@ -0,0 +1,83 @@ +From 2d0989fe09703ef46ba9c5d14770dbf8a6fd2f80 Mon Sep 17 00:00:00 2001 +From: Michael Roth +Date: Sun, 18 Feb 2024 23:35:02 -0600 +Subject: [PATCH 015/100] scripts/update-linux-headers: Add setup_data.h to + import list + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [15/91] 9d46c8787259317710a84e7a6aa36731e9f55a17 (bonzini/rhel-qemu-kvm) + +Data structures like struct setup_data have been moved to a separate +setup_data.h header which bootparam.h relies on. 
Add setup_data.h to +the cp_portable() list and sync it along with the other header files. + +Note that currently struct setup_data is stripped away as part of +generating bootparam.h, but that handling is not currently needed for +setup_data.h since it doesn't pull in many external +headers/dependencies. However, QEMU currently redefines struct +setup_data in hw/i386/x86.c, so that will need to be removed as part of +any header update that pulls in the new setup_data.h to avoid build +bisect breakage. + +Because <asm/setup_data.h> is the first architecture specific #include +in include/standard-headers/, add a new sed substitution to rewrite +asm/ include to the standard-headers/asm-* subdirectory for the current +architecture. + +And while at it, remove asm-generic/kvm_para.h from the list of +allowed includes: it does not have a matching substitution, and therefore +it would not be possible to use it on non-Linux systems where there is +no /usr/include/asm-generic/ directory. + +Signed-off-by: Michael Roth +Signed-off-by: Paolo Bonzini +(cherry picked from commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index a0006eec6f..d48856f9e2 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -61,7 +61,7 @@ cp_portable() { + -e 'linux/const' \ + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ +- -e 'asm-generic/kvm_para' \ ++ -e 'asm/setup_data.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -77,6 +77,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/<linux\/\([^>]*\)>/"standard-headers\/linux\/\1"/' \ ++ -e 's/<asm\/\([^>]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -155,11 +156,14 @@ for arch in $ARCHLIST; do + "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" + cp_portable "$tmpdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" ++ cp_portable "$tmpdir/include/asm/setup_data.h" \ ++ "$output/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi + done ++arch= + + rm -rf "$output/linux-headers/linux" + mkdir -p "$output/linux-headers/linux" +-- +2.39.3 + diff --git a/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch b/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch new file mode 100644 index 0000000..ecd631a --- /dev/null +++ b/kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch @@ -0,0 +1,47 @@ +From 09acdbc49a4dd85d82ad30ec2859edfcdba8431e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:01:26 +0200 +Subject: [PATCH 049/100] scripts/update-linux-headers.sh: Fix the path of + setup_data.h + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [49/91] f3008bc07796687c9440f5720fbc72a12d0a1278 (bonzini/rhel-qemu-kvm) + +When running the update-linux-headers.sh script, it currently fails with: + +scripts/update-linux-headers.sh: line 73: .../qemu/standard-headers/asm-x86/setup_data.h: No such file or directory + +The "include" folder is obviously missing here - no clue how this could +have worked before? 
+ +Fixes: 66210a1a30 ("scripts/update-linux-headers: Add setup_data.h to import list") +Message-ID: <20240527060126.12578-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit bde26d90ae9f7551cac90e117fc7216c807a3bfe) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 0f404d5317..f084bee72e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -160,7 +160,7 @@ for arch in $ARCHLIST; do + cp_portable "$hdrdir/bootparam.h" \ + "$output/include/standard-headers/asm-$arch" + cp_portable "$hdrdir/include/asm/setup_data.h" \ +- "$output/standard-headers/asm-x86" ++ "$output/include/standard-headers/asm-x86" + fi + if [ $arch = riscv ]; then + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" +-- +2.39.3 + diff --git a/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch b/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch new file mode 100644 index 0000000..c7dbd3a --- /dev/null +++ b/kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch @@ -0,0 +1,44 @@ +From 8e63d742015bf69a00fd44e88eb1198f594b2de2 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 27 May 2024 08:02:43 +0200 +Subject: [PATCH 048/100] scripts/update-linux-headers.sh: Remove temporary + directory inbetween + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [48/91] 879554dc7e722c4e20b302a00ca745ddeefdc0fb (bonzini/rhel-qemu-kvm) + +We are reusing the same temporary directory for installing the headers +of all targets, so there could be stale files here when switching from +one target to another. Make sure to delete the folder before installing +a new set of target headers into it. 
+ +Message-ID: <20240527060243.12647-1-thuth@redhat.com> +Reviewed-by: Michael S. Tsirkin +Acked-by: Cornelia Huck +Signed-off-by: Thomas Huth +(cherry picked from commit 3efc75ad9d9317e5709861bbebb2c29390f8e7a2) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 4431ba4d54..0f404d5317 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -113,6 +113,7 @@ for arch in $ARCHLIST; do + arch_var=ARCH + fi + ++ rm -rf "$hdrdir" + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install + + rm -rf "$output/linux-headers/asm-$arch" +-- +2.39.3 + diff --git a/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch b/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch new file mode 100644 index 0000000..a7eebbc --- /dev/null +++ b/kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch @@ -0,0 +1,60 @@ +From d580b83d9eda7802ffa3890ea8641793fe78937c Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:05 +0200 +Subject: [PATCH 094/100] scsi-block: Don't skip callback for sgio error + status/driver_status + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/4] 1fee1b21dae314f4f34c88f2d2fabd7af011404a (kmwolf/centos-qemu-kvm) + +Instead of calling into scsi_handle_rw_error() directly from +scsi_block_sgio_complete() and skipping the normal callback, go through +the normal cleanup path by calling the callback with a positive error +value. + +The important difference here is not only that the code path is cleaner, +but that the callbacks set r->req.aiocb = NULL. 
If we skip setting this +and the error action is BLOCK_ERROR_ACTION_STOP, resuming the VM runs +into an assertion failure in scsi_read_data() or scsi_write_data() +because the dangling aiocb pointer is unexpected. + +Fixes: a108557bbf ("scsi: inline sg_io_sense_from_errno() into the callers.") +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 622a70161ac258e4a166a7dca4b5be267e0652d9) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 10 ---------- + 1 file changed, 10 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index bed2c8746c..e7f57f3230 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2804,16 +2804,6 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + } else { + ret = io_hdr->status; + } +- +- if (ret > 0) { +- if (scsi_handle_rw_error(r, ret, true)) { +- scsi_req_unref(&r->req); +- return; +- } +- +- /* Ignore error. 
*/ +- ret = 0; +- } + } + + req->cb(req->cb_opaque, ret); +-- +2.39.3 + diff --git a/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch b/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch new file mode 100644 index 0000000..20aa88a --- /dev/null +++ b/kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch @@ -0,0 +1,79 @@ +From eebe5fe8cbc854a6365e7c1adbb701079b137bcb Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:06 +0200 +Subject: [PATCH 095/100] scsi-disk: Add warning comments that host_status + errors take a shortcut + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/4] 6fcd603fc78fda65a425a1acd9a8710d81c6ed7f (kmwolf/centos-qemu-kvm) + +scsi_block_sgio_complete() has surprising behaviour in that there are +error cases in which it directly completes the request and never calls +the passed callback. In the current state of the code, this doesn't seem +to result in bugs, but with future code changes, we must be careful to +never rely on the callback doing some cleanup until this code smell is +fixed. For now, just add warnings to make people aware of the trap. + +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 8a0495624f23f8f01dfb1484f367174f3b3572e8) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index e7f57f3230..b4062ac2ff 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -65,6 +65,9 @@ struct SCSIDiskClass { + /* + * Callbacks receive ret == 0 for success. Errors are represented either as + * negative errno values, or as positive SAM status codes. 
++ * ++ * Beware: For errors returned in host_status, the function may directly ++ * complete the request and never call the callback. + */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; +@@ -359,6 +362,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_dma_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -399,6 +403,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_read_complete(void *opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -538,6 +543,7 @@ done: + scsi_req_unref(&r->req); + } + ++/* May not be called in all error cases, don't rely on cleanup here */ + static void scsi_write_complete(void * opaque, int ret) + { + SCSIDiskReq *r = (SCSIDiskReq *)opaque; +@@ -2793,6 +2799,7 @@ static void scsi_block_sgio_complete(void *opaque, int ret) + sg_io_hdr_t *io_hdr = &req->io_header; + + if (ret == 0) { ++ /* FIXME This skips calling req->cb() and any cleanup in it */ + if (io_hdr->host_status != SCSI_HOST_OK) { + scsi_req_complete_failed(&r->req, io_hdr->host_status); + scsi_req_unref(&r->req); +-- +2.39.3 + diff --git a/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch b/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch new file mode 100644 index 0000000..0e2aeaf --- /dev/null +++ b/kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch @@ -0,0 +1,106 @@ +From bd5cace452183053e356a27317c759ecfe0391aa Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:07 +0200 +Subject: [PATCH 096/100] scsi-disk: Always report RESERVATION_CONFLICT to + guest + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/4] eb4142071e5cbe385a949a6c48b0c8f8c6086918 
(kmwolf/centos-qemu-kvm) + +In the case of scsi-block, RESERVATION_CONFLICT is not a backend error, +but indicates that the guest tried to make a request that it isn't +allowed to execute. Pass the error to the guest so that it can decide +what to do with it. + +Without this, if we stop the VM in response to a RESERVATION_CONFLICT +(as is the default policy in management software such as oVirt or +KubeVirt), it can happen that the VM cannot be resumed any more because +every attempt to resume it immediately runs into the same error and +stops the VM again. + +One case that expects RESERVATION_CONFLICT errors to be visible in the +guest is running the validation tests in Windows 2019's Failover Cluster +Manager, which intentionally tries to execute invalid requests to see if +they are properly rejected. + +Buglink: https://issues.redhat.com/browse/RHEL-50000 +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-5-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9da6bd39f92434f55573acd017841b195c60188f) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index b4062ac2ff..91ccf37fef 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -202,7 +202,7 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + SCSISense sense = SENSE_CODE(NO_SENSE); +- int error = 0; ++ int error; + bool req_has_sense = false; + BlockErrorAction action; + int status; +@@ -213,11 +213,35 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } else { + /* A passthrough command has completed with nonzero status. 
*/ + status = ret; +- if (status == CHECK_CONDITION) { ++ switch (status) { ++ case CHECK_CONDITION: + req_has_sense = true; + error = scsi_sense_buf_to_errno(r->req.sense, sizeof(r->req.sense)); +- } else { ++ break; ++ case RESERVATION_CONFLICT: ++ /* ++ * Don't apply the error policy, always report to the guest. ++ * ++ * This is a passthrough code path, so it's not a backend error, but ++ * a response to an invalid guest request. ++ * ++ * Windows Failover Cluster validation intentionally sends invalid ++ * requests to verify that reservations work as intended. It is ++ * crucial that it sees the resulting errors. ++ * ++ * Treating a reservation conflict as a guest-side error is obvious ++ * when a pr-manager is in use. Without one, the situation is less ++ * clear, but there might be nothing that can be fixed on the host ++ * (like in the above example), and we don't want to be stuck in a ++ * loop where resuming the VM and retrying the request immediately ++ * stops it again. So always reporting is still the safer option in ++ * this case, too. ++ */ ++ error = 0; ++ break; ++ default: + error = EINVAL; ++ break; + } + } + +@@ -227,8 +251,9 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + * are usually retried immediately, so do not post them to QMP and + * do not account them as failed I/O. 
+ */ +- if (req_has_sense && +- scsi_sense_buf_is_guest_recoverable(r->req.sense, sizeof(r->req.sense))) { ++ if (!error || (req_has_sense && ++ scsi_sense_buf_is_guest_recoverable(r->req.sense, ++ sizeof(r->req.sense)))) { + action = BLOCK_ERROR_ACTION_REPORT; + acct_failed = false; + } else { +-- +2.39.3 + diff --git a/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch b/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch new file mode 100644 index 0000000..409028a --- /dev/null +++ b/kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch @@ -0,0 +1,125 @@ +From 1a0aa9bbdad63d72628002740410b8a28282a96e Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 31 Jul 2024 14:32:04 +0200 +Subject: [PATCH 093/100] scsi-disk: Use positive return value for status in + dma_readv/writev + +RH-Author: Kevin Wolf +RH-MergeRequest: 261: scsi-block: Fix error handling with r/werror=stop +RH-Jira: RHEL-50000 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/4] a0b3e7bfd7b7059c0ec3706f2eb1698c1d430b08 (kmwolf/centos-qemu-kvm) + +In some error cases, scsi_block_sgio_complete() never calls the passed +callback, but directly completes the request. This leads to bugs because +its error paths are not exact copies of what the callback would normally +do. + +In preparation to fix this, allow passing positive return values to the +callbacks that represent the status code that should be used to complete +the request. + +scsi_handle_rw_error() already handles positive values for its ret +parameter because scsi_block_sgio_complete() calls directly into it. 
+ +Signed-off-by: Kevin Wolf +Acked-by: Paolo Bonzini +Message-ID: <20240731123207.27636-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit cfe0880835cd364b590ffd27ef8dbd2ad8838bc5) +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-disk.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 4bd7af9d0c..bed2c8746c 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -62,6 +62,10 @@ OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE) + + struct SCSIDiskClass { + SCSIDeviceClass parent_class; ++ /* ++ * Callbacks receive ret == 0 for success. Errors are represented either as ++ * negative errno values, or as positive SAM status codes. ++ */ + DMAIOFunc *dma_readv; + DMAIOFunc *dma_writev; + bool (*need_fua_emulation)(SCSICommand *cmd); +@@ -261,7 +265,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return true; + } + +- if (ret < 0) { ++ if (ret != 0) { + return scsi_handle_rw_error(r, ret, acct_failed); + } + +@@ -338,7 +342,7 @@ static void scsi_write_do_fua(SCSIDiskReq *r) + static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) + { + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -363,9 +367,10 @@ static void scsi_dma_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_dma_complete_noio(r, ret); +@@ -381,7 +386,7 @@ static void scsi_read_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert(r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, 
false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -402,9 +407,10 @@ static void scsi_read_complete(void *opaque, int ret) + assert(r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + trace_scsi_disk_read_complete(r->req.tag, r->qiov.size); + } +@@ -512,7 +518,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) + qemu_get_current_aio_context()); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_check_error(r, ret, ret > 0)) { + goto done; + } + +@@ -540,9 +546,10 @@ static void scsi_write_complete(void * opaque, int ret) + assert (r->req.aiocb != NULL); + r->req.aiocb = NULL; + ++ /* ret > 0 is accounted for in scsi_disk_req_check_error() */ + if (ret < 0) { + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); +- } else { ++ } else if (ret == 0) { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); + } + scsi_write_complete_noio(r, ret); +-- +2.39.3 + diff --git a/kvm-target-i386-Add-new-CPU-model-SierraForest.patch b/kvm-target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000..c72f290 --- /dev/null +++ b/kvm-target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,215 @@ +From d9595fecd03c9a69ac562e3f240d50b2fa8d14a4 Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 Mar 2024 10:10:44 +0800 +Subject: [PATCH 006/100] target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [6/91] 
4bc71f82c258db46569a7e08965d1d358b19416c (bonzini/rhel-qemu-kvm) + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of GraniteRapids CPU model except above features to +SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. 
When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 6e82d3b6220777667968a04c87e1667f164ebe88) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0aa88d9b48..efbadc3ed7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4127,6 +4127,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature. 
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ 
.features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | 
VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.39.3 + diff --git a/kvm-target-i386-Export-RFDS-bit-to-guests.patch b/kvm-target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000..74de391 --- /dev/null +++ b/kvm-target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,50 @@ +From ae6229a3e45318b1101291b99a0e894399dcb1db Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 +Subject: [PATCH 007/100] target/i386: Export RFDS bit to 
guests + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [7/91] 7eb6cae8821a2e953d3ff2033fa2e973011ad771 (bonzini/rhel-qemu-kvm) + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 41bdd9812863c150284a9339a048ed88c40f4df7) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index efbadc3ed7..489c853b42 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1158,8 +1158,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.39.3 + diff --git a/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch b/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch new file mode 100644 index 0000000..f37dbc2 --- /dev/null +++ b/kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch @@ -0,0 +1,192 @@ +From 4a811f54cdb3c9329f193ea43c76ed4eb1b14c19 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:29:33 +0100 +Subject: [PATCH 
022/100] target/i386: Implement mc->kvm_type() to get VM type + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [22/91] d58cf6ead2de37852adc15c7642166904403453f (bonzini/rhel-qemu-kvm) + +KVM is introducing a new API to create confidential guests, which +will be used by TDX and SEV-SNP but is also available for SEV and +SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to +identify which confidential computing technology to use. + +Since there are no other expected uses of VM types, delegate +mc->kvm_type() for x86 boards to the confidential-guest-support +object pointed to by ms->cgs. + +For example, if a sev-guest object is specified to confidential-guest-support, +like, + + qemu -machine ...,confidential-guest-support=sev0 \ + -object sev-guest,id=sev0,... + +it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM +is supported, and if so use them together with the KVM_SEV_INIT2 +function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to +KVM_SEV_INIT and KVM_SEV_ES_INIT. + +This is a preparatory work towards TDX and SEV-SNP support, but it +will also enable support for VMSA features such as DebugSwap, which +are only available via KVM_SEV_INIT2. 
+ +Co-developed-by: Xiaoyao Li +Signed-off-by: Xiaoyao Li +Signed-off-by: Paolo Bonzini +(cherry picked from commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1) +Signed-off-by: Paolo Bonzini +--- + hw/i386/x86.c | 11 ++++++++ + target/i386/confidential-guest.h | 19 ++++++++++++++ + target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 2 ++ + 4 files changed, 76 insertions(+) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 84a4801977..3d5b51e92d 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -1381,6 +1381,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, + qapi_free_SgxEPCList(list); + } + ++static int x86_kvm_type(MachineState *ms, const char *vm_type) ++{ ++ /* ++ * No x86 machine has a kvm-type property. If one is added that has ++ * it, it should call kvm_get_vm_type() directly or not use it at all. ++ */ ++ assert(vm_type == NULL); ++ return kvm_enabled() ? kvm_get_vm_type(ms) : 0; ++} ++ + static void x86_machine_initfn(Object *obj) + { + X86MachineState *x86ms = X86_MACHINE(obj); +@@ -1405,6 +1415,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) + mc->cpu_index_to_instance_props = x86_cpu_index_to_props; + mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; + mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; ++ mc->kvm_type = x86_kvm_type; + x86mc->save_tsc_khz = true; + x86mc->fwcfg_dma_enabled = true; + nc->nmi_monitor_handler = x86_nmi; +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +index ca12d5a8fb..532e172a60 100644 +--- a/target/i386/confidential-guest.h ++++ b/target/i386/confidential-guest.h +@@ -36,5 +36,24 @@ struct X86ConfidentialGuest { + struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; ++ ++ /* */ ++ int (*kvm_type)(X86ConfidentialGuest *cg); + }; ++ ++/** ++ * x86_confidential_guest_kvm_type: ++ * ++ * Calls #X86ConfidentialGuestClass.kvm_type() callback of @cg.
++ */ ++static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) ++{ ++ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); ++ ++ if (klass->kvm_type) { ++ return klass->kvm_type(cg); ++ } else { ++ return 0; ++ } ++} + #endif +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a12207a8ee..1f0ab12c2e 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "kvm_i386.h" ++#include "../confidential-guest.h" + #include "sev.h" + #include "xen-emu.h" + #include "hyperv.h" +@@ -161,6 +162,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + static RateLimit bus_lock_ratelimit_ctrl; + static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + ++static const char *vm_type_name[] = { ++ [KVM_X86_DEFAULT_VM] = "default", ++}; ++ ++bool kvm_is_vm_type_supported(int type) ++{ ++ uint32_t machine_types; ++ ++ /* ++ * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM ++ * is always supported ++ */ ++ if (type == KVM_X86_DEFAULT_VM) { ++ return true; ++ } ++ ++ machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), ++ KVM_CAP_VM_TYPES); ++ return !!(machine_types & BIT(type)); ++} ++ ++int kvm_get_vm_type(MachineState *ms) ++{ ++ int kvm_type = KVM_X86_DEFAULT_VM; ++ ++ if (ms->cgs) { ++ if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { ++ error_report("configuration type %s not supported for x86 guests", ++ object_get_typename(OBJECT(ms->cgs))); ++ exit(1); ++ } ++ kvm_type = x86_confidential_guest_kvm_type( ++ X86_CONFIDENTIAL_GUEST(ms->cgs)); ++ } ++ ++ if (!kvm_is_vm_type_supported(kvm_type)) { ++ error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); ++ exit(1); ++ } ++ ++ return kvm_type; ++} ++ + bool kvm_has_smm(void) + { + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +diff --git 
a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index 30fedcffea..6b44844d95 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); + bool kvm_enable_sgx_provisioning(KVMState *s); + bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + ++int kvm_get_vm_type(MachineState *ms); + void kvm_arch_reset_vcpu(X86CPU *cs); + void kvm_arch_after_reset_vcpu(X86CPU *cpu); + void kvm_arch_do_init_vcpu(X86CPU *cs); +@@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #ifdef CONFIG_KVM + ++bool kvm_is_vm_type_supported(int type); + bool kvm_has_adjust_clock_stable(void); + bool kvm_has_exception_payload(void); + void kvm_synchronize_all_tsc(void); +-- +2.39.3 + diff --git a/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000..9844da7 --- /dev/null +++ b/kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,68 @@ +From fe60f8d47b6e14f17dd6c06b03bd00e6bcdbeefb Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH 005/100] target/i386: Introduce Icelake-Server-v7 to enable + TSX + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [5/91] 66d865899e0d510b6c86763422d6b28b904b208a (bonzini/rhel-qemu-kvm) + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. +So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. 
+ +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then the guest can use TSX securely when it sees taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a. taa-no. + +Ideally, TSX should have been enabled together with taa-no since v3, but for +compatibility, we'd better add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit c895fa54e3060c5ac6f3888dce96c9b78626072b) +Signed-off-by: Paolo Bonzini +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a7f71422ea..0aa88d9b48 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3840,6 +3840,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.39.3 + diff --git a/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch b/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch new file mode 100644 index 0000000..ace0367 --- /dev/null +++ b/kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch @@ -0,0 +1,49 @@ +From 070dda07559a7488c62fc80a8c79e8baaee125eb Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 3 Jul 2024 10:37:23 +0200 +Subject: [PATCH 087/100] target/i386: SEV: fix formatting of CPUID mismatch + message + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das
+RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [87/91] 36bc2cc80d5ffc1ceeb1836540660ff45885a818 (bonzini/rhel-qemu-kvm) + +Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05) +Signed-off-by: Paolo Bonzini +(cherry picked from commit f45ef010e19fe86314bffd5d5c9d5d77f4ce8103) +Signed-off-by: Paolo Bonzini +--- + target/i386/sev.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c40562dce3..37de80adc7 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -839,7 +839,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + size_t i; + + if (old->count != new->count) { +- error_report("SEV-SNP: CPUID validation failed due to count mismatch," ++ error_report("SEV-SNP: CPUID validation failed due to count mismatch, " + "provided: %d, expected: %d", old->count, new->count); + return; + } +@@ -851,8 +851,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { +- error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" +- "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" ++ error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " ++ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, +-- +2.39.3 + diff --git a/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch b/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch new file mode 100644 index 0000000..3030d59 --- /dev/null +++ b/kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch @@ -0,0 +1,42 @@ +From 37b7e2185f1d23dd5f5a95b545b8d760492915ed Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 2 Aug 2024 01:43:37 +0200 +Subject: [PATCH 091/100] target/i386: SEV: 
fix mismatch in vcek-disabled + property name + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [91/91] 3a8abc4a0547b985cb79cef29bd3e8350d3d4b48 (bonzini/rhel-qemu-kvm) + +The vcek-disabled property of the sev-snp-guest object is misspelled +vcek-required (which I suppose would use the opposite polarity) in +the call to object_class_property_add_bool(). Fix it. + +Reported-by: Zixi Chen +Reviewed-by: Pankaj Gupta +Signed-off-by: Paolo Bonzini +(cherry picked from commit d4392415c328f83b2e30517a3561be523874f441) +--- + target/i386/sev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index b921defb63..aed565dbe8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -2378,7 +2378,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); +- object_class_property_add_bool(oc, "vcek-required", ++ object_class_property_add_bool(oc, "vcek-disabled", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", +-- +2.39.3 + diff --git a/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch b/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch new file mode 100644 index 0000000..7c17e53 --- /dev/null +++ b/kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch @@ -0,0 +1,146 @@ +From 6bb738fb90a3a1221ae35596b3d03a17e0b1c34d Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 19 Mar 2024 15:30:25 +0100 +Subject: [PATCH 023/100] target/i386: SEV: use KVM_SEV_INIT2 if possible + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [23/91] 
9579d772ae5124a94c6b1e3a4566bf3470d2bc8f (bonzini/rhel-qemu-kvm) + +Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual +machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. + +These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have +several advantages: + +- sharing the initialization sequence with SEV-SNP and TDX + +- allowing arguments including the set of desired VMSA features + +- protection against invalid use of KVM_GET/SET_* ioctls for guests + with encrypted state + +If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, +fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the +default x86 VM type). + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 663e2f443e5722370708ce2f4c27d94a2087d2d3) +Signed-off-by: Paolo Bonzini +--- + target/i386/kvm/kvm.c | 2 ++ + target/i386/sev.c | 41 +++++++++++++++++++++++++++++++++++++---- + 2 files changed, 39 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 1f0ab12c2e..408568d053 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -164,6 +164,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + + static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", ++ [KVM_X86_SEV_VM] = "SEV", ++ [KVM_X86_SEV_ES_VM] = "SEV-ES", + }; + + bool kvm_is_vm_type_supported(int type) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index ebe36d4c10..9dab4060b8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "crypto/hash.h" + #include "sysemu/kvm.h" ++#include "kvm/kvm_i386.h" + #include "sev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" +@@ -56,6 +57,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + struct SevGuestState { + X86ConfidentialGuest parent_obj; + ++ int kvm_type; ++ + /* configuration parameters */ + char *sev_device; + uint32_t policy; +@@ -850,6 
+853,26 @@ sev_vm_state_change(void *opaque, bool running, RunState state) + } + } + ++static int sev_kvm_type(X86ConfidentialGuest *cg) ++{ ++ SevGuestState *sev = SEV_GUEST(cg); ++ int kvm_type; ++ ++ if (sev->kvm_type != -1) { ++ goto out; ++ } ++ ++ kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; ++ if (kvm_is_vm_type_supported(kvm_type)) { ++ sev->kvm_type = kvm_type; ++ } else { ++ sev->kvm_type = KVM_X86_DEFAULT_VM; ++ } ++ ++out: ++ return sev->kvm_type; ++} ++ + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + { + SevGuestState *sev = SEV_GUEST(cgs); +@@ -929,13 +952,19 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) + __func__); + goto err; + } +- cmd = KVM_SEV_ES_INIT; +- } else { +- cmd = KVM_SEV_INIT; + } + + trace_kvm_sev_init(); +- ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { ++ cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; ++ ++ ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); ++ } else { ++ struct kvm_sev_init args = { 0 }; ++ ++ ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); ++ } ++ + if (ret) { + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); +@@ -1327,8 +1356,10 @@ static void + sev_guest_class_init(ObjectClass *oc, void *data) + { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); ++ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = sev_kvm_init; ++ x86_klass->kvm_type = sev_kvm_type; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, +@@ -1357,6 +1388,8 @@ sev_guest_instance_init(Object *obj) + { + SevGuestState *sev = SEV_GUEST(obj); + ++ sev->kvm_type = -1; ++ + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + 
object_property_add_uint32_ptr(obj, "policy", &sev->policy, +-- +2.39.3 + diff --git a/kvm-target-i386-add-guest-phys-bits-cpu-property.patch b/kvm-target-i386-add-guest-phys-bits-cpu-property.patch new file mode 100644 index 0000000..cd41279 --- /dev/null +++ b/kvm-target-i386-add-guest-phys-bits-cpu-property.patch @@ -0,0 +1,124 @@ +From 090c64ea622534ff2ae6c9b66cdf0b1ddb58bf26 Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann +Date: Mon, 18 Mar 2024 16:53:36 +0100 +Subject: [PATCH 002/100] target/i386: add guest-phys-bits cpu property + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [2/91] 6603e842012dc484e1f571ea0a77b59095f37003 (bonzini/rhel-qemu-kvm) + +Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) +via -cpu $model,guest-phys-bits=$nr. + +Signed-off-by: Gerd Hoffmann +Message-ID: <20240318155336.156197-3-kraxel@redhat.com> +Reviewed-by: Zhao Liu +Signed-off-by: Paolo Bonzini +(cherry picked from commit 513ba32dccc659c80722b3a43233b26eaa50309a) +Signed-off-by: Paolo Bonzini +--- + hw/i386/pc.c | 2 ++ + target/i386/cpu.c | 22 ++++++++++++++++++++++ + target/i386/cpu.h | 8 ++++++++ + 3 files changed, 32 insertions(+) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index 648762d908..b9fde3cec1 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -349,6 +349,8 @@ GlobalProperty pc_rhel_compat[] = { + const size_t pc_rhel_compat_len = G_N_ELEMENTS(pc_rhel_compat); + + GlobalProperty pc_rhel_9_5_compat[] = { ++ /* pc_rhel_9_5_compat from pc_compat_pc_9_0 (backported from 9.1) */ ++ { TYPE_X86_CPU, "guest-phys-bits", "0" }, + }; + const size_t pc_rhel_9_5_compat_len = G_N_ELEMENTS(pc_rhel_9_5_compat); + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index be7b0663cd..a7f71422ea 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6591,6 +6591,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t 
count, + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + /* 64 bit processor */ + *eax |= (cpu_x86_virtual_addr_width(env) << 8); ++ *eax |= (cpu->guest_phys_bits << 16); + } + *ebx = env->features[FEAT_8000_0008_EBX]; + if (cs->nr_cores * cs->nr_threads > 1) { +@@ -7350,6 +7351,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + goto out; + } + ++ if (cpu->guest_phys_bits == -1) { ++ /* ++ * If it was not set by the user, or by the accelerator via ++ * cpu_exec_realizefn, clear. ++ */ ++ cpu->guest_phys_bits = 0; ++ } ++ + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. Note that this check +@@ -7400,6 +7409,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + if (cpu->phys_bits == 0) { + cpu->phys_bits = TCG_PHYS_ADDR_BITS; + } ++ if (cpu->guest_phys_bits && ++ (cpu->guest_phys_bits > cpu->phys_bits || ++ cpu->guest_phys_bits < 32)) { ++ error_setg(errp, "guest-phys-bits should be between 32 and %u " ++ " (but is %u)", ++ cpu->phys_bits, cpu->guest_phys_bits); ++ return; ++ } + } else { + /* For 32 bit systems don't use the user set value, but keep + * phys_bits consistent with what we tell the guest. 
+@@ -7408,6 +7425,10 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) + error_setg(errp, "phys-bits is not user-configurable in 32 bit"); + return; + } ++ if (cpu->guest_phys_bits != 0) { ++ error_setg(errp, "guest-phys-bits is not user-configurable in 32 bit"); ++ return; ++ } + + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { + cpu->phys_bits = 36; +@@ -7908,6 +7929,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), + DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), ++ DEFINE_PROP_UINT32("guest-phys-bits", X86CPU, guest_phys_bits, -1), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), + DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 6b05738079..6112e27bfd 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -2027,6 +2027,14 @@ struct ArchCPU { + /* Number of physical address bits supported */ + uint32_t phys_bits; + ++ /* ++ * Number of guest physical address bits available. Usually this is ++ * identical to host physical address bits. With NPT or EPT 4-level ++ * paging, guest physical address space might be restricted to 48 bits ++ * even if the host cpu supports more physical address bits. 
++ */ ++ uint32_t guest_phys_bits; ++ + /* in order to simplify APIC support, we leave this pointer to the + user */ + struct DeviceState *apic_state; +-- +2.39.3 + diff --git a/kvm-target-i386-introduce-x86-confidential-guest.patch b/kvm-target-i386-introduce-x86-confidential-guest.patch new file mode 100644 index 0000000..dec3220 --- /dev/null +++ b/kvm-target-i386-introduce-x86-confidential-guest.patch @@ -0,0 +1,161 @@ +From 0573fcd1775b6613127b1906d59d02e65f7519f3 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 18 Mar 2024 11:07:43 -0400 +Subject: [PATCH 021/100] target/i386: introduce x86-confidential-guest + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [21/91] e86d3bcde7e1c2fa1ba8c9bc83e02033644f1ac0 (bonzini/rhel-qemu-kvm) + +Introduce a common superclass for x86 confidential guest implementations. +It will extend ConfidentialGuestSupportClass with a method that provides +the VM type to be passed to KVM_CREATE_VM. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit d82e9c843d662f13821026618aba936eda31a6c0) +Signed-off-by: Paolo Bonzini +--- + target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ + target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ + target/i386/meson.build | 2 +- + target/i386/sev.c | 6 ++--- + 4 files changed, 77 insertions(+), 4 deletions(-) + create mode 100644 target/i386/confidential-guest.c + create mode 100644 target/i386/confidential-guest.h + +diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c +new file mode 100644 +index 0000000000..b3727845ad +--- /dev/null ++++ b/target/i386/confidential-guest.c +@@ -0,0 +1,33 @@ ++/* ++ * QEMU Confidential Guest support ++ * ++ * Copyright (C) 2024 Red Hat, Inc. 
++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or ++ * later. See the COPYING file in the top-level directory. ++ * ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "confidential-guest.h" ++ ++OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, ++ x86_confidential_guest, ++ X86_CONFIDENTIAL_GUEST, ++ CONFIDENTIAL_GUEST_SUPPORT) ++ ++static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) ++{ ++} ++ ++static void x86_confidential_guest_init(Object *obj) ++{ ++} ++ ++static void x86_confidential_guest_finalize(Object *obj) ++{ ++} +diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h +new file mode 100644 +index 0000000000..ca12d5a8fb +--- /dev/null ++++ b/target/i386/confidential-guest.h +@@ -0,0 +1,40 @@ ++/* ++ * x86-specific confidential guest methods. ++ * ++ * Copyright (c) 2024 Red Hat Inc. ++ * ++ * Authors: ++ * Paolo Bonzini ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#ifndef TARGET_I386_CG_H ++#define TARGET_I386_CG_H ++ ++#include "qom/object.h" ++ ++#include "exec/confidential-guest-support.h" ++ ++#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" ++ ++OBJECT_DECLARE_TYPE(X86ConfidentialGuest, ++ X86ConfidentialGuestClass, ++ X86_CONFIDENTIAL_GUEST) ++ ++struct X86ConfidentialGuest { ++ /* */ ++ ConfidentialGuestSupport parent_obj; ++}; ++ ++/** ++ * X86ConfidentialGuestClass: ++ * ++ * Class to be implemented by confidential-guest-support concrete objects ++ * for the x86 target. 
++ */ ++struct X86ConfidentialGuestClass { ++ /* */ ++ ConfidentialGuestSupportClass parent; ++}; ++#endif +diff --git a/target/i386/meson.build b/target/i386/meson.build +index 7c74bfa859..8abce725f8 100644 +--- a/target/i386/meson.build ++++ b/target/i386/meson.build +@@ -6,7 +6,7 @@ i386_ss.add(files( + 'xsave_helper.c', + 'cpu-dump.c', + )) +-i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) ++i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) + + # x86 cpu type + i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) +diff --git a/target/i386/sev.c b/target/i386/sev.c +index c49a8fd55e..ebe36d4c10 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -35,7 +35,7 @@ + #include "monitor/monitor.h" + #include "monitor/hmp-target.h" + #include "qapi/qapi-commands-misc-target.h" +-#include "exec/confidential-guest-support.h" ++#include "confidential-guest.h" + #include "hw/i386/pc.h" + #include "exec/address-spaces.h" + +@@ -54,7 +54,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + * -machine ...,memory-encryption=sev0 + */ + struct SevGuestState { +- ConfidentialGuestSupport parent_obj; ++ X86ConfidentialGuest parent_obj; + + /* configuration parameters */ + char *sev_device; +@@ -1372,7 +1372,7 @@ sev_guest_instance_init(Object *obj) + + /* sev guest info */ + static const TypeInfo sev_guest_info = { +- .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, ++ .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, +-- +2.39.3 + diff --git a/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch b/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch new file mode 100644 index 0000000..d21d298 --- /dev/null +++ b/kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch @@ -0,0 +1,59 @@ +From b02dc1e5c0f01228053e784f9ec7ac3a47e91d7c Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Thu, 29 
Feb 2024 01:36:25 -0500 +Subject: [PATCH 026/100] trace/kvm: Split address space and slot id in + trace_kvm_set_user_memory() + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [26/91] 640511c4ab0ba76bb4483f6c3fb73e060d914f0a (bonzini/rhel-qemu-kvm) + +The upper 16 bits of kvm_userspace_memory_region::slot are +address space id. Parse it separately in trace_kvm_set_user_memory(). + +Signed-off-by: Xiaoyao Li +Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 72853afc638b3e28779c86dd05da2f3bb149fe2c) +Signed-off-by: Paolo Bonzini +--- + accel/kvm/kvm-all.c | 5 +++-- + accel/kvm/trace-events | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index b51e09a583..9bd235c969 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -303,8 +303,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + slot->old_flags = mem.flags; + err: +- trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, +- mem.memory_size, mem.userspace_addr, ret); ++ trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, ++ mem.guest_phys_addr, mem.memory_size, ++ mem.userspace_addr, ret); + if (ret < 0) { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", +diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events +index a25902597b..9f599abc17 100644 +--- a/accel/kvm/trace-events ++++ b/accel/kvm/trace-events +@@ -15,7 +15,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" + kvm_irqchip_release_virq(int virq) "virq %d" + kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: 
%d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" + kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" +-kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" ++kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" + kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 + kvm_resample_fd_notify(int gsi) "gsi %d" + kvm_dirty_ring_full(int id) "vcpu %d" +-- +2.39.3 + diff --git a/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch b/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch new file mode 100644 index 0000000..f141bf1 --- /dev/null +++ b/kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch @@ -0,0 +1,62 @@ +From e185104a10a37174d13d981fa1febafbb7e651aa Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 13:49:49 +0200 +Subject: [PATCH 050/100] update-linux-headers: fix forwarding to asm-generic + headers + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [50/91] 3c98a7fe790d943bb5ff8dca1da83f5944ec3e2e (bonzini/rhel-qemu-kvm) + +Afer commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove +temporary directory inbetween", 2024-05-29), updating linux-headers/ +results in errors such as + + cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory + +because Loongarch does not have an asm/bitsperlong.h file and uses the +generic 
version. Before commit 3efc75ad9d9, the missing file would +incorrectly cause stale files to be included in linux-headers/. The files +were never committed to qemu.git, but were wrong nevertheless. The build +would just use the system version of the files, which is opposite to +the idea of importing Linux header files into QEMU's tree. + +Create forwarding headers, resembling the ones that are generated during a +kernel build by scripts/Makefile.asm-generic, if a file is only installed +under include/asm-generic/. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716) +Signed-off-by: Paolo Bonzini +--- + scripts/update-linux-headers.sh | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index f084bee72e..78c0f2c43e 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -119,7 +119,14 @@ for arch in $ARCHLIST; do + rm -rf "$output/linux-headers/asm-$arch" + mkdir -p "$output/linux-headers/asm-$arch" + for header in kvm.h unistd.h bitsperlong.h mman.h; do +- cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ if test -f "$hdrdir/include/asm/$header"; then ++ cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" ++ elif test -f "$hdrdir/include/asm-generic/$header"; then ++ # not installed as , but used as such in kernel sources ++ cat <$output/linux-headers/asm-$arch/$header ++#include ++EOF ++ fi + done + + if [ $arch = mips ]; then +-- +2.39.3 + diff --git a/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch b/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch new file mode 100644 index 0000000..a75a2aa --- /dev/null +++ b/kvm-update-linux-headers-import-linux-kvm_para.h-header.patch @@ -0,0 +1,175 @@ +From 8d6c37ddc253f63202cc9519670c258e9d81b98e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 
14:25:06 +0200 +Subject: [PATCH 053/100] update-linux-headers: import linux/kvm_para.h header + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [53/91] 27d4db0ecec7d0b8adeba1ec85fca32eacee1009 (bonzini/rhel-qemu-kvm) + +Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h +because it includes definitions for kvmclock and for KVM CPUID +bits. However, other definitions for KVM hypercall values and return +codes are included in include/uapi/linux/kvm_para.h and they will be +used by SEV-SNP. + +To ensure that it is possible to include both <linux/kvm_para.h> and +"standard-headers/asm-x86/kvm_para.h" without conflicts, provide +linux/kvm_para.h as a portable header too, and forward linux-headers/ +files to those in include/standard-headers. Note that <linux/kvm_para.h> +will include architecture-specific definitions as well, but +"standard-headers/linux/kvm_para.h" will not because it can be used in +architecture-independent files. + +This could easily be extended to other architectures, but right now +they do not need any symbol in their specific kvm_para.h files. 
+ +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit aa274c33c39e7de981dc195abe60e1a246c9d248) +Signed-off-by: Paolo Bonzini +--- + include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ + linux-headers/asm-x86/kvm_para.h | 1 + + linux-headers/linux/kvm_para.h | 2 ++ + scripts/update-linux-headers.sh | 22 ++++++++++++- + 4 files changed, 62 insertions(+), 1 deletion(-) + create mode 100644 include/standard-headers/linux/kvm_para.h + create mode 100644 linux-headers/asm-x86/kvm_para.h + create mode 100644 linux-headers/linux/kvm_para.h + +diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..015c166302 +--- /dev/null ++++ b/include/standard-headers/linux/kvm_para.h +@@ -0,0 +1,38 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_KVM_PARA_H ++#define __LINUX_KVM_PARA_H ++ ++/* ++ * This header file provides a method for making a hypercall to the host ++ * Architectures should define: ++ * - kvm_hypercall0, kvm_hypercall1... 
++ * - kvm_arch_para_features ++ * - kvm_para_available ++ */ ++ ++/* Return values for hypercalls */ ++#define KVM_ENOSYS 1000 ++#define KVM_EFAULT EFAULT ++#define KVM_EINVAL EINVAL ++#define KVM_E2BIG E2BIG ++#define KVM_EPERM EPERM ++#define KVM_EOPNOTSUPP 95 ++ ++#define KVM_HC_VAPIC_POLL_IRQ 1 ++#define KVM_HC_MMU_OP 2 ++#define KVM_HC_FEATURES 3 ++#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 ++#define KVM_HC_KICK_CPU 5 ++#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 ++#define KVM_HC_MIPS_EXIT_VM 7 ++#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 ++#define KVM_HC_CLOCK_PAIRING 9 ++#define KVM_HC_SEND_IPI 10 ++#define KVM_HC_SCHED_YIELD 11 ++#define KVM_HC_MAP_GPA_RANGE 12 ++ ++/* ++ * hypercalls use architecture specific ++ */ ++ ++#endif /* __LINUX_KVM_PARA_H */ +diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h +new file mode 100644 +index 0000000000..1d3e0e0b07 +--- /dev/null ++++ b/linux-headers/asm-x86/kvm_para.h +@@ -0,0 +1 @@ ++#include "standard-headers/asm-x86/kvm_para.h" +diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h +new file mode 100644 +index 0000000000..6a1e672259 +--- /dev/null ++++ b/linux-headers/linux/kvm_para.h +@@ -0,0 +1,2 @@ ++#include "standard-headers/linux/kvm_para.h" ++#include +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 90759dcfe0..64d1989961 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -64,6 +64,7 @@ cp_portable() { + -e 'linux/kernel' \ + -e 'linux/sysinfo' \ + -e 'asm/setup_data.h' \ ++ -e 'asm/kvm_para.h' \ + > /dev/null + then + echo "Unexpected #include in input file $f". 
+@@ -71,6 +72,15 @@ cp_portable() { + fi + + header=$(basename "$f"); ++ ++ if test -z "$arch"; then ++ # Let users of include/standard-headers/linux/ headers pick the ++ # asm-* header that they care about ++ arch_cmd='/]*\)>/d' ++ else ++ arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' ++ fi ++ + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ + -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ +@@ -79,7 +89,7 @@ cp_portable() { + -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ + -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ +- -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ ++ -e "$arch_cmd" \ + -e 's/__bitwise//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ +@@ -159,7 +169,12 @@ EOF + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" ++ + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" ++ cat <$output/linux-headers/asm-$arch/kvm_para.h ++#include "standard-headers/asm-$arch/kvm_para.h" ++EOF ++ + # Remove everything except the macros from bootparam.h avoiding the + # unnecessary import of several video/ist/etc headers + sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ +@@ -209,6 +224,10 @@ if [ -d "$linux/LICENSES" ]; then + done + fi + ++cat <$output/linux-headers/linux/kvm_para.h ++#include "standard-headers/linux/kvm_para.h" ++#include ++EOF + cat <$output/linux-headers/linux/virtio_config.h + #include "standard-headers/linux/virtio_config.h" + EOF +@@ -231,6 +250,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ ++ "$hdrdir/include/linux/kvm_para.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + 
"$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" +-- +2.39.3 + diff --git a/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch b/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch new file mode 100644 index 0000000..cb0a4d4 --- /dev/null +++ b/kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch @@ -0,0 +1,95 @@ +From 00e250d9df1949d363758a34e3f46d8c71be054f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 3 Jun 2024 14:16:55 +0200 +Subject: [PATCH 051/100] update-linux-headers: move pvpanic.h to correct + directory + +RH-Author: Paolo Bonzini +RH-MergeRequest: 245: SEV-SNP support +RH-Jira: RHEL-39544 +RH-Acked-by: Thomas Huth +RH-Acked-by: Bandan Das +RH-Acked-by: Vitaly Kuznetsov +RH-Commit: [51/91] 10efff5bbcb867ba34f3f9ff8045381ea96f94c7 (bonzini/rhel-qemu-kvm) + +Linux has <misc/pvpanic.h>, not <linux/pvpanic.h>. Use the same +directory for QEMU's include/standard-headers/ copy. + +Reviewed-by: Thomas Huth +Signed-off-by: Paolo Bonzini +(cherry picked from commit b8116f4cbaa0f64bb07564f20b3b5219e23c8bff) +Signed-off-by: Paolo Bonzini +--- + hw/misc/pvpanic-isa.c | 2 +- + hw/misc/pvpanic-pci.c | 2 +- + hw/misc/pvpanic.c | 2 +- + include/standard-headers/{linux => misc}/pvpanic.h | 0 + scripts/update-linux-headers.sh | 6 ++++-- + 5 files changed, 7 insertions(+), 5 deletions(-) + rename include/standard-headers/{linux => misc}/pvpanic.h (100%) + +diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c +index ccec50f61b..b4f84c4110 100644 +--- a/hw/misc/pvpanic-isa.c ++++ b/hw/misc/pvpanic-isa.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/isa/isa.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + #include "hw/acpi/acpi_aml_interface.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) +diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c +index 83be95d0d2..4d44a881da 100644 +--- 
a/hw/misc/pvpanic-pci.c ++++ b/hw/misc/pvpanic-pci.c +@@ -21,7 +21,7 @@ + #include "hw/misc/pvpanic.h" + #include "qom/object.h" + #include "hw/pci/pci_device.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE) + +diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c +index 1540e9091a..80289ecf5f 100644 +--- a/hw/misc/pvpanic.c ++++ b/hw/misc/pvpanic.c +@@ -21,7 +21,7 @@ + #include "hw/qdev-properties.h" + #include "hw/misc/pvpanic.h" + #include "qom/object.h" +-#include "standard-headers/linux/pvpanic.h" ++#include "standard-headers/misc/pvpanic.h" + + static void handle_event(int event) + { +diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h +similarity index 100% +rename from include/standard-headers/linux/pvpanic.h +rename to include/standard-headers/misc/pvpanic.h +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 78c0f2c43e..90759dcfe0 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -232,10 +232,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/vhost_types.h" \ +- "$hdrdir/include/linux/sysinfo.h" \ +- "$hdrdir/include/misc/pvpanic.h"; do ++ "$hdrdir/include/linux/sysinfo.h"; do + cp_portable "$i" "$output/include/standard-headers/linux" + done ++mkdir -p "$output/include/standard-headers/misc" ++cp_portable "$hdrdir/include/misc/pvpanic.h" \ ++ "$output/include/standard-headers/misc" + mkdir -p "$output/include/standard-headers/drm" + cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ + "$output/include/standard-headers/drm" +-- +2.39.3 + diff --git a/kvm-virtio-rng-block-max-bytes-0.patch b/kvm-virtio-rng-block-max-bytes-0.patch new file mode 100644 index 0000000..2fba53d --- /dev/null +++ b/kvm-virtio-rng-block-max-bytes-0.patch @@ 
-0,0 +1,49 @@ +From 3dd1412176a8ee6c06b5d41aa00ca49b535d99b7 Mon Sep 17 00:00:00 2001 +From: "Michael S. Tsirkin" +Date: Wed, 24 Jul 2024 06:48:59 -0400 +Subject: [PATCH 092/100] virtio-rng: block max-bytes=0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 259: virtio-rng: block max-bytes=0 +RH-Jira: RHEL-50336 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Thomas Huth +RH-Acked-by: Eric Auger +RH-Commit: [1/1] 6d9852cc7cf7fdf49521b6301ceda26e11b1291f (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-50336 + +with max-bytes set to 0, quota is 0 and so device does not work. +block this to avoid user confusion + +Message-Id: <73a89a42d82ec8b47358f25119b87063e4a6ea57.1721818306.git.mst@redhat.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +(cherry picked from commit 024d046bf41b5256adec671085bcee767a6da125) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-rng.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c +index f74efffef7..7cf31da071 100644 +--- a/hw/virtio/virtio-rng.c ++++ b/hw/virtio/virtio-rng.c +@@ -184,8 +184,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp) + + /* Workaround: Property parsing does not enforce unsigned integers, + * So this is a hack to reject such numbers. 
*/ +- if (vrng->conf.max_bytes > INT64_MAX) { +- error_setg(errp, "'max-bytes' parameter must be non-negative, " ++ if (vrng->conf.max_bytes == 0 || ++ vrng->conf.max_bytes > INT64_MAX) { ++ error_setg(errp, "'max-bytes' parameter must be positive, " + "and less than 2^63"); + return; + } +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 65a1cc7..5159c7b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 9.0.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -228,6 +228,206 @@ Patch34: kvm-block-Parse-filenames-only-when-explicitly-requested.patch Patch35: kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch # For RHEL-39936 - ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (128 < 256) on FUJITSU Patch36: kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch37: kvm-introduce-pc_rhel_9_5_compat.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch38: kvm-target-i386-add-guest-phys-bits-cpu-property.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch39: kvm-kvm-add-support-for-guest-physical-bits.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch40: kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch41: kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch42: kvm-target-i386-Add-new-CPU-model-SierraForest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch43: 
kvm-target-i386-Export-RFDS-bit-to-guests.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch44: kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch45: kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch46: kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch47: kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch48: kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch49: kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch50: kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch51: kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch52: kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch53: kvm-linux-headers-update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch54: kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch55: kvm-KVM-track-whether-guest-state-is-encrypted.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch56: kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch57: kvm-target-i386-introduce-x86-confidential-guest.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch58: 
kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch59: kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch60: kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch61: kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch62: kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch63: kvm-kvm-Introduce-support-for-memory_attributes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch64: kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch65: kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch66: kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch67: kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch68: kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch69: kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch70: kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch71: kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch72: kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch73: 
kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch74: kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch75: kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch76: kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch77: kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch78: kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch79: kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch80: kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch81: kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch82: kvm-hw-i386-split-x86.c-in-multiple-parts.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch83: kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch84: kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch85: kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch86: kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch87: kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu 
+Patch88: kvm-linux-headers-Update-to-current-kvm-next.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch89: kvm-update-linux-headers-import-linux-kvm_para.h-header.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch90: kvm-machine-allow-early-use-of-machine_require_guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch91: kvm-i386-sev-Replace-error_report-with-error_setg.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch92: kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch93: kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch94: kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch95: kvm-i386-sev-Introduce-sev-snp-guest-object.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch96: kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch97: kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch98: kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch99: kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch100: kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch101: kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch102: kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch103: 
kvm-i386-sev-Add-the-SNP-launch-start-context.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch104: kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch105: kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch106: kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch107: kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch108: kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch109: kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch110: kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch111: kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch112: kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch113: kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch114: kvm-i386-sev-Extract-build_kernel_loader_hashes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch115: kvm-i386-sev-Reorder-struct-declarations.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch116: kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch117: kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch118: 
kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch119: kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch120: kvm-i386-sev-fix-unreachable-code-coverity-issue.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch121: kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch122: kvm-i386-sev-Return-when-sev_common-is-null.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch123: kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch124: kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch125: kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch126: kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch +# For RHEL-39544 - [QEMU] Add support for AMD SEV-SNP to Qemu +Patch127: kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch +# For RHEL-50336 - Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices +Patch128: kvm-virtio-rng-block-max-bytes-0.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch129: kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch130: kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert +Patch131: kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch +# For RHEL-50000 - scsi-block: Cannot setup Windows Failover Cluster, qemu crashes 
on assert +Patch132: kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch133: kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch134: kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch135: kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch +# For RHEL-52617 - CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5] +Patch136: kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch %if %{have_clang} BuildRequires: clang @@ -1294,6 +1494,116 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Aug 14 2024 Miroslav Rezanina - 9.0.0-8 +- kvm-introduce-pc_rhel_9_5_compat.patch [RHEL-39544] +- kvm-target-i386-add-guest-phys-bits-cpu-property.patch [RHEL-39544] +- kvm-kvm-add-support-for-guest-physical-bits.patch [RHEL-39544] +- kvm-i386-kvm-Move-architectural-CPUID-leaf-generation-to.patch [RHEL-39544] +- kvm-target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch [RHEL-39544] +- kvm-target-i386-Add-new-CPU-model-SierraForest.patch [RHEL-39544] +- kvm-target-i386-Export-RFDS-bit-to-guests.patch [RHEL-39544] +- kvm-pci-host-q35-Move-PAM-initialization-above-SMRAM-ini.patch [RHEL-39544] +- kvm-q35-Introduce-smm_ranges-property-for-q35-pci-host.patch [RHEL-39544] +- kvm-hw-i386-acpi-Set-PCAT_COMPAT-bit-only-when-pic-is-no.patch [RHEL-39544] +- kvm-confidential-guest-support-Add-kvm_init-and-kvm_rese.patch [RHEL-39544] +- kvm-i386-sev-Switch-to-use-confidential_guest_kvm_init.patch 
[RHEL-39544] +- kvm-ppc-pef-switch-to-use-confidential_guest_kvm_init-re.patch [RHEL-39544] +- kvm-s390-Switch-to-use-confidential_guest_kvm_init.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-setup_data.h-to-imp.patch [RHEL-39544] +- kvm-scripts-update-linux-headers-Add-bits.h-to-file-impo.patch [RHEL-39544] +- kvm-linux-headers-update-to-current-kvm-next.patch [RHEL-39544] +- kvm-runstate-skip-initial-CPU-reset-if-reset-is-not-actu.patch [RHEL-39544] +- kvm-KVM-track-whether-guest-state-is-encrypted.patch [RHEL-39544] +- kvm-KVM-remove-kvm_arch_cpu_check_are_resettable.patch [RHEL-39544] +- kvm-target-i386-introduce-x86-confidential-guest.patch [RHEL-39544] +- kvm-target-i386-Implement-mc-kvm_type-to-get-VM-type.patch [RHEL-39544] +- kvm-target-i386-SEV-use-KVM_SEV_INIT2-if-possible.patch [RHEL-39544] +- kvm-i386-sev-Add-legacy-vm-type-parameter-for-SEV-guest-.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-legacy-SEV-VM-types-for-older-machin.patch [RHEL-39544] +- kvm-trace-kvm-Split-address-space-and-slot-id-in-trace_k.patch [RHEL-39544] +- kvm-kvm-Introduce-support-for-memory_attributes.patch [RHEL-39544] +- kvm-RAMBlock-Add-support-of-KVM-private-guest-memfd.patch [RHEL-39544] +- kvm-kvm-Enable-KVM_SET_USER_MEMORY_REGION2-for-memslot.patch [RHEL-39544] +- kvm-kvm-memory-Make-memory-type-private-by-default-if-it.patch [RHEL-39544] +- kvm-HostMem-Add-mechanism-to-opt-in-kvm-guest-memfd-via-.patch [RHEL-39544] +- kvm-RAMBlock-make-guest_memfd-require-uncoordinated-disc.patch [RHEL-39544] +- kvm-physmem-Introduce-ram_block_discard_guest_memfd_rang.patch [RHEL-39544] +- kvm-kvm-handle-KVM_EXIT_MEMORY_FAULT.patch [RHEL-39544] +- kvm-kvm-tdx-Don-t-complain-when-converting-vMMIO-region-.patch [RHEL-39544] +- kvm-kvm-tdx-Ignore-memory-conversion-to-shared-of-unassi.patch [RHEL-39544] +- kvm-hw-i386-x86-Eliminate-two-if-statements-in-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-Have-x86_bios_rom_init-take-X86MachineState-.patch [RHEL-39544] +- 
kvm-hw-i386-pc_sysfw-Remove-unused-parameter-from-pc_isa.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-isa-bios-memory-regions.patch [RHEL-39544] +- kvm-hw-i386-x86-Don-t-leak-pc.bios-memory-region.patch [RHEL-39544] +- kvm-hw-i386-x86-Extract-x86_isa_bios_init-from-x86_bios_.patch [RHEL-39544] +- kvm-hw-i386-pc_sysfw-Alias-rather-than-copy-isa-bios-reg.patch [RHEL-39544] +- kvm-i386-correctly-select-code-in-hw-i386-that-depends-o.patch [RHEL-39544] +- kvm-i386-pc-remove-unnecessary-MachineClass-overrides.patch [RHEL-39544] +- kvm-hw-i386-split-x86.c-in-multiple-parts.patch [RHEL-39544] +- kvm-scripts-update-linux-header.sh-be-more-src-tree-frie.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Remove-temporary-dir.patch [RHEL-39544] +- kvm-scripts-update-linux-headers.sh-Fix-the-path-of-setu.patch [RHEL-39544] +- kvm-update-linux-headers-fix-forwarding-to-asm-generic-h.patch [RHEL-39544] +- kvm-update-linux-headers-move-pvpanic.h-to-correct-direc.patch [RHEL-39544] +- kvm-linux-headers-Update-to-current-kvm-next.patch [RHEL-39544] +- kvm-update-linux-headers-import-linux-kvm_para.h-header.patch [RHEL-39544] +- kvm-machine-allow-early-use-of-machine_require_guest_mem.patch [RHEL-39544] +- kvm-i386-sev-Replace-error_report-with-error_setg.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-common-type-to-encapsulate-co.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_update-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Move-sev_launch_finish-to-separate-class-me.patch [RHEL-39544] +- kvm-i386-sev-Introduce-sev-snp-guest-object.patch [RHEL-39544] +- kvm-i386-sev-Add-a-sev_snp_enabled-helper.patch [RHEL-39544] +- kvm-i386-sev-Add-sev_kvm_init-override-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Add-snp_kvm_init-override-for-SNP-class.patch [RHEL-39544] +- kvm-i386-cpu-Set-SEV-SNP-CPUID-bit-when-SNP-enabled.patch [RHEL-39544] +- kvm-i386-sev-Don-t-return-launch-measurements-for-SEV-SN.patch [RHEL-39544] +- 
kvm-i386-sev-Add-a-class-method-to-determine-KVM-VM-type.patch [RHEL-39544] +- kvm-i386-sev-Update-query-sev-QAPI-format-to-handle-SEV-.patch [RHEL-39544] +- kvm-i386-sev-Add-the-SNP-launch-start-context.patch [RHEL-39544] +- kvm-i386-sev-Add-handling-to-encrypt-finalize-guest-laun.patch [RHEL-39544] +- kvm-i386-sev-Set-CPU-state-to-protected-once-SNP-guest-p.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-function-to-get-SEV-metadata-from-OV.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-populating-OVMF-metadata-pa.patch [RHEL-39544] +- kvm-i386-sev-Add-support-for-SNP-CPUID-validation.patch [RHEL-39544] +- kvm-hw-i386-sev-Add-support-to-encrypt-BIOS-when-SEV-SNP.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SEV-class.patch [RHEL-39544] +- kvm-i386-sev-Invoke-launch_updata_data-for-SNP-class.patch [RHEL-39544] +- kvm-i386-kvm-Add-KVM_EXIT_HYPERCALL-handling-for-KVM_HC_.patch [RHEL-39544] +- kvm-i386-sev-Enable-KVM_HC_MAP_GPA_RANGE-hcall-for-SNP-g.patch [RHEL-39544] +- kvm-i386-sev-Extract-build_kernel_loader_hashes.patch [RHEL-39544] +- kvm-i386-sev-Reorder-struct-declarations.patch [RHEL-39544] +- kvm-i386-sev-Allow-measured-direct-kernel-boot-on-SNP.patch [RHEL-39544] +- kvm-memory-Introduce-memory_region_init_ram_guest_memfd.patch [RHEL-39544] +- kvm-hw-i386-sev-Use-guest_memfd-for-legacy-ROMs.patch [RHEL-39544] +- kvm-hw-i386-Add-support-for-loading-BIOS-using-guest_mem.patch [RHEL-39544] +- kvm-i386-sev-fix-unreachable-code-coverity-issue.patch [RHEL-39544] +- kvm-i386-sev-Move-SEV_COMMON-null-check-before-dereferen.patch [RHEL-39544] +- kvm-i386-sev-Return-when-sev_common-is-null.patch [RHEL-39544] +- kvm-target-i386-SEV-fix-formatting-of-CPUID-mismatch-mes.patch [RHEL-39544] +- kvm-i386-sev-Fix-error-message-in-sev_get_capabilities.patch [RHEL-39544] +- kvm-i386-sev-Fallback-to-the-default-SEV-device-if-none-.patch [RHEL-39544] +- kvm-i386-sev-Don-t-allow-automatic-fallback-to-legacy-KV.patch [RHEL-39544] +- 
kvm-target-i386-SEV-fix-mismatch-in-vcek-disabled-proper.patch [RHEL-39544] +- kvm-virtio-rng-block-max-bytes-0.patch [RHEL-50336] +- kvm-scsi-disk-Use-positive-return-value-for-status-in-dm.patch [RHEL-50000] +- kvm-scsi-block-Don-t-skip-callback-for-sgio-error-status.patch [RHEL-50000] +- kvm-scsi-disk-Add-warning-comments-that-host_status-erro.patch [RHEL-50000] +- kvm-scsi-disk-Always-report-RESERVATION_CONFLICT-to-gues.patch [RHEL-50000] +- kvm-nbd-server-Plumb-in-new-args-to-nbd_client_add.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Cap-default-max-connections.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Drop-non-negotiating-client.patch [RHEL-52617] +- kvm-nbd-server-CVE-2024-7409-Close-stray-clients-at-serv.patch [RHEL-52617] +- Resolves: RHEL-39544 + ([QEMU] Add support for AMD SEV-SNP to Qemu) +- Resolves: RHEL-50336 + (Fail to boot up the guest including vtpm and virtio-rng (max-bytes=0) devices) +- Resolves: RHEL-50000 + (scsi-block: Cannot setup Windows Failover Cluster, qemu crashes on assert) +- Resolves: RHEL-52617 + (CVE-2024-7409 qemu-kvm: Denial of Service via Improper Synchronization in QEMU NBD Server During Socket Closure [rhel-9.5]) + * Mon Jul 15 2024 Miroslav Rezanina - 9.0.0-7 - kvm-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch [RHEL-40708] - kvm-hw-arm-virt-Avoid-unexpected-warning-from-Linux-gues.patch [RHEL-39936]