qemu-kvm/kvm-target-i386-mshv-Add-x86-decoder-emu-implementation.patch
Miroslav Rezanina ca65b33041 * Tue Dec 09 2025 Miroslav Rezanina <mrezanin@redhat.com> - 10.1.0-8
- kvm-block-backend-Fix-race-when-resuming-queued-requests.patch [RHEL-129540]
- kvm-file-posix-Handle-suspended-dm-multipath-better-for-.patch [RHEL-121543]
- kvm-accel-Add-Meson-and-config-support-for-MSHV-accelera.patch [RHEL-134212]
- kvm-target-i386-emulate-Allow-instruction-decoding-from-.patch [RHEL-134212]
- kvm-target-i386-mshv-Add-x86-decoder-emu-implementation.patch [RHEL-134212]
- kvm-hw-intc-Generalize-APIC-helper-names-from-kvm_-to-ac.patch [RHEL-134212]
- kvm-include-hw-hyperv-Add-MSHV-ABI-header-definitions.patch [RHEL-134212]
- kvm-linux-headers-linux-Add-mshv.h-headers.patch [RHEL-134212]
- kvm-accel-mshv-Add-accelerator-skeleton.patch [RHEL-134212]
- kvm-accel-mshv-Register-memory-region-listeners.patch [RHEL-134212]
- kvm-accel-mshv-Initialize-VM-partition.patch [RHEL-134212]
- kvm-treewide-rename-qemu_wait_io_event-qemu_wait_io_even.patch [RHEL-134212]
- kvm-accel-mshv-Add-vCPU-creation-and-execution-loop.patch [RHEL-134212]
- kvm-accel-mshv-Add-vCPU-signal-handling.patch [RHEL-134212]
- kvm-target-i386-mshv-Add-CPU-create-and-remove-logic.patch [RHEL-134212]
- kvm-target-i386-mshv-Implement-mshv_store_regs.patch [RHEL-134212]
- kvm-target-i386-mshv-Implement-mshv_get_standard_regs.patch [RHEL-134212]
- kvm-target-i386-mshv-Implement-mshv_get_special_regs.patch [RHEL-134212]
- kvm-target-i386-mshv-Implement-mshv_arch_put_registers.patch [RHEL-134212]
- kvm-target-i386-mshv-Set-local-interrupt-controller-stat.patch [RHEL-134212]
- kvm-target-i386-mshv-Register-CPUID-entries-with-MSHV.patch [RHEL-134212]
- kvm-target-i386-mshv-Register-MSRs-with-MSHV.patch [RHEL-134212]
- kvm-target-i386-mshv-Integrate-x86-instruction-decoder-e.patch [RHEL-134212]
- kvm-target-i386-mshv-Write-MSRs-to-the-hypervisor.patch [RHEL-134212]
- kvm-target-i386-mshv-Implement-mshv_vcpu_run.patch [RHEL-134212]
- kvm-accel-mshv-Handle-overlapping-mem-mappings.patch [RHEL-134212]
- kvm-qapi-accel-Allow-to-query-mshv-capabilities.patch [RHEL-134212]
- kvm-target-i386-mshv-Use-preallocated-page-for-hvcall.patch [RHEL-134212]
- kvm-docs-Add-mshv-to-documentation.patch [RHEL-134212]
- kvm-MAINTAINERS-Add-maintainers-for-mshv-accelerator.patch [RHEL-134212]
- kvm-accel-mshv-initialize-thread-name.patch [RHEL-134212]
- kvm-accel-mshv-use-return-value-of-handle_pio_str_read.patch [RHEL-134212]
- Resolves: RHEL-129540
  (Assertion failure on drain with iothread and I/O load)
- Resolves: RHEL-121543
  (The VM hit io error when do S3-PR integration on the pass-through  failover multipath device)
- Resolves: RHEL-134212
  ([RHEL10.2] L1VH qemu downstream initial merge RHEL10.2)
2025-12-09 11:43:14 +01:00

432 lines
11 KiB
Diff

From 84edc1855313a6a06517794e1b2e65e75691725e Mon Sep 17 00:00:00 2001
From: Magnus Kulke <magnuskulke@linux.microsoft.com>
Date: Tue, 16 Sep 2025 18:48:23 +0200
Subject: [PATCH 05/32] target/i386/mshv: Add x86 decoder/emu implementation
RH-Author: Igor Mammedov <imammedo@redhat.com>
RH-MergeRequest: 437: el10: x86: enablement for Azure L1VH OCP readiness
RH-Jira: RHEL-134212
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/30] 8fa2e2f50c18532d98ef3917f624bc3e65ad167d
The MSHV accelerator requires a x86 decoder/emulator in userland to
emulate MMIO instructions. This change contains the implementations for
the generalized i386 instruction decoder/emulator.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
Link: https://lore.kernel.org/r/20250916164847.77883-4-magnuskulke@linux.microsoft.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0daf817c80b57e58168309420abf0a8a3d2a60f6)
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
---
include/system/mshv.h | 25 +++
target/i386/cpu.h | 2 +-
target/i386/emulate/meson.build | 7 +-
target/i386/meson.build | 2 +
target/i386/mshv/meson.build | 7 +
target/i386/mshv/x86.c | 297 ++++++++++++++++++++++++++++++++
6 files changed, 337 insertions(+), 3 deletions(-)
create mode 100644 include/system/mshv.h
create mode 100644 target/i386/mshv/meson.build
create mode 100644 target/i386/mshv/x86.c
diff --git a/include/system/mshv.h b/include/system/mshv.h
new file mode 100644
index 0000000000..342f1ef6a9
--- /dev/null
+++ b/include/system/mshv.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ * Jinank Jain <jinankjain@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ */
+
+#ifndef QEMU_MSHV_H
+#define QEMU_MSHV_H
+
+/*
+ * CONFIG_MSHV_IS_POSSIBLE is defined unless this header is compiled
+ * per-target (COMPILING_PER_TARGET) with CONFIG_MSHV undefined.
+ */
+#ifdef COMPILING_PER_TARGET
+#ifdef CONFIG_MSHV
+#define CONFIG_MSHV_IS_POSSIBLE
+#endif
+#else
+#define CONFIG_MSHV_IS_POSSIBLE
+#endif
+
+#endif
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 2187e61654..4b7eae43e1 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2126,7 +2126,7 @@ typedef struct CPUArchState {
QEMUTimer *xen_periodic_timer;
QemuMutex xen_timers_lock;
#endif
-#if defined(CONFIG_HVF)
+#if defined(CONFIG_HVF) || defined(CONFIG_MSHV)
void *emu_mmio_buf;
#endif
diff --git a/target/i386/emulate/meson.build b/target/i386/emulate/meson.build
index 4edd4f462f..b6dafb6a5b 100644
--- a/target/i386/emulate/meson.build
+++ b/target/i386/emulate/meson.build
@@ -1,5 +1,8 @@
-i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
+emulator_files = files(
'x86_decode.c',
'x86_emu.c',
'x86_flags.c',
-))
+)
+
+i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: emulator_files)
+i386_system_ss.add(when: 'CONFIG_MSHV', if_true: emulator_files)
diff --git a/target/i386/meson.build b/target/i386/meson.build
index 092af34e2d..89ba4912aa 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -13,6 +13,7 @@ i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_WHPX', if_true: files('host-cpu.c'))
i386_ss.add(when: 'CONFIG_NVMM', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_MSHV', if_true: files('host-cpu.c'))
i386_system_ss = ss.source_set()
i386_system_ss.add(files(
@@ -34,6 +35,7 @@ subdir('nvmm')
subdir('hvf')
subdir('tcg')
subdir('emulate')
+subdir('mshv')
target_arch += {'i386': i386_ss}
target_system_arch += {'i386': i386_system_ss}
diff --git a/target/i386/mshv/meson.build b/target/i386/mshv/meson.build
new file mode 100644
index 0000000000..8ddaa7c11d
--- /dev/null
+++ b/target/i386/mshv/meson.build
@@ -0,0 +1,7 @@
+i386_mshv_ss = ss.source_set()
+
+i386_mshv_ss.add(files(
+ 'x86.c',
+))
+
+i386_system_ss.add_all(when: 'CONFIG_MSHV', if_true: i386_mshv_ss)
diff --git a/target/i386/mshv/x86.c b/target/i386/mshv/x86.c
new file mode 100644
index 0000000000..d574b3bc52
--- /dev/null
+++ b/target/i386/mshv/x86.c
@@ -0,0 +1,297 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "cpu.h"
+#include "emulate/x86_decode.h"
+#include "emulate/x86_emu.h"
+#include "qemu/typedefs.h"
+#include "qemu/error-report.h"
+#include "system/mshv.h"
+
+/*
+ * Masks tested against the 4-bit descriptor type field,
+ * (flags >> DESC_TYPE_SHIFT) & 15, by the segment_*() helpers in this file.
+ */
+/* RW or Exec segment */
+static const uint8_t RWRX_SEGMENT_TYPE = 0x2;
+/* set for code segments; see segment_type_code() */
+static const uint8_t CODE_SEGMENT_TYPE = 0x8;
+/* set for expand-down segments; ignored when the code bit is also set */
+static const uint8_t EXPAND_DOWN_SEGMENT_TYPE = 0x4;
+
+/* Operating mode of a vCPU as classified by cpu_mode(). */
+typedef enum CpuMode {
+ /* x86_is_protected() is false */
+ REAL_MODE,
+ /* x86_is_protected() is true, x86_is_long_mode() is false */
+ PROTECTED_MODE,
+ /* both x86_is_protected() and x86_is_long_mode() are true */
+ LONG_MODE,
+} CpuMode;
+
+/*
+ * Classify the current operating mode of @cpu.
+ *
+ * Real mode is reported whenever protection is off, without consulting
+ * x86_is_long_mode(); otherwise long mode takes precedence over
+ * protected mode.
+ */
+static CpuMode cpu_mode(CPUState *cpu)
+{
+    if (!x86_is_protected(cpu)) {
+        return REAL_MODE;
+    }
+
+    return x86_is_long_mode(cpu) ? LONG_MODE : PROTECTED_MODE;
+}
+
+/*
+ * Report whether the descriptor type of @seg has no bit set other than
+ * RWRX_SEGMENT_TYPE.
+ *
+ * NOTE(review): as written this is true exactly for type values 0 and 2,
+ * so the result does not depend on the RWRX bit itself - confirm that this
+ * is the intended meaning of "read-only" before relying on it.
+ */
+static bool segment_type_ro(const SegmentCache *seg)
+{
+    const uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 0xF;
+    const uint32_t other_bits = desc_type & ~RWRX_SEGMENT_TYPE;
+
+    return other_bits == 0;
+}
+
+/* Return true when the CODE_SEGMENT_TYPE bit is set in @seg's type field. */
+static bool segment_type_code(const SegmentCache *seg)
+{
+    const uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 0xF;
+
+    return (desc_type & CODE_SEGMENT_TYPE) == CODE_SEGMENT_TYPE;
+}
+
+/*
+ * Return true when @seg is a non-code segment whose type field has the
+ * EXPAND_DOWN_SEGMENT_TYPE bit set.
+ */
+static bool segment_expands_down(const SegmentCache *seg)
+{
+    const uint32_t desc_type = (seg->flags >> DESC_TYPE_SHIFT) & 0xF;
+
+    /* a code segment is never reported as expand-down */
+    return !segment_type_code(seg) &&
+           (desc_type & EXPAND_DOWN_SEGMENT_TYPE) != 0;
+}
+
+/*
+ * Return the limit of @seg.
+ *
+ * When DESC_G_MASK is set in the segment flags the stored limit is shifted
+ * left by 12 bits and the low 12 bits are filled with ones; otherwise the
+ * stored limit is returned unchanged.
+ */
+static uint32_t segment_limit(const SegmentCache *seg)
+{
+    const bool page_granular = (seg->flags & DESC_G_MASK) != 0;
+    uint32_t raw_limit = seg->limit;
+
+    return page_granular ? ((raw_limit << 12) | 0xFFF) : raw_limit;
+}
+
+/* Return the bit found at position DESC_B_SHIFT of @seg's flags (0 or 1). */
+static uint8_t segment_db(const SegmentCache *seg)
+{
+    const uint32_t shifted = seg->flags >> DESC_B_SHIFT;
+
+    return shifted & 1;
+}
+
+/*
+ * Return the largest offset used as upper bound for @seg: 0xFFFFFFFF when
+ * segment_db() is non-zero, 0xFFFF otherwise.
+ */
+static uint32_t segment_max_limit(const SegmentCache *seg)
+{
+    return segment_db(seg) != 0 ? 0xFFFFFFFF : 0xFFFF;
+}
+
+/*
+ * Translate a segment-relative (logical) address into a linear address.
+ *
+ * @cpu:          vCPU whose segment state is consulted
+ * @logical_addr: offset within the segment
+ * @linear_addr:  receives the linear address on success
+ * @seg_idx:      segment register to translate through
+ *
+ * In long mode the segment base is added and an overflowing sum is
+ * rejected; no limit check is made.  In real and protected mode the offset
+ * is truncated to 32 bits and checked against the segment limit before the
+ * base is added.
+ *
+ * Returns 0 on success, -1 after reporting an error on failure.
+ */
+static int linearize(CPUState *cpu,
+                     target_ulong logical_addr, target_ulong *linear_addr,
+                     X86Seg seg_idx)
+{
+    enum CpuMode mode;
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+    SegmentCache *seg = &env->segs[seg_idx];
+    target_ulong base = seg->base;
+    target_ulong logical_addr_32b;
+    uint32_t limit;
+    /* TODO: the emulator will not pass us "write" indicator yet */
+    bool write = false;
+
+    mode = cpu_mode(cpu);
+
+    switch (mode) {
+    case LONG_MODE:
+        if (__builtin_add_overflow(logical_addr, base, linear_addr)) {
+            error_report("Address overflow");
+            return -1;
+        }
+        break;
+    case PROTECTED_MODE:
+    case REAL_MODE:
+        if (segment_type_ro(seg) && write) {
+            error_report("Cannot write to read-only segment");
+            return -1;
+        }
+
+        logical_addr_32b = logical_addr & 0xFFFFFFFF;
+        limit = segment_limit(seg);
+
+        if (segment_expands_down(seg)) {
+            /*
+             * For an expand-down segment the limit is the lower bound:
+             * valid offsets run from limit + 1 up to 0xFFFF or 0xFFFFFFFF
+             * depending on the D/B flag, so offsets at or below the limit
+             * are the invalid ones.
+             */
+            if (logical_addr_32b <= limit) {
+                error_report("Address at or below limit (expands down)");
+                return -1;
+            }
+
+            /* the upper bound is checked by the common test below */
+            limit = segment_max_limit(seg);
+        }
+
+        if (logical_addr_32b > limit) {
+            error_report("Address exceeds limit %u", limit);
+            return -1;
+        }
+        *linear_addr = logical_addr_32b + base;
+        break;
+    default:
+        error_report("Unknown cpu mode: %d", mode);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Fetch the descriptor selected by @sel from the guest GDT or LDT.
+ *
+ * @desc is zeroed first.  Returns false for GDT index 0 and when
+ * sel.index * 8 is not below the table limit; otherwise reads
+ * sizeof(*desc) bytes at table base + sel.index * 8 through
+ * emul_ops->read_mem() and returns true.
+ *
+ * NOTE(review): only the offset of the descriptor's first byte is compared
+ * with the table limit - confirm whether the last byte should be checked.
+ */
+bool x86_read_segment_descriptor(CPUState *cpu,
+                                 struct x86_segment_descriptor *desc,
+                                 x86_segment_selector sel)
+{
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+    const bool in_gdt = (GDT_SEL == sel.ti);
+    target_ulong table_base;
+    target_ulong entry_gva;
+    uint32_t table_limit;
+
+    memset(desc, 0, sizeof(*desc));
+
+    /* valid gdt descriptors start from index 1 */
+    if (in_gdt && !sel.index) {
+        return false;
+    }
+
+    table_base = in_gdt ? env->gdt.base : env->ldt.base;
+    table_limit = in_gdt ? env->gdt.limit : env->ldt.limit;
+
+    if (sel.index * 8 >= table_limit) {
+        return false;
+    }
+
+    entry_gva = table_base + sel.index * 8;
+    emul_ops->read_mem(cpu, desc, entry_gva, sizeof(*desc));
+
+    return true;
+}
+
+/*
+ * Read gate descriptor number @gate from the guest IDT into @idt_desc.
+ *
+ * @idt_desc is zeroed first.  Returns false, after reporting an error,
+ * when gate * 8 is not below the IDT limit; otherwise reads
+ * sizeof(*idt_desc) bytes at idt.base + gate * 8 through
+ * emul_ops->read_mem() and returns true.
+ */
+bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc,
+                        int gate)
+{
+    target_ulong base;
+    uint32_t limit;
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    CPUX86State *env = &x86_cpu->env;
+    target_ulong gva;
+
+    base = env->idt.base;
+    limit = env->idt.limit;
+
+    memset(idt_desc, 0, sizeof(*idt_desc));
+    if (gate * 8 >= limit) {
+        /*
+         * error_report() rather than perror(): no libc call failed here,
+         * so errno holds nothing relevant to this message.
+         */
+        error_report("call gate exceeds idt limit");
+        return false;
+    }
+
+    gva = base + gate * 8;
+    emul_ops->read_mem(cpu, idt_desc, gva, sizeof(*idt_desc));
+
+    return true;
+}
+
+/* Return true when CR0_PE_MASK is set in the vCPU's cr[0]. */
+bool x86_is_protected(CPUState *cpu)
+{
+    CPUX86State *env = &X86_CPU(cpu)->env;
+
+    return (env->cr[0] & CR0_PE_MASK) != 0;
+}
+
+/* Return true when the vCPU is not in protected mode. */
+bool x86_is_real(CPUState *cpu)
+{
+    return x86_is_protected(cpu) ? false : true;
+}
+
+/*
+ * Return true when the vCPU is in protected mode and VM_MASK is set in
+ * its eflags.
+ */
+bool x86_is_v8086(CPUState *cpu)
+{
+    CPUX86State *env = &X86_CPU(cpu)->env;
+
+    if (!x86_is_protected(cpu)) {
+        return false;
+    }
+
+    return (env->eflags & VM_MASK) != 0;
+}
+
+/* Return true when both MSR_EFER_LME and MSR_EFER_LMA are set in EFER. */
+bool x86_is_long_mode(CPUState *cpu)
+{
+    const uint64_t required = MSR_EFER_LME | MSR_EFER_LMA;
+    CPUX86State *env = &X86_CPU(cpu)->env;
+    const uint64_t efer = env->efer;
+
+    return (efer & required) == required;
+}
+
+/*
+ * Not implemented: reports an error and aborts the process, so no value
+ * is ever returned to the caller.
+ */
+bool x86_is_long64_mode(CPUState *cpu)
+{
+ error_report("unimplemented: is_long64_mode()");
+ abort();
+}
+
+/* Return true when CR0_PG_MASK is set in the vCPU's cr[0]. */
+bool x86_is_paging_mode(CPUState *cpu)
+{
+    CPUX86State *env = &X86_CPU(cpu)->env;
+
+    return (env->cr[0] & CR0_PG_MASK) != 0;
+}
+
+/* Return true when CR4_PAE_MASK is set in the vCPU's cr[4]. */
+bool x86_is_pae_enabled(CPUState *cpu)
+{
+    CPUX86State *env = &X86_CPU(cpu)->env;
+
+    return (env->cr[4] & CR4_PAE_MASK) != 0;
+}
+
+/*
+ * Translate @addr, an offset within segment @seg, to a linear address.
+ *
+ * Aborts the process, after reporting an error, when linearize() fails.
+ */
+target_ulong linear_addr(CPUState *cpu, target_ulong addr, X86Seg seg)
+{
+    target_ulong result;
+
+    if (linearize(cpu, addr, &result, seg) < 0) {
+        error_report("failed to linearize address");
+        abort();
+    }
+
+    return result;
+}
+
+/*
+ * Like linear_addr(), but first truncates @addr to 16 bits when @size is 2
+ * or to 32 bits when @size is 4; any other @size leaves @addr untouched.
+ */
+target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size,
+                              X86Seg seg)
+{
+    if (size == 2) {
+        addr = (uint16_t)addr;
+    } else if (size == 4) {
+        addr = (uint32_t)addr;
+    }
+
+    return linear_addr(cpu, addr, seg);
+}
+
+/*
+ * Translate the instruction pointer @rip to a linear address through the
+ * R_CS segment; see linear_addr() for the failure behaviour.
+ */
+target_ulong linear_rip(CPUState *cpu, target_ulong rip)
+{
+ return linear_addr(cpu, rip, R_CS);
+}
--
2.47.3