diff --git a/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch b/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch new file mode 100644 index 0000000..22f49b7 --- /dev/null +++ b/kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch @@ -0,0 +1,168 @@ +From d2629755385917d277b80267cb88436c950123a7 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:49 -0400 +Subject: [PATCH 07/11] Revert "hw/386: Add EPYC mode topology decoding + functions" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-8-imammedo@redhat.com> +Patchwork-id: 98250 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 7/7] Revert "hw/386: Add EPYC mode topology decoding functions" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 7568b205555a6405042f62c64af3268f4330aed5. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + include/hw/i386/topology.h | 100 ------------------------------------- + 1 file changed, 100 deletions(-) + +diff --git a/include/hw/i386/topology.h b/include/hw/i386/topology.h +index 07239f95f4..b9593b9905 100644 +--- a/include/hw/i386/topology.h ++++ b/include/hw/i386/topology.h +@@ -47,7 +47,6 @@ typedef uint32_t apic_id_t; + + typedef struct X86CPUTopoIDs { + unsigned pkg_id; +- unsigned node_id; + unsigned die_id; + unsigned core_id; + unsigned smt_id; +@@ -89,11 +88,6 @@ static inline unsigned apicid_die_width(X86CPUTopoInfo *topo_info) + return apicid_bitwidth_for_count(topo_info->dies_per_pkg); + } + +-/* Bit width of the node_id field per socket */ +-static inline unsigned apicid_node_width_epyc(X86CPUTopoInfo *topo_info) +-{ +- return apicid_bitwidth_for_count(MAX(topo_info->nodes_per_pkg, 1)); +-} + /* Bit offset of the Core_ID field + */ + static inline unsigned apicid_core_offset(X86CPUTopoInfo *topo_info) +@@ -114,100 +108,6 @@ static inline unsigned apicid_pkg_offset(X86CPUTopoInfo *topo_info) + return apicid_die_offset(topo_info) + apicid_die_width(topo_info); + } + +-#define NODE_ID_OFFSET 3 /* Minimum node_id offset if numa configured */ +- +-/* +- * Bit offset of the node_id field +- * +- * Make sure nodes_per_pkg > 0 if numa configured else zero. +- */ +-static inline unsigned apicid_node_offset_epyc(X86CPUTopoInfo *topo_info) +-{ +- unsigned offset = apicid_die_offset(topo_info) + +- apicid_die_width(topo_info); +- +- if (topo_info->nodes_per_pkg) { +- return MAX(NODE_ID_OFFSET, offset); +- } else { +- return offset; +- } +-} +- +-/* Bit offset of the Pkg_ID (socket ID) field */ +-static inline unsigned apicid_pkg_offset_epyc(X86CPUTopoInfo *topo_info) +-{ +- return apicid_node_offset_epyc(topo_info) + +- apicid_node_width_epyc(topo_info); +-} +- +-/* +- * Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID +- * +- * The caller must make sure core_id < nr_cores and smt_id < nr_threads. 
+- */ +-static inline apic_id_t +-x86_apicid_from_topo_ids_epyc(X86CPUTopoInfo *topo_info, +- const X86CPUTopoIDs *topo_ids) +-{ +- return (topo_ids->pkg_id << apicid_pkg_offset_epyc(topo_info)) | +- (topo_ids->node_id << apicid_node_offset_epyc(topo_info)) | +- (topo_ids->die_id << apicid_die_offset(topo_info)) | +- (topo_ids->core_id << apicid_core_offset(topo_info)) | +- topo_ids->smt_id; +-} +- +-static inline void x86_topo_ids_from_idx_epyc(X86CPUTopoInfo *topo_info, +- unsigned cpu_index, +- X86CPUTopoIDs *topo_ids) +-{ +- unsigned nr_nodes = MAX(topo_info->nodes_per_pkg, 1); +- unsigned nr_dies = topo_info->dies_per_pkg; +- unsigned nr_cores = topo_info->cores_per_die; +- unsigned nr_threads = topo_info->threads_per_core; +- unsigned cores_per_node = DIV_ROUND_UP((nr_dies * nr_cores * nr_threads), +- nr_nodes); +- +- topo_ids->pkg_id = cpu_index / (nr_dies * nr_cores * nr_threads); +- topo_ids->node_id = (cpu_index / cores_per_node) % nr_nodes; +- topo_ids->die_id = cpu_index / (nr_cores * nr_threads) % nr_dies; +- topo_ids->core_id = cpu_index / nr_threads % nr_cores; +- topo_ids->smt_id = cpu_index % nr_threads; +-} +- +-/* +- * Calculate thread/core/package IDs for a specific topology, +- * based on APIC ID +- */ +-static inline void x86_topo_ids_from_apicid_epyc(apic_id_t apicid, +- X86CPUTopoInfo *topo_info, +- X86CPUTopoIDs *topo_ids) +-{ +- topo_ids->smt_id = apicid & +- ~(0xFFFFFFFFUL << apicid_smt_width(topo_info)); +- topo_ids->core_id = +- (apicid >> apicid_core_offset(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_core_width(topo_info)); +- topo_ids->die_id = +- (apicid >> apicid_die_offset(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_die_width(topo_info)); +- topo_ids->node_id = +- (apicid >> apicid_node_offset_epyc(topo_info)) & +- ~(0xFFFFFFFFUL << apicid_node_width_epyc(topo_info)); +- topo_ids->pkg_id = apicid >> apicid_pkg_offset_epyc(topo_info); +-} +- +-/* +- * Make APIC ID for the CPU 'cpu_index' +- * +- * 'cpu_index' is a sequential, contiguous 
ID for the CPU. +- */ +-static inline apic_id_t x86_apicid_from_cpu_idx_epyc(X86CPUTopoInfo *topo_info, +- unsigned cpu_index) +-{ +- X86CPUTopoIDs topo_ids; +- x86_topo_ids_from_idx_epyc(topo_info, cpu_index, &topo_ids); +- return x86_apicid_from_topo_ids_epyc(topo_info, &topo_ids); +-} + /* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID + * + * The caller must make sure core_id < nr_cores and smt_id < nr_threads. +-- +2.27.0 + diff --git a/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch b/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch new file mode 100644 index 0000000..5988443 --- /dev/null +++ b/kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch @@ -0,0 +1,80 @@ +From da24d2c5e2d61043340b601a09f22e41a1d52e5e Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:47 -0400 +Subject: [PATCH 05/11] Revert "hw/i386: Introduce apicid functions inside + X86MachineState" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-6-imammedo@redhat.com> +Patchwork-id: 98246 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 5/7] Revert "hw/i386: Introduce apicid functions inside X86MachineState" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. 
+ +This reverts commit 6121c7fbfd98dbc3af1b00b56ff2eef66df87828. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/x86.c | 5 ----- + include/hw/i386/x86.h | 9 --------- + 2 files changed, 14 deletions(-) + +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 41bdf146bd..4d8cb66258 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -896,11 +896,6 @@ static void x86_machine_initfn(Object *obj) + x86ms->smm = ON_OFF_AUTO_AUTO; + x86ms->acpi = ON_OFF_AUTO_AUTO; + x86ms->smp_dies = 1; +- +- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx; +- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid; +- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids; +- x86ms->apicid_pkg_offset = apicid_pkg_offset; + } + + static void x86_machine_class_init(ObjectClass *oc, void *data) +diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h +index b79f24e285..4d9a26326d 100644 +--- a/include/hw/i386/x86.h ++++ b/include/hw/i386/x86.h +@@ -63,15 +63,6 @@ typedef struct { + OnOffAuto smm; + OnOffAuto acpi; + +- /* Apic id specific handlers */ +- uint32_t (*apicid_from_cpu_idx)(X86CPUTopoInfo *topo_info, +- unsigned cpu_index); +- void (*topo_ids_from_apicid)(apic_id_t apicid, X86CPUTopoInfo *topo_info, +- X86CPUTopoIDs *topo_ids); +- apic_id_t (*apicid_from_topo_ids)(X86CPUTopoInfo *topo_info, +- const X86CPUTopoIDs *topo_ids); +- uint32_t (*apicid_pkg_offset)(X86CPUTopoInfo *topo_info); +- + /* + * Address space used by IOAPIC device. All IOAPIC interrupts + * will be translated to MSI messages in the address space. 
+-- +2.27.0 + diff --git a/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch b/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch new file mode 100644 index 0000000..b9ac7b1 --- /dev/null +++ b/kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch @@ -0,0 +1,157 @@ +From 61b9bdeafac573093e171947be1a0c9212ba8b95 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:45 -0400 +Subject: [PATCH 03/11] Revert "hw/i386: Move arch_id decode inside + x86_cpus_init" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-4-imammedo@redhat.com> +Patchwork-id: 98248 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 3/7] Revert "hw/i386: Move arch_id decode inside x86_cpus_init" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 2e26f4ab3bf8390a2677d3afd9b1a04f015d7721. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/i386/pc.c | 6 +++--- + hw/i386/x86.c | 37 +++++++------------------------------ + 2 files changed, 10 insertions(+), 33 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index f469c060e5..ac2cc79fca 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1817,14 +1817,14 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + topo_ids.die_id = cpu->die_id; + topo_ids.core_id = cpu->core_id; + topo_ids.smt_id = cpu->thread_id; +- cpu->apic_id = x86ms->apicid_from_topo_ids(&topo_info, &topo_ids); ++ cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); + } + + cpu_slot = pc_find_cpu_slot(MACHINE(pcms), cpu->apic_id, &idx); + if (!cpu_slot) { + MachineState *ms = MACHINE(pcms); + +- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", +@@ -1845,7 +1845,7 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() + * once -smp refactoring is complete and there will be CPU private + * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ +- x86ms->topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); ++ x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { + error_setg(errp, "property socket-id: %u doesn't match set apic-id:" + " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, +diff --git a/hw/i386/x86.c b/hw/i386/x86.c +index 67bee1bcb8..41bdf146bd 100644 +--- a/hw/i386/x86.c ++++ b/hw/i386/x86.c +@@ -68,22 +68,6 @@ inline void init_topo_info(X86CPUTopoInfo *topo_info, + topo_info->threads_per_core = ms->smp.threads; + } + +-/* +- * Set up with the new EPYC topology handlers +- * +- * AMD uses different apic id encoding for EPYC based cpus. 
Override +- * the default topo handlers with EPYC encoding handlers. +- */ +-static void x86_set_epyc_topo_handlers(MachineState *machine) +-{ +- X86MachineState *x86ms = X86_MACHINE(machine); +- +- x86ms->apicid_from_cpu_idx = x86_apicid_from_cpu_idx_epyc; +- x86ms->topo_ids_from_apicid = x86_topo_ids_from_apicid_epyc; +- x86ms->apicid_from_topo_ids = x86_apicid_from_topo_ids_epyc; +- x86ms->apicid_pkg_offset = apicid_pkg_offset_epyc; +-} +- + /* + * Calculates initial APIC ID for a specific CPU index + * +@@ -102,7 +86,7 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, + + init_topo_info(&topo_info, x86ms); + +- correct_id = x86ms->apicid_from_cpu_idx(&topo_info, cpu_index); ++ correct_id = x86_apicid_from_cpu_idx(&topo_info, cpu_index); + if (x86mc->compat_apic_id_mode) { + if (cpu_index != correct_id && !warned && !qtest_enabled()) { + error_report("APIC IDs set in compatibility mode, " +@@ -136,11 +120,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) + MachineState *ms = MACHINE(x86ms); + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + +- /* Check for apicid encoding */ +- if (cpu_x86_use_epyc_apic_id_encoding(ms->cpu_type)) { +- x86_set_epyc_topo_handlers(ms); +- } +- + x86_cpu_set_default_version(default_cpu_version); + + /* +@@ -154,12 +133,6 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, + ms->smp.max_cpus - 1) + 1; + possible_cpus = mc->possible_cpu_arch_ids(ms); +- +- for (i = 0; i < ms->possible_cpus->len; i++) { +- ms->possible_cpus->cpus[i].arch_id = +- x86_cpu_apic_id_from_index(x86ms, i); +- } +- + for (i = 0; i < ms->smp.cpus; i++) { + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); + } +@@ -184,7 +157,8 @@ int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx) + init_topo_info(&topo_info, x86ms); + + assert(idx < ms->possible_cpus->len); +- x86_topo_ids_from_idx(&topo_info, idx, &topo_ids); ++ 
x86_topo_ids_from_apicid(ms->possible_cpus->cpus[idx].arch_id, ++ &topo_info, &topo_ids); + return topo_ids.pkg_id % ms->numa_state->num_nodes; + } + +@@ -215,7 +189,10 @@ const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms) + + ms->possible_cpus->cpus[i].type = ms->cpu_type; + ms->possible_cpus->cpus[i].vcpus_count = 1; +- x86_topo_ids_from_idx(&topo_info, i, &topo_ids); ++ ms->possible_cpus->cpus[i].arch_id = ++ x86_cpu_apic_id_from_index(x86ms, i); ++ x86_topo_ids_from_apicid(ms->possible_cpus->cpus[i].arch_id, ++ &topo_info, &topo_ids); + ms->possible_cpus->cpus[i].props.has_socket_id = true; + ms->possible_cpus->cpus[i].props.socket_id = topo_ids.pkg_id; + if (x86ms->smp_dies > 1) { +-- +2.27.0 + diff --git a/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch b/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch new file mode 100644 index 0000000..9492f85 --- /dev/null +++ b/kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch @@ -0,0 +1,103 @@ +From 7f7a15ba9ad3f1d906b472cad4972c80d11b77fc Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:43 -0400 +Subject: [PATCH 01/11] Revert "i386: Fix pkg_id offset for EPYC cpu models" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-2-imammedo@redhat.com> +Patchwork-id: 98247 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 1/7] Revert "i386: Fix pkg_id offset for EPYC cpu models" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. 
Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 7b225762c8c05fd31d4c2be116aedfbc00383f8b. + +PS: +fixup an access to pkg_offset that were added by +cac9edfc4da (target/i386: Fix the CPUID leaf CPUID_Fn80000008) + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + hw/i386/pc.c | 1 - + target/i386/cpu.c | 6 +++--- + target/i386/cpu.h | 1 - + 3 files changed, 3 insertions(+), 5 deletions(-) + +diff --git a/hw/i386/pc.c b/hw/i386/pc.c +index a75e0137ab..f469c060e5 100644 +--- a/hw/i386/pc.c ++++ b/hw/i386/pc.c +@@ -1763,7 +1763,6 @@ static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev, + + env->nr_dies = x86ms->smp_dies; + env->nr_nodes = topo_info.nodes_per_pkg; +- env->pkg_offset = x86ms->apicid_pkg_offset(&topo_info); + + /* + * If APIC ID is not set, +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cdaa1463f2..6517cc73a2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5680,7 +5680,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; + break; + case 1: +- *eax = env->pkg_offset; ++ *eax = apicid_pkg_offset(&topo_info); + *ebx = cs->nr_cores * cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; +@@ -5714,7 +5714,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; + case 2: +- *eax = env->pkg_offset; ++ *eax = apicid_pkg_offset(&topo_info); + *ebx = env->nr_dies * cs->nr_cores * cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; + break; +@@ -5895,7 +5895,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + * CPUX86State::pkg_offset. 
+ * Bits 7:0 is "The number of threads in the package is NC+1" + */ +- *ecx = (env->pkg_offset << 12) | ++ *ecx = (apicid_pkg_offset(&topo_info) << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e1a5c174dc..d5ad42d694 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1630,7 +1630,6 @@ typedef struct CPUX86State { + + unsigned nr_dies; + unsigned nr_nodes; +- unsigned pkg_offset; + } CPUX86State; + + struct kvm_msrs; +-- +2.27.0 + diff --git a/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch b/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch new file mode 100644 index 0000000..489c5a3 --- /dev/null +++ b/kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch @@ -0,0 +1,90 @@ +From bc3db6832c57b1b28204b376f3c4c61cadfe0a35 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:46 -0400 +Subject: [PATCH 04/11] Revert "i386: Introduce use_epyc_apic_id_encoding in + X86CPUDefinition" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-5-imammedo@redhat.com> +Patchwork-id: 98249 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 4/7] Revert "i386: Introduce use_epyc_apic_id_encoding in X86CPUDefinition" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. 
+EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 0c1538cb1a26287c072645f4759b9872b1596d79. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. de Paula +--- + target/i386/cpu.c | 16 ---------------- + target/i386/cpu.h | 1 - + 2 files changed, 17 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 66b6a77b2f..5e3d086f05 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1638,10 +1638,6 @@ typedef struct X86CPUDefinition { + FeatureWordArray features; + const char *model_id; + CPUCaches *cache_info; +- +- /* Use AMD EPYC encoding for apic id */ +- bool use_epyc_apic_id_encoding; +- + /* + * Definitions for alternative versions of CPU model. + * List is terminated by item with version == 0. +@@ -1683,18 +1679,6 @@ static const X86CPUVersionDefinition *x86_cpu_def_get_versions(X86CPUDefinition + return def->versions ?: default_version_list; + } + +-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type) +-{ +- X86CPUClass *xcc = X86_CPU_CLASS(object_class_by_name(cpu_type)); +- +- assert(xcc); +- if (xcc->model && xcc->model->cpudef) { +- return xcc->model->cpudef->use_epyc_apic_id_encoding; +- } else { +- return false; +- } +-} +- + static CPUCaches epyc_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index d5ad42d694..5ff8ad8427 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1918,7 +1918,6 @@ void cpu_clear_apic_feature(CPUX86State *env); + void host_cpuid(uint32_t function, uint32_t count, + uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); + void host_vendor_fms(char *vendor, int *family, int *model, int *stepping); +-bool cpu_x86_use_epyc_apic_id_encoding(const char *cpu_type); + + /* helper.c */ + bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, +-- +2.27.0 + diff --git 
a/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch b/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch new file mode 100644 index 0000000..eeea50d --- /dev/null +++ b/kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch @@ -0,0 +1,288 @@ +From 4236a54d72270d871ff1ed3fd09a2971327077a1 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:48 -0400 +Subject: [PATCH 06/11] Revert "target/i386: Cleanup and use the EPYC mode + topology functions" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-7-imammedo@redhat.com> +Patchwork-id: 98251 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 6/7] Revert "target/i386: Cleanup and use the EPYC mode topology functions" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit dd08ef0318e2b61d14bc069590d174913f7f437a. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 161 ++++++++++++++++++++++++++++++++++++---------- + 1 file changed, 127 insertions(+), 34 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 5e3d086f05..73fc83e53f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -338,15 +338,68 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, + } + } + ++/* ++ * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E ++ * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. ++ * Define the constants to build the cpu topology. Right now, TOPOEXT ++ * feature is enabled only on EPYC. So, these constants are based on ++ * EPYC supported configurations. We may need to handle the cases if ++ * these values change in future. ++ */ ++/* Maximum core complexes in a node */ ++#define MAX_CCX 2 ++/* Maximum cores in a core complex */ ++#define MAX_CORES_IN_CCX 4 ++/* Maximum cores in a node */ ++#define MAX_CORES_IN_NODE 8 ++/* Maximum nodes in a socket */ ++#define MAX_NODES_PER_SOCKET 4 ++ ++/* ++ * Figure out the number of nodes required to build this config. ++ * Max cores in a node is 8 ++ */ ++static int nodes_in_socket(int nr_cores) ++{ ++ int nodes; ++ ++ nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); ++ ++ /* Hardware does not support config with 3 nodes, return 4 in that case */ ++ return (nodes == 3) ? 4 : nodes; ++} ++ ++/* ++ * Decide the number of cores in a core complex with the given nr_cores using ++ * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and ++ * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible ++ * L3 cache is shared across all cores in a core complex. So, this will also ++ * tell us how many cores are sharing the L3 cache. 
++ */ ++static int cores_in_core_complex(int nr_cores) ++{ ++ int nodes; ++ ++ /* Check if we can fit all the cores in one core complex */ ++ if (nr_cores <= MAX_CORES_IN_CCX) { ++ return nr_cores; ++ } ++ /* Get the number of nodes required to build this config */ ++ nodes = nodes_in_socket(nr_cores); ++ ++ /* ++ * Divide the cores accros all the core complexes ++ * Return rounded up value ++ */ ++ return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); ++} ++ + /* Encode cache info for CPUID[8000001D] */ +-static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, +- X86CPUTopoInfo *topo_info, +- uint32_t *eax, uint32_t *ebx, +- uint32_t *ecx, uint32_t *edx) ++static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, ++ uint32_t *eax, uint32_t *ebx, ++ uint32_t *ecx, uint32_t *edx) + { + uint32_t l3_cores; +- unsigned nodes = MAX(topo_info->nodes_per_pkg, 1); +- + assert(cache->size == cache->line_size * cache->associativity * + cache->partitions * cache->sets); + +@@ -355,13 +408,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, + + /* L3 is shared among multiple cores */ + if (cache->level == 3) { +- l3_cores = DIV_ROUND_UP((topo_info->dies_per_pkg * +- topo_info->cores_per_die * +- topo_info->threads_per_core), +- nodes); +- *eax |= (l3_cores - 1) << 14; ++ l3_cores = cores_in_core_complex(cs->nr_cores); ++ *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; + } else { +- *eax |= ((topo_info->threads_per_core - 1) << 14); ++ *eax |= ((cs->nr_threads - 1) << 14); + } + + assert(cache->line_size > 0); +@@ -381,17 +431,55 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, + (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); + } + ++/* Data structure to hold the configuration info for a given core index */ ++struct core_topology { ++ /* core complex id of the current core index */ ++ int ccx_id; ++ /* ++ * Adjusted core index for this core in the topology ++ * This can be 0,1,2,3 with max 4 cores in a core complex ++ */ ++ int core_id; ++ /* Node id for this core index */ ++ int node_id; ++ /* Number of nodes in this config */ ++ int num_nodes; ++}; ++ ++/* ++ * Build the configuration closely match the EPYC hardware. Using the EPYC ++ * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) ++ * right now. This could change in future. ++ * nr_cores : Total number of cores in the config ++ * core_id : Core index of the current CPU ++ * topo : Data structure to hold all the config info for this core index ++ */ ++static void build_core_topology(int nr_cores, int core_id, ++ struct core_topology *topo) ++{ ++ int nodes, cores_in_ccx; ++ ++ /* First get the number of nodes required */ ++ nodes = nodes_in_socket(nr_cores); ++ ++ cores_in_ccx = cores_in_core_complex(nr_cores); ++ ++ topo->node_id = core_id / (cores_in_ccx * MAX_CCX); ++ topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; ++ topo->core_id = core_id % cores_in_ccx; ++ topo->num_nodes = nodes; ++} ++ + /* Encode cache info for CPUID[8000001E] */ +-static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, ++static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) + { +- X86CPUTopoIDs topo_ids = {0}; +- unsigned long nodes = MAX(topo_info->nodes_per_pkg, 1); ++ struct core_topology topo = {0}; ++ unsigned long nodes; + int shift; + +- x86_topo_ids_from_apicid_epyc(cpu->apic_id, topo_info, &topo_ids); +- ++ build_core_topology(cs->nr_cores, cpu->core_id, &topo); + *eax = cpu->apic_id; + /* + * CPUID_Fn8000001E_EBX +@@ -408,8 +496,12 @@ static void 
encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * 3 Core complex id + * 1:0 Core id + */ +- *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.node_id << 3) | +- (topo_ids.core_id); ++ if (cs->nr_threads - 1) { ++ *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | ++ (topo.ccx_id << 2) | topo.core_id; ++ } else { ++ *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; ++ } + /* + * CPUID_Fn8000001E_ECX + * 31:11 Reserved +@@ -418,8 +510,9 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * 2 Socket id + * 1:0 Node id + */ +- if (nodes <= 4) { +- *ecx = ((nodes - 1) << 8) | (topo_ids.pkg_id << 2) | topo_ids.node_id; ++ if (topo.num_nodes <= 4) { ++ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | ++ topo.node_id; + } else { + /* + * Node id fix up. Actual hardware supports up to 4 nodes. But with +@@ -434,10 +527,10 @@ static void encode_topo_cpuid8000001e(X86CPUTopoInfo *topo_info, X86CPU *cpu, + * number of nodes. find_last_bit returns last set bit(0 based). Left + * shift(+1) the socket id to represent all the nodes. 
+ */ +- nodes -= 1; ++ nodes = topo.num_nodes - 1; + shift = find_last_bit(&nodes, 8); +- *ecx = (nodes << 8) | (topo_ids.pkg_id << (shift + 1)) | +- topo_ids.node_id; ++ *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | ++ topo.node_id; + } + *edx = 0; + } +@@ -5473,7 +5566,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t signature[3]; + X86CPUTopoInfo topo_info; + +- topo_info.nodes_per_pkg = env->nr_nodes; + topo_info.dies_per_pkg = env->nr_dies; + topo_info.cores_per_die = cs->nr_cores; + topo_info.threads_per_core = cs->nr_threads; +@@ -5905,20 +5997,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + switch (count) { + case 0: /* L1 dcache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 1: /* L1 icache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 2: /* L2 cache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, ++ eax, ebx, ecx, edx); + break; + case 3: /* L3 cache info */ +- encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, +- &topo_info, eax, ebx, ecx, edx); ++ encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, ++ eax, ebx, ecx, edx); + break; + default: /* end of info */ + *eax = *ebx = *ecx = *edx = 0; +@@ -5927,7 +6019,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + break; + case 0x8000001E: + assert(cpu->core_id <= 255); +- encode_topo_cpuid8000001e(&topo_info, cpu, eax, ebx, ecx, edx); ++ encode_topo_cpuid8000001e(cs, cpu, ++ eax, ebx, ecx, edx); + break; + case 0xC0000000: + *eax = 
env->cpuid_xlevel2; +-- +2.27.0 + diff --git a/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch b/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch new file mode 100644 index 0000000..0326049 --- /dev/null +++ b/kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch @@ -0,0 +1,63 @@ +From a36be18a97841a091256e9934fb323afc9c3a57a Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Fri, 28 Aug 2020 16:23:44 -0400 +Subject: [PATCH 02/11] Revert "target/i386: Enable new apic id encoding for + EPYC based cpus models" + +RH-Author: Igor Mammedov +Message-id: <20200828162349.1616028-3-imammedo@redhat.com> +Patchwork-id: 98245 +O-Subject: [RHEL-AV 8.3.0 qemu-kvm PATCH 2/7] Revert "target/i386: Enable new apic id encoding for EPYC based cpus models" +Bugzilla: 1873417 +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Eduardo Habkost +RH-Acked-by: Dr. David Alan Gilbert + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1873417 +Brew: http://brewweb.devel.redhat.com/brew/taskinfo?taskID=31005031 +Branch: rhel-av-8.3.0 +Upstream: RHEL only +Tested: locally + +A regression was introduced since qemu-5.0, when EPYC specific +APIC ID encoding was introduced. Which leads to migration failing +with: +" + : Unknown savevm section or instance 'apic' 4. Make sure that your current VM setup matches your saved VM setup, including any hotplugged devices + : load of migration failed: Invalid argument +" +when EPYC cpu model and more than 1 numa node is used. +EPYC specific APIC ID encoding is considered as failed +experiment and upstream is preparing to revert it as well. + +This reverts commit 247b18c593ec298446645af8d5d28911daf653b1. + +Signed-off-by: Igor Mammedov +Signed-off-by: Danilo C. L. 
de Paula +--- + target/i386/cpu.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6517cc73a2..66b6a77b2f 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3996,7 +3996,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC Processor", + .cache_info = &epyc_cache_info, +- .use_epyc_apic_id_encoding = 1, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { +@@ -4124,7 +4123,6 @@ static X86CPUDefinition builtin_x86_defs[] = { + .xlevel = 0x8000001E, + .model_id = "AMD EPYC-Rome Processor", + .cache_info = &epyc_rome_cache_info, +- .use_epyc_apic_id_encoding = 1, + }, + }; + +-- +2.27.0 + diff --git a/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch b/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch new file mode 100644 index 0000000..c6f1506 --- /dev/null +++ b/kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch @@ -0,0 +1,254 @@ +From 74ce16018bcb202ab81f3aa7b5a33279dd4800da Mon Sep 17 00:00:00 2001 +From: Greg Kurz +Date: Tue, 1 Sep 2020 19:07:04 -0400 +Subject: [PATCH 08/11] nvram: Exit QEMU if NVRAM cannot contain all -prom-env + data +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Greg Kurz +Message-id: <20200901190704.474799-2-gkurz@redhat.com> +Patchwork-id: 98256 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] nvram: Exit QEMU if NVRAM cannot contain all -prom-env data +Bugzilla: 1867739 +RH-Acked-by: Philippe Mathieu-Daudé +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier +RH-Acked-by: David Gibson + +From: Greg Kurz + +Since commit 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to +support the -prom-env parameter"), pseries machines can pre-initialize +the "system" partition in the NVRAM with the data passed to all -prom-env +parameters on the QEMU command line. 
+ +In this case it is assumed that all the data fits in 64 KiB, but the user +can easily pass more and crash QEMU: + +$ qemu-system-ppc64 -M pseries $(for ((x=0;x<128;x++)); do \ + echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ + done) # this requires ~128 KiB +malloc(): corrupted top size +Aborted (core dumped) + +This happens because we don't check if all the prom-env data fits in +the NVRAM and chrp_nvram_set_var() happily memcpy() it past the +buffer. + +This crash affects basically all ppc/ppc64 machine types that use -prom-env: +- pseries (all versions) +- g3beige +- mac99 + +and also sparc/sparc64 machine types: +- LX +- SPARCClassic +- SPARCbook +- SS-10 +- SS-20 +- SS-4 +- SS-5 +- SS-600MP +- Voyager +- sun4u +- sun4v + +Add a max_len argument to chrp_nvram_create_system_partition() so that +it can check the available size before writing to memory. + +Since NVRAM is populated at machine init, it seems reasonable to consider +this error as fatal. So, instead of reporting an error when we detect that +the NVRAM is too small and adapting all machine types to handle it, we simply +exit QEMU in all cases. This is still better than crashing. If someone +wants another behavior, I guess this can be reworked later. + +Tested with: + +$ yes q | \ + (for arch in ppc ppc64 sparc sparc64; do \ + echo == $arch ==; \ + qemu=${arch}-softmmu/qemu-system-$arch; \ + for mach in $($qemu -M help | awk '! /^Supported/ { print $1 }'); do \ + echo $mach; \ + $qemu -M $mach -monitor stdio -nodefaults -nographic \ + $(for ((x=0;x<128;x++)); do \ + echo -n " -prom-env " ; printf "%0.sx" {1..1024}; \ + done) >/dev/null; \ + done; echo; \ + done) + +Without the patch, affected machine types cause QEMU to report some +memory corruption and crash: + +malloc(): corrupted top size + +free(): invalid size + +*** stack smashing detected ***: terminated + +With the patch, QEMU prints the following message and exits: + +NVRAM is too small. 
Try to pass less data to -prom-env + +It seems that the conditions for the crash have always existed, but it +affects pseries, the machine type I care for, since commit 61f20b9dc5b7 +only. + +Fixes: 61f20b9dc5b7 ("spapr_nvram: Pre-initialize the NVRAM to support the -prom-env parameter") +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1867739 +Reported-by: John Snow +Reviewed-by: Laurent Vivier +Signed-off-by: Greg Kurz +Message-Id: <159736033937.350502.12402444542194031035.stgit@bahia.lan> +Signed-off-by: David Gibson +(cherry picked from commit 37035df51eaabb8d26b71da75b88a1c6727de8fa) +Signed-off-by: Greg Kurz +Signed-off-by: Danilo C. L. de Paula +--- + hw/nvram/chrp_nvram.c | 24 +++++++++++++++++++++--- + hw/nvram/mac_nvram.c | 2 +- + hw/nvram/spapr_nvram.c | 3 ++- + hw/sparc/sun4m.c | 2 +- + hw/sparc64/sun4u.c | 2 +- + include/hw/nvram/chrp_nvram.h | 3 ++- + 6 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/hw/nvram/chrp_nvram.c b/hw/nvram/chrp_nvram.c +index d969f26704..d4d10a7c03 100644 +--- a/hw/nvram/chrp_nvram.c ++++ b/hw/nvram/chrp_nvram.c +@@ -21,14 +21,21 @@ + + #include "qemu/osdep.h" + #include "qemu/cutils.h" ++#include "qemu/error-report.h" + #include "hw/nvram/chrp_nvram.h" + #include "sysemu/sysemu.h" + +-static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) ++static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str, ++ int max_len) + { + int len; + + len = strlen(str) + 1; ++ ++ if (max_len < len) { ++ return -1; ++ } ++ + memcpy(&nvram[addr], str, len); + + return addr + len; +@@ -38,19 +45,26 @@ static int chrp_nvram_set_var(uint8_t *nvram, int addr, const char *str) + * Create a "system partition", used for the Open Firmware + * environment variables. 
+ */ +-int chrp_nvram_create_system_partition(uint8_t *data, int min_len) ++int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len) + { + ChrpNvramPartHdr *part_header; + unsigned int i; + int end; + ++ if (max_len < sizeof(*part_header)) { ++ goto fail; ++ } ++ + part_header = (ChrpNvramPartHdr *)data; + part_header->signature = CHRP_NVPART_SYSTEM; + pstrcpy(part_header->name, sizeof(part_header->name), "system"); + + end = sizeof(ChrpNvramPartHdr); + for (i = 0; i < nb_prom_envs; i++) { +- end = chrp_nvram_set_var(data, end, prom_envs[i]); ++ end = chrp_nvram_set_var(data, end, prom_envs[i], max_len - end); ++ if (end == -1) { ++ goto fail; ++ } + } + + /* End marker */ +@@ -65,6 +79,10 @@ int chrp_nvram_create_system_partition(uint8_t *data, int min_len) + chrp_nvram_finish_partition(part_header, end); + + return end; ++ ++fail: ++ error_report("NVRAM is too small. Try to pass less data to -prom-env"); ++ exit(EXIT_FAILURE); + } + + /** +diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c +index beec1c4e4d..11f2d31cdb 100644 +--- a/hw/nvram/mac_nvram.c ++++ b/hw/nvram/mac_nvram.c +@@ -141,7 +141,7 @@ static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off, + + /* OpenBIOS nvram variables partition */ + sysp_end = chrp_nvram_create_system_partition(&nvr->data[off], +- DEF_SYSTEM_SIZE) + off; ++ DEF_SYSTEM_SIZE, len) + off; + + /* Free space partition */ + chrp_nvram_create_free_partition(&nvr->data[sysp_end], len - sysp_end); +diff --git a/hw/nvram/spapr_nvram.c b/hw/nvram/spapr_nvram.c +index 15d08281d4..386513499f 100644 +--- a/hw/nvram/spapr_nvram.c ++++ b/hw/nvram/spapr_nvram.c +@@ -188,7 +188,8 @@ static void spapr_nvram_realize(SpaprVioDevice *dev, Error **errp) + } + } else if (nb_prom_envs > 0) { + /* Create a system partition to pass the -prom-env variables */ +- chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4); ++ chrp_nvram_create_system_partition(nvram->buf, MIN_NVRAM_SIZE / 4, ++ 
nvram->size); + chrp_nvram_create_free_partition(&nvram->buf[MIN_NVRAM_SIZE / 4], + nvram->size - MIN_NVRAM_SIZE / 4); + } +diff --git a/hw/sparc/sun4m.c b/hw/sparc/sun4m.c +index 9be930415f..cf7dfa4af5 100644 +--- a/hw/sparc/sun4m.c ++++ b/hw/sparc/sun4m.c +@@ -143,7 +143,7 @@ static void nvram_init(Nvram *nvram, uint8_t *macaddr, + memset(image, '\0', sizeof(image)); + + /* OpenBIOS nvram variables partition */ +- sysp_end = chrp_nvram_create_system_partition(image, 0); ++ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); + + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); +diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c +index 9e30203dcc..37310b73e6 100644 +--- a/hw/sparc64/sun4u.c ++++ b/hw/sparc64/sun4u.c +@@ -136,7 +136,7 @@ static int sun4u_NVRAM_set_params(Nvram *nvram, uint16_t NVRAM_size, + memset(image, '\0', sizeof(image)); + + /* OpenBIOS nvram variables partition */ +- sysp_end = chrp_nvram_create_system_partition(image, 0); ++ sysp_end = chrp_nvram_create_system_partition(image, 0, 0x1fd0); + + /* Free space partition */ + chrp_nvram_create_free_partition(&image[sysp_end], 0x1fd0 - sysp_end); +diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h +index 09941a9be4..4a0f5c21b8 100644 +--- a/include/hw/nvram/chrp_nvram.h ++++ b/include/hw/nvram/chrp_nvram.h +@@ -50,7 +50,8 @@ chrp_nvram_finish_partition(ChrpNvramPartHdr *header, uint32_t size) + header->checksum = sum & 0xff; + } + +-int chrp_nvram_create_system_partition(uint8_t *data, int min_len); ++/* chrp_nvram_create_system_partition() failure is fatal */ ++int chrp_nvram_create_system_partition(uint8_t *data, int min_len, int max_len); + int chrp_nvram_create_free_partition(uint8_t *data, int len); + + #endif +-- +2.27.0 + diff --git a/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch b/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch new file mode 100644 index 0000000..e53187c --- /dev/null +++ 
b/kvm-usb-fix-setup_len-init-CVE-2020-14364.patch @@ -0,0 +1,102 @@ +From e6d43ded51d658d77bb7f8a490f2bf93946d3215 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 3 Sep 2020 14:27:19 -0400 +Subject: [PATCH 09/11] usb: fix setup_len init (CVE-2020-14364) + +RH-Author: Jon Maloy +Message-id: <20200903142719.1415757-2-jmaloy@redhat.com> +Patchwork-id: 98265 +O-Subject: [RHEL-AV-8.3.0 qemu-kvm PATCH 1/1] usb: fix setup_len init (CVE-2020-14364) +Bugzilla: 1869715 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Thomas Huth +RH-Acked-by: Laurent Vivier + +From: Gerd Hoffmann + +Store calculated setup_len in a local variable, verify it, and only +write it to the struct (USBDevice->setup_len) in case it passed the +sanity checks. + +This prevents other code (do_token_{in,out} functions specifically) +from working with invalid USBDevice->setup_len values and overrunning +the USBDevice->setup_buf[] buffer. + +Fixes: CVE-2020-14364 +Signed-off-by: Gerd Hoffmann +Tested-by: Gonglei +Reviewed-by: Li Qiang +Message-id: 20200825053636.29648-1-kraxel@redhat.com +(cherry picked from commit b946434f2659a182afc17e155be6791ebfb302eb) +Signed-off-by: Jon Maloy +Signed-off-by: Danilo C. L. 
de Paula +--- + hw/usb/core.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 5abd128b6b..5234dcc73f 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -129,6 +129,7 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + static void do_token_setup(USBDevice *s, USBPacket *p) + { + int request, value, index; ++ unsigned int setup_len; + + if (p->iov.size != 8) { + p->status = USB_RET_STALL; +@@ -138,14 +139,15 @@ static void do_token_setup(USBDevice *s, USBPacket *p) + usb_packet_copy(p, s->setup_buf, p->iov.size); + s->setup_index = 0; + p->actual_length = 0; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; +@@ -259,26 +261,28 @@ static void do_token_out(USBDevice *s, USBPacket *p) + static void do_parameter(USBDevice *s, USBPacket *p) + { + int i, request, value, index; ++ unsigned int setup_len; + + for (i = 0; i < 8; i++) { + s->setup_buf[i] = p->parameter >> (i*8); + } + + s->setup_state = SETUP_STATE_PARAM; +- s->setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; + s->setup_index = 0; + + request = (s->setup_buf[0] << 8) | s->setup_buf[1]; + value = (s->setup_buf[3] << 8) | s->setup_buf[2]; + index = (s->setup_buf[5] << 8) | s->setup_buf[4]; + +- if (s->setup_len > sizeof(s->data_buf)) { ++ setup_len = (s->setup_buf[7] << 8) | s->setup_buf[6]; ++ if (setup_len > sizeof(s->data_buf)) { + fprintf(stderr, + "usb_generic_handle_packet: ctrl buffer too small (%d > %zu)\n", +- s->setup_len, 
sizeof(s->data_buf)); ++ setup_len, sizeof(s->data_buf)); + p->status = USB_RET_STALL; + return; + } ++ s->setup_len = setup_len; + + if (p->pid == USB_TOKEN_OUT) { + usb_packet_copy(p, s->data_buf, s->setup_len); +-- +2.27.0 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b80c4b7..80e6227 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -69,7 +69,7 @@ Obsoletes: %1-rhev Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 5.1.0 -Release: 4%{?dist} +Release: 5%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -158,6 +158,24 @@ Patch44: kvm-migration-Add-block-bitmap-mapping-parameter.patch Patch45: kvm-iotests.py-Let-wait_migration-return-on-failure.patch # For bz#1790492 - 'dirty-bitmaps' migration capability should allow configuring target nodenames Patch46: kvm-iotests-Test-node-bitmap-aliases-during-migration.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch47: kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch48: kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch49: kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch50: kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch51: kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch52: kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch +# For bz#1873417 - AMD/NUMA topology - revert 5.1 changes +Patch53: kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch +# For bz#1867739 - -prom-env does not validate input +Patch54: kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch +# For 
bz#1869715 - CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0] +Patch55: kvm-usb-fix-setup_len-init-CVE-2020-14364.patch BuildRequires: wget BuildRequires: rpm-build @@ -184,7 +202,6 @@ BuildRequires: python3-sphinx BuildRequires: spice-protocol >= 0.12.12 BuildRequires: spice-server-devel >= 0.12.8 BuildRequires: libcacard-devel -BuildRequires: virglrenderer-devel # For smartcard NSS support BuildRequires: nss-devel %endif @@ -196,7 +213,7 @@ BuildRequires: librados-devel BuildRequires: librbd-devel %if %{have_gluster} # For gluster block driver -BuildRequires: glusterfs-api-devel >= 3.6.0 +BuildRequires: glusterfs-api-devel BuildRequires: glusterfs-devel %endif # We need both because the 'stap' binary is probed for by configure @@ -306,9 +323,6 @@ Requires: %{name}-common = %{epoch}:%{version}-%{release} Requires: libseccomp >= 2.4.0 # For compressed guest memory dumps Requires: lzo snappy -%if %{have_gluster} -Requires: glusterfs-api >= 3.6.0 -%endif %if %{have_kvm_setup} Requires(post): systemd-units Requires(preun): systemd-units @@ -617,11 +631,7 @@ cd qemu-kvm-build --enable-vhost-user \ --enable-vhost-vdpa \ --enable-vhost-vsock \ -%if 0%{have_spice} - --enable-virglrenderer \ -%else --disable-virglrenderer \ -%endif --disable-virtfs \ --enable-vnc \ --disable-vnc-jpeg \ @@ -1068,10 +1078,6 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %if 0%{have_memlock_limits} %{_sysconfdir}/security/limits.d/95-kvm-memlock.conf %endif -%if %{have_spice} -%{_libexecdir}/vhost-user-gpu -%{_datadir}/%{name}/vhost-user/50-qemu-gpu.json -%endif %{_libexecdir}/virtiofsd %{_datadir}/%{name}/vhost-user/50-qemu-virtiofsd.json %if %{have_usbredir} @@ -1126,6 +1132,29 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Tue Sep 08 2020 Danilo Cesar Lemes de Paula - 5.1.0-5.el8 +- kvm-Revert-i386-Fix-pkg_id-offset-for-EPYC-cpu-models.patch [bz#1873417] +- 
kvm-Revert-target-i386-Enable-new-apic-id-encoding-for-E.patch [bz#1873417] +- kvm-Revert-hw-i386-Move-arch_id-decode-inside-x86_cpus_i.patch [bz#1873417] +- kvm-Revert-i386-Introduce-use_epyc_apic_id_encoding-in-X.patch [bz#1873417] +- kvm-Revert-hw-i386-Introduce-apicid-functions-inside-X86.patch [bz#1873417] +- kvm-Revert-target-i386-Cleanup-and-use-the-EPYC-mode-top.patch [bz#1873417] +- kvm-Revert-hw-386-Add-EPYC-mode-topology-decoding-functi.patch [bz#1873417] +- kvm-nvram-Exit-QEMU-if-NVRAM-cannot-contain-all-prom-env.patch [bz#1867739] +- kvm-usb-fix-setup_len-init-CVE-2020-14364.patch [bz#1869715] +- kvm-Remove-explicit-glusterfs-api-dependency.patch [bz#1872853] +- kvm-disable-virgl.patch [bz#1831271] +- Resolves: bz#1831271 + (Drop virgil acceleration support and remove virglrenderer dependency) +- Resolves: bz#1867739 + (-prom-env does not validate input) +- Resolves: bz#1869715 + (CVE-2020-14364 qemu-kvm: QEMU: usb: out-of-bounds r/w access issue while processing usb packets [rhel-av-8.3.0]) +- Resolves: bz#1872853 + (move the glusterfs dependency out of qemu-kvm-core to the glusterfs module) +- Resolves: bz#1873417 + (AMD/NUMA topology - revert 5.1 changes) + * Thu Aug 27 2020 Danilo Cesar Lemes de Paula - 5.1.0-4.el8 - kvm-Drop-bogus-IPv6-messages.patch [bz#1867075] - kvm-machine-types-numa-set-numa_mem_supported-on-old-mac.patch [bz#1849707]