* Mon Jul 10 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.0.0-7
- kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363]
- kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363]
- kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363]
- kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330]
- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644]
- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929]
- Resolves: bz#2171363
  ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association)
- Resolves: RHEL-330
  ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed)
- Resolves: bz#2218644
  (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone))
- Resolves: bz#2128929
  ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest)
Parent: b738488387
Commit: b56a1fa35b

kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch (new file, 60 lines)
@@ -0,0 +1,60 @@
From 7b57aec372fc238cbaafe86557f9fb4b560895b1 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 2/6] hw/arm: Validate cluster and NUMA node boundary

RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [2/3] fcac7ea85d9f73613989903c642fc1bf6c51946b

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363

There are two NUMA-aware ARM machines: 'virt' and 'sbsa-ref'. Both of
them are required to follow the cluster-to-NUMA-node boundary, so
enable the validation to warn about the irregular configuration where
multiple CPUs in one cluster have been associated with different NUMA
nodes.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230509002739.18388-3-gshan@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit fecff672351ace5e39adf7dbcf7a8ee748b201cb)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/arm/sbsa-ref.c | 2 ++
 hw/arm/virt.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index 0b93558dde..efb380e7c8 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -864,6 +864,8 @@ static void sbsa_ref_class_init(ObjectClass *oc, void *data)
     mc->possible_cpu_arch_ids = sbsa_ref_possible_cpu_arch_ids;
     mc->cpu_index_to_instance_props = sbsa_ref_cpu_index_to_props;
     mc->get_default_cpu_node_id = sbsa_ref_get_default_cpu_node_id;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
 }

 static const TypeInfo sbsa_ref_info = {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9be53e9355..df6a0231bc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3083,6 +3083,8 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     mc->smp_props.clusters_supported = true;
     mc->auto_enable_numa_with_memhp = true;
    mc->auto_enable_numa_with_memdev = true;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
     mc->default_ram_id = "mach-virt.ram";

     object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3
kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch (new file, 41 lines)
@@ -0,0 +1,41 @@
From 022529f6d0ee306da857825c72a98bf7ddf5de22 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 3/6] hw/arm/virt: Validate cluster and NUMA node boundary for
 RHEL machines

RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [3/3] a396c499259b566861ca007b01f8539bf6113711

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363
Upstream Status: RHEL only

Set mc->cpu_cluster_has_numa_boundary to true so that the boundary of
CPU cluster and NUMA node will be validated for 'virt-rhel*' machines.
A warning message will be printed if the boundary is broken.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/arm/virt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index df6a0231bc..faf68488d5 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3530,6 +3530,8 @@ static void rhel_machine_class_init(ObjectClass *oc, void *data)
     mc->smp_props.clusters_supported = true;
     mc->auto_enable_numa_with_memhp = true;
     mc->auto_enable_numa_with_memdev = true;
+    /* platform instead of architectural choice */
+    mc->cpu_cluster_has_numa_boundary = true;
     mc->default_ram_id = "mach-virt.ram";

     object_class_property_add(oc, "acpi", "OnOffAuto",
--
2.39.3
kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch (new file, 160 lines)
@@ -0,0 +1,160 @@
From a5857fb12fcad46e27c415fe82ce13c0cb5d09c7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 29 Jun 2023 14:48:32 -0300
Subject: [PATCH 5/6] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RH-Author: Marcelo Tosatti <None>
RH-MergeRequest: 177: kvm: reuse per-vcpu stats fd to avoid vcpu interruption
RH-Bugzilla: 2218644
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Commit: [1/1] 4ec72385a9047888121485f49bacb1aff84f7018 (mtosatti/qemu-kvm)

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2218644
Commit: 3b6f485275ae95a81eec589d2773b86ca9ddec4d

A regression has been detected in latency testing of KVM guests.
More specifically, it was observed that the cyclictest
numbers inside an isolated vcpu (running on an isolated pcpu) are:

Where a maximum of 50us is acceptable.

The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
per vcpu statistics, which interrupts the vcpu (and is unnecessary).

To fix this, open the per vcpu stats fd on vcpu initialization,
and read from that fd from QEMU's main thread.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 accel/kvm/kvm-all.c | 30 +++++++++++++++---------------
 include/hw/core/cpu.h | 1 +
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index cf3a88d90e..fa7ca46c66 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
                          "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
     }
+    cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+
 err:
     return ret;
 }
@@ -3959,7 +3961,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd

     /* Read stats header */
     kvm_stats_header = &descriptors->kvm_stats_header;
-    ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
+    ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
     if (ret != sizeof(*kvm_stats_header)) {
         error_setg(errp, "KVM stats: failed to read stats header: "
                    "expected %zu actual %zu",
@@ -3990,7 +3992,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
 }

 static void query_stats(StatsResultList **result, StatsTarget target,
-                        strList *names, int stats_fd, Error **errp)
+                        strList *names, int stats_fd, CPUState *cpu,
+                        Error **errp)
 {
     struct kvm_stats_desc *kvm_stats_desc;
     struct kvm_stats_header *kvm_stats_header;
@@ -4048,7 +4051,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
         break;
     case STATS_TARGET_VCPU:
         add_stats_entry(result, STATS_PROVIDER_KVM,
-                        current_cpu->parent_obj.canonical_path,
+                        cpu->parent_obj.canonical_path,
                         stats_list);
         break;
     default:
@@ -4085,10 +4088,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
     add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
 }

-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;

     if (stats_fd == -1) {
@@ -4097,14 +4099,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
         return;
     }
     query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
-                kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
-    close(stats_fd);
+                kvm_stats_args->names, stats_fd, cpu,
+                kvm_stats_args->errp);
 }

-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;

     if (stats_fd == -1) {
@@ -4114,7 +4115,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
     }
     query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
                        kvm_stats_args->errp);
-    close(stats_fd);
 }

 static void query_stats_cb(StatsResultList **result, StatsTarget target,
@@ -4132,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             error_setg_errno(errp, errno, "KVM stats: ioctl failed");
             return;
         }
-        query_stats(result, target, names, stats_fd, errp);
+        query_stats(result, target, names, stats_fd, NULL, errp);
         close(stats_fd);
         break;
     }
@@ -4146,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
                 continue;
             }
-            run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+            query_stats_vcpu(cpu, &stats_args);
         }
         break;
     }
@@ -4172,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
     if (first_cpu) {
         stats_args.result.schema = result;
         stats_args.errp = errp;
-        run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+        query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 397fd3ac68..ae96be07e7 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -399,6 +399,7 @@ struct CPUState {
     struct kvm_dirty_gfn *kvm_dirty_gfns;
     uint32_t kvm_fetch_index;
     uint64_t dirty_pages;
+    int kvm_vcpu_stats_fd;

     /* Use by accel-block: CPU is executing an ioctl() */
     QemuLockCnt in_ioctl_lock;
--
2.39.3
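An aside on the fd-reuse pattern above: because the per-vcpu stats fd now stays open for the vcpu's lifetime, the header read had to move from read() to pread(). A minimal standalone sketch (plain POSIX; the struct layout is hypothetical, standing in for kvm_stats_header, and this is not QEMU source) of why the explicit offset matters:

    #include <unistd.h>

    /* Hypothetical header layout standing in for struct kvm_stats_header. */
    struct stats_header {
        unsigned int flags;
        unsigned int num_desc;
    };

    /*
     * With a long-lived fd, read() would advance the shared file offset, so
     * a second query would no longer start at the header. pread() reads at
     * an explicit offset and leaves the offset untouched, so every query on
     * the cached fd sees the same layout.
     */
    static int read_stats_header(int stats_fd, struct stats_header *hdr)
    {
        ssize_t ret = pread(stats_fd, hdr, sizeof(*hdr), 0);
        return ret == (ssize_t)sizeof(*hdr) ? 0 : -1;
    }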
kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch (new file, 145 lines)
@@ -0,0 +1,145 @@
From 760a2f284f6d4cd3cd3b1685411bbca21c4ad233 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 27 Jun 2023 20:20:09 +1000
Subject: [PATCH 1/6] numa: Validate cluster and NUMA node boundary if required
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RH-Author: Gavin Shan <gshan@redhat.com>
RH-MergeRequest: 175: hw/arm: Validate CPU cluster and NUMA node boundary for RHEL machines
RH-Bugzilla: 2171363
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
RH-Commit: [1/3] 24580064b9a0076ec4d9a916839d85135ac48cd9

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2171363

For some architectures like ARM64, multiple CPUs in one cluster can be
associated with different NUMA nodes, which is an irregular configuration
because we shouldn't see it in a bare-metal environment. The irregular
configuration causes the Linux guest to misbehave, as the following warning
messages indicate.

-smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \
-numa node,nodeid=0,cpus=0-1,memdev=ram0 \
-numa node,nodeid=1,cpus=2-3,memdev=ram1 \
-numa node,nodeid=2,cpus=4-5,memdev=ram2 \

------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2271 build_sched_domains+0x284/0x910
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.14.0-268.el9.aarch64 #1
pstate: 00400005 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : build_sched_domains+0x284/0x910
lr : build_sched_domains+0x184/0x910
sp : ffff80000804bd50
x29: ffff80000804bd50 x28: 0000000000000002 x27: 0000000000000000
x26: ffff800009cf9a80 x25: 0000000000000000 x24: ffff800009cbf840
x23: ffff000080325000 x22: ffff0000005df800 x21: ffff80000a4ce508
x20: 0000000000000000 x19: ffff000080324440 x18: 0000000000000014
x17: 00000000388925c0 x16: 000000005386a066 x15: 000000009c10cc2e
x14: 00000000000001c0 x13: 0000000000000001 x12: ffff00007fffb1a0
x11: ffff00007fffb180 x10: ffff80000a4ce508 x9 : 0000000000000041
x8 : ffff80000a4ce500 x7 : ffff80000a4cf920 x6 : 0000000000000001
x5 : 0000000000000001 x4 : 0000000000000007 x3 : 0000000000000002
x2 : 0000000000001000 x1 : ffff80000a4cf928 x0 : 0000000000000001
Call trace:
 build_sched_domains+0x284/0x910
 sched_init_domains+0xac/0xe0
 sched_init_smp+0x48/0xc8
 kernel_init_freeable+0x140/0x1ac
 kernel_init+0x28/0x140
 ret_from_fork+0x10/0x20

Improve the situation by warning when multiple CPUs in one cluster have
been associated with different NUMA nodes. However, one NUMA node is
still allowed to be associated with different clusters.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230509002739.18388-2-gshan@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a494fdb715832000ee9047a549a35aacfea8175e)
Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 hw/core/machine.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/hw/boards.h | 1 +
 2 files changed, 43 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index c28702b690..5abdc8c39b 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1496,6 +1496,45 @@ static void machine_numa_finish_cpu_init(MachineState *machine)
     g_string_free(s, true);
 }

+static void validate_cpu_cluster_to_numa_boundary(MachineState *ms)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    NumaState *state = ms->numa_state;
+    const CPUArchIdList *possible_cpus = mc->possible_cpu_arch_ids(ms);
+    const CPUArchId *cpus = possible_cpus->cpus;
+    int i, j;
+
+    if (state->num_nodes <= 1 || possible_cpus->len <= 1) {
+        return;
+    }
+
+    /*
+     * The Linux scheduling domain can't be parsed when the multiple CPUs
+     * in one cluster have been associated with different NUMA nodes. However,
+     * it's fine to associate one NUMA node with CPUs in different clusters.
+     */
+    for (i = 0; i < possible_cpus->len; i++) {
+        for (j = i + 1; j < possible_cpus->len; j++) {
+            if (cpus[i].props.has_socket_id &&
+                cpus[i].props.has_cluster_id &&
+                cpus[i].props.has_node_id &&
+                cpus[j].props.has_socket_id &&
+                cpus[j].props.has_cluster_id &&
+                cpus[j].props.has_node_id &&
+                cpus[i].props.socket_id == cpus[j].props.socket_id &&
+                cpus[i].props.cluster_id == cpus[j].props.cluster_id &&
+                cpus[i].props.node_id != cpus[j].props.node_id) {
+                warn_report("CPU-%d and CPU-%d in socket-%" PRId64 "-cluster-%" PRId64
+                            " have been associated with node-%" PRId64 " and node-%" PRId64
+                            " respectively. It can cause OSes like Linux to"
+                            " misbehave", i, j, cpus[i].props.socket_id,
+                            cpus[i].props.cluster_id, cpus[i].props.node_id,
+                            cpus[j].props.node_id);
+            }
+        }
+    }
+}
+
 MemoryRegion *machine_consume_memdev(MachineState *machine,
                                      HostMemoryBackend *backend)
 {
@@ -1581,6 +1620,9 @@ void machine_run_board_init(MachineState *machine, const char *mem_path, Error *
     numa_complete_configuration(machine);
     if (machine->numa_state->num_nodes) {
         machine_numa_finish_cpu_init(machine);
+        if (machine_class->cpu_cluster_has_numa_boundary) {
+            validate_cpu_cluster_to_numa_boundary(machine);
+        }
     }
 }

diff --git a/include/hw/boards.h b/include/hw/boards.h
index 5f08bd7550..3628671228 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -275,6 +275,7 @@ struct MachineClass {
     bool nvdimm_supported;
     bool numa_mem_supported;
     bool auto_enable_numa;
+    bool cpu_cluster_has_numa_boundary;
     SMPCompatProps smp_props;
     const char *default_ram_id;

--
2.39.3
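For contrast with the irregular layout quoted in the commit message above, a topology that keeps each cluster inside a single NUMA node passes the new check silently. A plausible regular variant of the same 6-CPU guest (assuming the same memdev backends; not taken from the patch itself) would be:

    -smp 6,maxcpus=6,sockets=2,clusters=1,cores=3,threads=1 \
    -numa node,nodeid=0,cpus=0-2,memdev=ram0 \
    -numa node,nodeid=1,cpus=3-5,memdev=ram1

Here CPUs 0-2 (socket 0) all land on node 0 and CPUs 3-5 (socket 1) on node 1, so no socket/cluster pair spans two nodes. With the irregular layout, by contrast, the warn_report() above would fire for pairs such as CPU-0 and CPU-2, which share a socket and cluster but map to node-0 and node-1.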
kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch (new file, 138 lines)
@@ -0,0 +1,138 @@
From ac54f5f746782da89ab674733af5622e524b58eb Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Fri, 2 Jun 2023 18:27:35 +0200
Subject: [PATCH 4/6] vhost: fix vhost_dev_enable_notifiers() error case
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 176: vhost: fix vhost_dev_enable_notifiers() error case
RH-Jira: RHEL-330
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Acked-by: Jason Wang <jasowang@redhat.com>
RH-Commit: [1/1] fd30d7501be59f7e5b9d6fc5ed84efcc4037d08e (lvivier/qemu-kvm-centos)

JIRA: https://issues.redhat.com/browse/RHEL-330

In vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true)
fails, we call vhost_dev_disable_notifiers(), which executes
virtio_bus_set_host_notifier(false) on all queues, even on queues that
failed to be initialized.

This triggers a core dump in memory_region_del_eventfd():

virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24)
vhost VQ 1 notifier binding failed: 24
.../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed.

Fix the problem by providing to vhost_dev_disable_notifiers() the
number of queues to disable.

Fixes: 8771589b6f81 ("vhost: simplify vhost_dev_enable_notifiers")
Cc: longpeng2@huawei.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20230602162735.3670785-1-lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 92099aa4e9a3bb6856c290afaf41c76f9e3dd9fd)
---
 hw/virtio/vhost.c | 65 ++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index a266396576..ae0a033e60 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1545,6 +1545,40 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
     memset(hdev, 0, sizeof(struct vhost_dev));
 }

+static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
+                                             VirtIODevice *vdev,
+                                             unsigned int nvqs)
+{
+    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+    int i, r;
+
+    /*
+     * Batch all the host notifiers in a single transaction to avoid
+     * quadratic time complexity in address_space_update_ioeventfds().
+     */
+    memory_region_transaction_begin();
+
+    for (i = 0; i < nvqs; ++i) {
+        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
+                                         false);
+        if (r < 0) {
+            error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
+        }
+        assert(r >= 0);
+    }
+
+    /*
+     * The transaction expects the ioeventfds to be open when it
+     * commits. Do it now, before the cleanup loop.
+     */
+    memory_region_transaction_commit();
+
+    for (i = 0; i < nvqs; ++i) {
+        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
+    }
+    virtio_device_release_ioeventfd(vdev);
+}
+
 /* Stop processing guest IO notifications in qemu.
  * Start processing them in vhost in kernel.
  */
@@ -1574,7 +1608,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
         if (r < 0) {
             error_report("vhost VQ %d notifier binding failed: %d", i, -r);
             memory_region_transaction_commit();
-            vhost_dev_disable_notifiers(hdev, vdev);
+            vhost_dev_disable_notifiers_nvqs(hdev, vdev, i);
             return r;
         }
     }
@@ -1591,34 +1625,7 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
  */
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
-    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-    int i, r;
-
-    /*
-     * Batch all the host notifiers in a single transaction to avoid
-     * quadratic time complexity in address_space_update_ioeventfds().
-     */
-    memory_region_transaction_begin();
-
-    for (i = 0; i < hdev->nvqs; ++i) {
-        r = virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i,
-                                         false);
-        if (r < 0) {
-            error_report("vhost VQ %d notifier cleanup failed: %d", i, -r);
-        }
-        assert (r >= 0);
-    }
-
-    /*
-     * The transaction expects the ioeventfds to be open when it
-     * commits. Do it now, before the cleanup loop.
-     */
-    memory_region_transaction_commit();
-
-    for (i = 0; i < hdev->nvqs; ++i) {
-        virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i);
-    }
-    virtio_device_release_ioeventfd(vdev);
+    vhost_dev_disable_notifiers_nvqs(hdev, vdev, hdev->nvqs);
 }

 /* Test and clear event pending status.
--
2.39.3
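The fix is an instance of the usual partial-rollback rule: unwind only what was actually set up. A self-contained sketch (hypothetical helper names, not QEMU code) of the shape vhost_dev_enable_notifiers() now follows:

    #include <stdio.h>

    /* Hypothetical stand-ins for virtio_bus_set_host_notifier(..., true/false). */
    static int setup_notifier(int i)     { return i < 3 ? 0 : -1; } /* fail at i == 3 */
    static void teardown_notifier(int i) { printf("teardown vq %d\n", i); }

    static int enable_notifiers(int nvqs)
    {
        for (int i = 0; i < nvqs; i++) {
            if (setup_notifier(i) < 0) {
                /* Unwind only queues 0..i-1; queue i was never enabled. */
                while (i-- > 0) {
                    teardown_notifier(i);
                }
                return -1;
            }
        }
        return 0;
    }

    int main(void)
    {
        /* Fails at vq 3, then prints teardown for vq 2, 1, 0 only. */
        enable_notifiers(5);
        return 0;
    }

Tearing down all nvqs queues unconditionally, as the old error path did, asserts inside memory_region_del_eventfd() because the never-registered eventfds cannot be found.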
kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch (new file, 67 lines)
@@ -0,0 +1,67 @@
From 4e30ca551fb3740a428017a0debf0a6aab976639 Mon Sep 17 00:00:00 2001
From: Ani Sinha <anisinha@redhat.com>
Date: Mon, 19 Jun 2023 12:22:09 +0530
Subject: [PATCH 6/6] vhost-vdpa: do not cleanup the vdpa/vhost-net structures
 if peer nic is present

RH-Author: Ani Sinha <None>
RH-MergeRequest: 174: vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
RH-Bugzilla: 2128929
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/1] c70d4e5fd93256326d318e0b507db6b9eb93ad86 (anisinha/centos-qemu-kvm)

When a peer nic is still attached to the vdpa backend, it is too early to free
up the vhost-net and vdpa structures. If these structures are freed here, then
QEMU crashes when the guest is being shut down. The following call chain
would result in an assertion failure since the pointer returned from
vhost_vdpa_get_vhost_net() would be NULL:

do_vm_stop() -> vm_state_notify() -> virtio_set_status() ->
virtio_net_vhost_status() -> get_vhost_net().

Therefore, we defer freeing up the structures until guest shutdown
time, when qemu_cleanup() calls net_cleanup(), which then calls
qemu_del_net_client(), which eventually calls vhost_vdpa_cleanup()
again to free up the structures. This time, the loop in net_cleanup()
ensures that vhost_vdpa_cleanup() will be called one last time when
all the peer nics are detached and freed.

All unit tests pass with this change.

CC: imammedo@redhat.com
CC: jusual@redhat.com
CC: mst@redhat.com
Fixes: CVE-2023-3301
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-Id: <20230619065209.442185-1-anisinha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit a0d7215e339b61c7d7a7b3fcf754954d80d93eb8)
---
 net/vhost-vdpa.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 99904a0da7..8c8900f0f4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -184,6 +184,14 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

+    /*
+     * If a peer NIC is attached, do not cleanup anything.
+     * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
+     * when the guest is shutting down.
+     */
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
+        return;
+    }
     qemu_vfree(s->cvq_cmd_out_buffer);
     qemu_vfree(s->status);
     if (s->vhost_net) {
--
2.39.3
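The same deferred-teardown idea in miniature (toy types, not QEMU code): cleanup becomes a no-op while a peer still references the backend, and the final pass that runs after all peers are detached frees the state exactly once:

    #include <stdlib.h>

    struct net_client {
        struct net_client *peer;  /* non-NULL while a NIC is still attached */
        void *backend_state;      /* stand-in for the vhost-net/vdpa structures */
    };

    static void client_cleanup(struct net_client *c)
    {
        if (c->peer) {
            /* Too early: the peer could still dereference freed state. */
            return;
        }
        free(c->backend_state);
        c->backend_state = NULL;
    }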
qemu-kvm.spec:
@@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 8.0.0
-Release: 6%{?rcrel}%{?dist}%{?cc_suffix}
+Release: 7%{?rcrel}%{?dist}%{?cc_suffix}
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 # Epoch 15 used for RHEL 8
 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -360,6 +360,18 @@ Patch102: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch
 Patch103: kvm-block-blkio-use-qemu_open-to-support-fd-passing-for-.patch
 # For bz#2180076 - [qemu-kvm] support fd passing for libblkio QEMU BlockDrivers
 Patch104: kvm-qapi-add-fdset-feature-for-BlockdevOptionsVirtioBlkV.patch
+# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association
+Patch105: kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch
+# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association
+Patch106: kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch
+# For bz#2171363 - [aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association
+Patch107: kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch
+# For RHEL-330 - [virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed
+Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch
+# For bz#2218644 - query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone)
+Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch
+# For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest
+Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch

 %if %{have_clang}
 BuildRequires: clang
@@ -1400,6 +1412,22 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
 %endif

 %changelog
+* Mon Jul 10 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.0.0-7
+- kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363]
+- kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363]
+- kvm-hw-arm-virt-Validate-cluster-and-NUMA-node-boundary-.patch [bz#2171363]
+- kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch [RHEL-330]
+- kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch [bz#2218644]
+- kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch [bz#2128929]
+- Resolves: bz#2171363
+  ([aarch64] Kernel hits Call trace with irregular CPU-to-NUMA association)
+- Resolves: RHEL-330
+  ([virtual network][qemu-kvm-8.0.0-rc1]qemu core dump: qemu-kvm: ../softmmu/memory.c:2592: void memory_region_del_eventfd(MemoryRegion *, hwaddr, unsigned int, _Bool, uint64_t, EventNotifier *): Assertion `i != mr->ioeventfd_nb' failed)
+- Resolves: bz#2218644
+  (query-stats QMP command interrupts vcpus, the Max Latencies could be more than 100us (rhel 9.3.0 clone))
+- Resolves: bz#2128929
+  ([rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest)
+
 * Mon Jun 26 2023 Miroslav Rezanina <mrezanin@redhat.com> - 8.0.0-6
 - kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch [bz#2216201]
 - kvm-target-i386-add-support-for-FB_CLEAR-feature.patch [bz#2216201]