75 lines
2.9 KiB
Diff
75 lines
2.9 KiB
Diff
From: Prarit Bhargava <prarit@redhat.com>
|
|
|
|
Subject: linux: fix support for NUMA node0 being offline
|
|
|
|
commit 0114c2b0b3e39265e0829eebfff87ac9f4412fe9
|
|
Author: Brice Goglin <Brice.Goglin@inria.fr>
|
|
Date: Mon Apr 26 20:35:42 2021 +0200
|
|
|
|
linux: fix support for NUMA node0 being offline
|
|
|
|
Just like we didn't support offline CPU#0 until commit
|
|
7bcc273efd50536961ba16d474efca4ae163229b, we need to
|
|
support node0 being offline as well.
|
|
It's not clear whether it's a new Linux feature or not;
|
|
this was reported on a POWER LPAR VM.
|
|
|
|
The symptoms are different here because we got no NUMA
|
|
nodes at all, hence the core hwloc added a default
|
|
machine-wide node. But this node got marked disallowed
|
|
by Linux cgroups. Hence load() failed with
|
|
"Topology does not contain any NUMA node, aborting!"
|
|
|
|
We opportunistically assume node0 is online to avoid
|
|
the overhead in the vast majority of cases. If node0
|
|
is missing, we parse "online" to find the first node.
|
|
|
|
Thanks to Jirka Hladky for the report.
|
|
|
|
Signed-off-by: Brice Goglin <Brice.Goglin@inria.fr>
|
|
|
|
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
|
|
|
|
diff -urNp hwloc-2.2.0.orig/hwloc/topology-linux.c hwloc-2.2.0/hwloc/topology-linux.c
|
|
--- hwloc-2.2.0.orig/hwloc/topology-linux.c 2021-05-10 14:44:42.690559128 -0400
|
|
+++ hwloc-2.2.0/hwloc/topology-linux.c 2021-05-10 14:44:57.858982883 -0400
|
|
@@ -5342,6 +5342,9 @@ static const char *find_sysfs_cpu_path(i
|
|
|
|
static const char *find_sysfs_node_path(int root_fd)
|
|
{
|
|
+ unsigned first;
|
|
+ int err;
|
|
+
|
|
if (!hwloc_access("/sys/bus/node/devices", R_OK|X_OK, root_fd)
|
|
&& !hwloc_access("/sys/bus/node/devices/node0/cpumap", R_OK, root_fd))
|
|
return "/sys/bus/node/devices";
|
|
@@ -5350,6 +5353,28 @@ static const char *find_sysfs_node_path(
|
|
&& !hwloc_access("/sys/devices/system/node/node0/cpumap", R_OK, root_fd))
|
|
return "/sys/devices/system/node";
|
|
|
|
+ /* node0 might be offline, fall back to looking at the first online node.
|
|
+ * online contains comma-separated ranges, just read the first number.
|
|
+ */
|
|
+ hwloc_debug("Failed to find sysfs node files using node0, looking at online nodes...\n");
|
|
+ err = hwloc_read_path_as_uint("/sys/devices/system/node/online", &first, root_fd);
|
|
+ if (err) {
|
|
+ hwloc_debug("Failed to find read /sys/devices/system/node/online.\n");
|
|
+ } else {
|
|
+ char path[PATH_MAX];
|
|
+ hwloc_debug("Found node#%u as first online node\n", first);
|
|
+
|
|
+ snprintf(path, sizeof(path), "/sys/bus/node/devices/node%u/cpumap", first);
|
|
+ if (!hwloc_access("/sys/bus/node/devices", R_OK|X_OK, root_fd)
|
|
+ && !hwloc_access(path, R_OK, root_fd))
|
|
+ return "/sys/bus/node/devices";
|
|
+
|
|
+ snprintf(path, sizeof(path), "/sys/devices/system/node/node%u/cpumap", first);
|
|
+ if (!hwloc_access("/sys/devices/system/node", R_OK|X_OK, root_fd)
|
|
+ && !hwloc_access(path, R_OK, root_fd))
|
|
+ return "/sys/devices/system/node";
|
|
+ }
|
|
+
|
|
return NULL;
|
|
}
|
|
|