77da67160e
- Use od instead of xxd - rebase patch fix_boot-time_bonding_interface_cleanup_and_avoid_use_ifcfg - ppc64_cpu --help does not list --version as an option - take care of NUMA topology when removing memory (DLPAR)
439 lines
11 KiB
Diff
439 lines
11 KiB
Diff
From 88caa91a4c8f0ac2376da433f697bc6845595dac Mon Sep 17 00:00:00 2001
|
|
From: Laurent Dufour <ldufour@linux.ibm.com>
|
|
Date: Wed, 2 Dec 2020 16:10:57 +0100
|
|
Subject: [PATCH 2/3] drmgr: read the CPU NUMA topology
|
|
|
|
This will be used in the next commit to compute LMB removal based on the
|
|
NUMA topology.
|
|
|
|
The NUMA topology is read using the libnuma, so a dependency against it is
|
|
added in the configure file.
|
|
|
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
|
---
|
|
Makefile.am | 5 +-
|
|
configure.ac | 4 +
|
|
src/drmgr/common_numa.c | 268 ++++++++++++++++++++++++++++++++++++++++
|
|
src/drmgr/common_numa.h | 83 +++++++++++++
|
|
4 files changed, 359 insertions(+), 1 deletion(-)
|
|
create mode 100644 src/drmgr/common_numa.c
|
|
create mode 100644 src/drmgr/common_numa.h
|
|
|
|
diff --git a/Makefile.am b/Makefile.am
|
|
index 2ff2232537df..31baaa74b353 100644
|
|
--- a/Makefile.am
|
|
+++ b/Makefile.am
|
|
@@ -155,6 +155,7 @@ src_drmgr_drmgr_SOURCES = \
|
|
src/drmgr/common_cpu.c \
|
|
src/drmgr/common_ofdt.c \
|
|
src/drmgr/common_pci.c \
|
|
+ src/drmgr/common_numa.c \
|
|
src/drmgr/drmgr.c \
|
|
src/drmgr/drmig_chrp_pmig.c \
|
|
src/drmgr/drslot_chrp_cpu.c \
|
|
@@ -171,13 +172,14 @@ noinst_HEADERS += \
|
|
src/drmgr/drcpu.h \
|
|
src/drmgr/dr.h \
|
|
src/drmgr/drmem.h \
|
|
+ src/drmgr/common_numa.h \
|
|
src/drmgr/drpci.h \
|
|
src/drmgr/rtas_calls.h \
|
|
src/drmgr/ofdt.h \
|
|
src/drmgr/rtas_calls.h \
|
|
src/drmgr/options.c
|
|
|
|
-src_drmgr_drmgr_LDADD = -lrtas
|
|
+src_drmgr_drmgr_LDADD = -lrtas -lnuma
|
|
|
|
src_drmgr_lsslot_SOURCES = \
|
|
src/drmgr/lsslot.c \
|
|
@@ -186,6 +188,7 @@ src_drmgr_lsslot_SOURCES = \
|
|
src/drmgr/common_cpu.c \
|
|
src/drmgr/common_pci.c \
|
|
src/drmgr/common_ofdt.c \
|
|
+ src/drmgr/common_numa.c \
|
|
src/drmgr/rtas_calls.c \
|
|
src/drmgr/drslot_chrp_mem.c \
|
|
$(pseries_platform_SOURCES)
|
|
diff --git a/configure.ac b/configure.ac
|
|
index de3c6758389a..0239754cc4f4 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -42,6 +42,10 @@ AC_CHECK_HEADER(zlib.h,
|
|
[AC_CHECK_LIB(z, inflate, [], [AC_MSG_FAILURE([zlib library is required for compilation])])],
|
|
[AC_MSG_FAILURE([zlib.h is required for compiliation])])
|
|
|
|
+AC_CHECK_HEADER(numa.h,
|
|
+ [AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_FAILURE([numa library is required for compilation])])],
|
|
+ [AC_MSG_FAILURE([numa.h is required for compiliation])])
|
|
+
|
|
# check for librtas
|
|
AC_ARG_WITH([librtas],
|
|
[AS_HELP_STRING([--without-librtas],
|
|
diff --git a/src/drmgr/common_numa.c b/src/drmgr/common_numa.c
|
|
new file mode 100644
|
|
index 000000000000..5778769b25b6
|
|
--- /dev/null
|
|
+++ b/src/drmgr/common_numa.c
|
|
@@ -0,0 +1,268 @@
|
|
+/**
|
|
+ * @file common_numa.c
|
|
+ *
|
|
+ * Copyright (C) IBM Corporation 2020
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License
|
|
+ * as published by the Free Software Foundation; either version 2
|
|
+ * of the License, or (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
+ */
|
|
+
|
|
+#include <stdio.h>
|
|
+#include <errno.h>
|
|
+#include <numa.h>
|
|
+
|
|
+#include "dr.h"
|
|
+#include "ofdt.h"
|
|
+#include "drmem.h" /* for DYNAMIC_RECONFIG_MEM */
|
|
+#include "common_numa.h"
|
|
+
|
|
+#define RTAS_DIRECTORY "/proc/device-tree/rtas"
|
|
+#define CHOSEN_DIRECTORY "/proc/device-tree/chosen"
|
|
+#define ASSOC_REF_POINTS "ibm,associativity-reference-points"
|
|
+#define ASSOC_LOOKUP_ARRAYS "ibm,associativity-lookup-arrays"
|
|
+#define ARCHITECTURE_VEC_5 "ibm,architecture-vec-5"
|
|
+
|
|
+/*
|
|
+ * Allocate and read a property, return the size.
|
|
+ * The read property is not converted to the host endianness.
|
|
+ */
|
|
+static int load_property(char *dir, char *prop, uint32_t **buf)
|
|
+{
|
|
+ int size;
|
|
+
|
|
+ size = get_property_size(dir, prop);
|
|
+ if (!size)
|
|
+ return -ENOENT;
|
|
+
|
|
+ *buf = zalloc(size);
|
|
+ if (!*buf) {
|
|
+ say(ERROR, "Could not allocate buffer read %s (%d bytes)\n",
|
|
+ prop, size);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ if (get_property(dir, prop, *buf, size)) {
|
|
+ free(*buf);
|
|
+ say(ERROR, "Can't retrieve %s/%s\n", dir, prop);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ return size;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Get the minimal common depth, based on the form 1 of the ibm,associativ-
|
|
+ * ity-reference-points property. We only support that form.
|
|
+ *
|
|
+ * We should check that the "ibm,architecture-vec-5" property byte 5 bit 0
|
|
+ * has the value of one.
|
|
+ */
|
|
+static int get_min_common_depth(struct numa_topology *numa)
|
|
+{
|
|
+ int size;
|
|
+ uint32_t *p;
|
|
+ unsigned char val;
|
|
+
|
|
+ size = load_property(CHOSEN_DIRECTORY, ARCHITECTURE_VEC_5, &p);
|
|
+ if (size < 0)
|
|
+ return size;
|
|
+
|
|
+ /* PAPR byte start at 1 (and not 0) but there is the length field */
|
|
+ if (size < 6) {
|
|
+ report_unknown_error(__FILE__, __LINE__);
|
|
+ free(p);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ val = ((unsigned char *)p)[5];
|
|
+ free(p);
|
|
+
|
|
+ if (!(val & 0x80))
|
|
+ return -ENOTSUP;
|
|
+
|
|
+ size = load_property(RTAS_DIRECTORY, ASSOC_REF_POINTS, &p);
|
|
+ if (size <= 0)
|
|
+ return size;
|
|
+ if (size < sizeof(uint32_t)) {
|
|
+ report_unknown_error(__FILE__, __LINE__);
|
|
+ free(p);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /* Get the first entry */
|
|
+ numa->min_common_depth = be32toh(*p);
|
|
+ free(p);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int get_assoc_arrays(struct numa_topology *numa)
|
|
+{
|
|
+ int size;
|
|
+ int rc;
|
|
+ uint32_t *prop, i;
|
|
+ struct assoc_arrays *aa = &numa->aa;
|
|
+
|
|
+ size = load_property(DYNAMIC_RECONFIG_MEM, ASSOC_LOOKUP_ARRAYS, &prop);
|
|
+ if (size < 0)
|
|
+ return size;
|
|
+
|
|
+ size /= sizeof(uint32_t);
|
|
+ if (size < 2) {
|
|
+ say(ERROR, "Could not find the associativity lookup arrays\n");
|
|
+ free(prop);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ aa->n_arrays = be32toh(prop[0]);
|
|
+ aa->array_sz = be32toh(prop[1]);
|
|
+
|
|
+ rc = -EINVAL;
|
|
+ if (numa->min_common_depth > aa->array_sz) {
|
|
+ say(ERROR, "Bad min common depth or associativity array size\n");
|
|
+ goto out_free;
|
|
+ }
|
|
+
|
|
+ /* Sanity check */
|
|
+ if (size != (aa->n_arrays * aa->array_sz + 2)) {
|
|
+ say(ERROR, "Bad size of the associativity lookup arrays\n");
|
|
+ goto out_free;
|
|
+ }
|
|
+
|
|
+ aa->min_array = zalloc(aa->n_arrays * sizeof(uint32_t));
|
|
+
|
|
+ /* Keep only the most significant value */
|
|
+ for (i = 0; i < aa->n_arrays; i++) {
|
|
+ int prop_index = i * aa->array_sz + numa->min_common_depth + 1;
|
|
+
|
|
+ aa->min_array[i] = be32toh(prop[prop_index]);
|
|
+ }
|
|
+ rc = 0;
|
|
+
|
|
+out_free:
|
|
+ free(prop);
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int nid)
|
|
+{
|
|
+ struct numa_node *node;
|
|
+ /* nid indexes nodes[MAX_NUMNODES]: only 0..MAX_NUMNODES-1 is valid */
|
|
+ if (nid < 0 || nid >= MAX_NUMNODES) {
|
|
+ report_unknown_error(__FILE__, __LINE__);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ node = numa->nodes[nid];
|
|
+ if (node)
|
|
+ return node;
|
|
+ /* First use of this id: allocate the node and register it below */
|
|
+ node = zalloc(sizeof(struct numa_node));
|
|
+ if (!node) {
|
|
+ say(ERROR, "Can't allocate a new node\n");
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ node->node_id = nid;
|
|
+ /* Maintain the id range that numa_foreach_node() iterates over */
|
|
+ if (!numa->node_count || nid < numa->node_min)
|
|
+ numa->node_min = nid;
|
|
+ if (nid > numa->node_max)
|
|
+ numa->node_max = nid;
|
|
+
|
|
+ numa->nodes[nid] = node;
|
|
+ numa->node_count++;
|
|
+
|
|
+ return node;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Read the number of CPU for each node using the libnuma to get the details
|
|
+ * from sysfs.
|
|
+ */
|
|
+static int read_numa_topology(struct numa_topology *numa)
|
|
+{
|
|
+ struct bitmask *cpus;
|
|
+ struct numa_node *node;
|
|
+ int rc, max_node, nid, i;
|
|
+
|
|
+ if (numa_available() < 0)
|
|
+ return -ENOENT;
|
|
+
|
|
+ max_node = numa_max_node();
|
|
+ if (max_node >= MAX_NUMNODES) {
|
|
+ say(ERROR, "Too many nodes %d (max:%d)\n",
|
|
+ max_node, MAX_NUMNODES);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ rc = 0;
|
|
+
|
|
+ /* In case of allocation error, the libnuma is calling exit() */
|
|
+ cpus = numa_allocate_cpumask();
|
|
+
|
|
+ for (nid = 0; nid <= max_node; nid++) {
|
|
+
|
|
+ if (!numa_bitmask_isbitset(numa_nodes_ptr, nid))
|
|
+ continue;
|
|
+
|
|
+ node = numa_fetch_node(numa, nid);
|
|
+ if (!node) {
|
|
+ rc = -ENOMEM;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ rc = numa_node_to_cpus(nid, cpus);
|
|
+ if (rc < 0)
|
|
+ break;
|
|
+
|
|
+ /* Count the CPUs in that node */
|
|
+ for (i = 0; i < cpus->size; i++)
|
|
+ if (numa_bitmask_isbitset(cpus, i))
|
|
+ node->n_cpus++;
|
|
+
|
|
+ numa->cpu_count += node->n_cpus;
|
|
+ }
|
|
+
|
|
+ numa_bitmask_free(cpus);
|
|
+
|
|
+ if (rc) {
|
|
+ numa_foreach_node(numa, nid, node)
|
|
+ node->n_cpus = 0;
|
|
+ numa->cpu_count = 0;
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+int numa_get_topology(struct numa_topology *numa)
|
|
+{
|
|
+ int rc;
|
|
+
|
|
+ rc = get_min_common_depth(numa);
|
|
+ if (rc)
|
|
+ return rc;
|
|
+
|
|
+
|
|
+ rc = get_assoc_arrays(numa);
|
|
+ if (rc)
|
|
+ return rc;
|
|
+
|
|
+ rc = read_numa_topology(numa);
|
|
+ if (rc)
|
|
+ return rc;
|
|
+
|
|
+ if (!numa->node_count)
|
|
+ return -1;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
diff --git a/src/drmgr/common_numa.h b/src/drmgr/common_numa.h
|
|
new file mode 100644
|
|
index 000000000000..4d0054926819
|
|
--- /dev/null
|
|
+++ b/src/drmgr/common_numa.h
|
|
@@ -0,0 +1,83 @@
|
|
+/**
|
|
+ * @file common_numa.h
|
|
+ *
|
|
+ * Copyright (C) IBM Corporation 2020
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License
|
|
+ * as published by the Free Software Foundation; either version 2
|
|
+ * of the License, or (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
+ */
|
|
+#ifndef _NUMA_H_
|
|
+#define _NUMA_H_
|
|
+
|
|
+#define MAX_NUMNODES 256
|
|
+#define NUMA_NO_NODE -1
|
|
+
|
|
+struct numa_node {
|
|
+ int node_id;
|
|
+ unsigned int n_cpus;
|
|
+ unsigned int n_lmbs;
|
|
+ unsigned int ratio;
|
|
+ struct dr_node *lmbs; /* linked by lmb_numa_next */
|
|
+ struct numa_node *ratio_next;
|
|
+};
|
|
+
|
|
+struct assoc_arrays {
|
|
+ uint32_t n_arrays;
|
|
+ uint32_t array_sz;
|
|
+ uint32_t *min_array;
|
|
+};
|
|
+
|
|
+struct numa_topology {
|
|
+ unsigned int cpu_count;
|
|
+ unsigned int lmb_count;
|
|
+ unsigned int cpuless_node_count;
|
|
+ unsigned int cpuless_lmb_count;
|
|
+ unsigned int node_count, node_min, node_max;
|
|
+ struct numa_node *nodes[MAX_NUMNODES];
|
|
+ struct numa_node *ratio;
|
|
+ uint32_t min_common_depth;
|
|
+ struct assoc_arrays aa;
|
|
+};
|
|
+
|
|
+int numa_get_topology(struct numa_topology *numa);
|
|
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int node_id);
|
|
+
|
|
+static inline int numa_aa_index_to_node(struct numa_topology *numa,
|
|
+ uint32_t aa_index)
|
|
+{
|
|
+ if (aa_index < numa->aa.n_arrays)
|
|
+ return numa->aa.min_array[aa_index];
|
|
+ return NUMA_NO_NODE;
|
|
+}
|
|
+
|
|
+static inline int next_node(struct numa_topology *numa, int nid,
|
|
+ struct numa_node **node)
|
|
+{
|
|
+ for (nid++; nid <= numa->node_max; nid++)
|
|
+ if (numa->nodes[nid]) {
|
|
+ *node = numa->nodes[nid];
|
|
+ break;
|
|
+ }
|
|
+ return nid;
|
|
+}
|
|
+
|
|
+#define numa_foreach_node(numa, nid, node) \
|
|
+ for (nid = (numa)->node_min, node = (numa)->nodes[nid]; \
|
|
+ (numa)->node_count && nid <= (numa)->node_max; \
|
|
+ nid = next_node(numa, nid, &(node)))
|
|
+
|
|
+#define numa_foreach_node_by_ratio(numa, node) \
|
|
+ for (node = (numa)->ratio; node; node = node->ratio_next)
|
|
+
|
|
+#endif /* _NUMA_H_ */
|
|
--
|
|
2.29.2
|
|
|