powerpc-utils/0002-drmgr-read-the-CPU-NUMA-topology.patch
Than Ngo 77da67160e - Resolves: #1937038, New lparstat -x option to report the security flavor
- Use od instead xxd
- rebase patch fix_boot-time_bonding_interface_cleanup_and_avoid_use_ifcfg
- ppc64_cpu --help does not list --version as an option
- take care of NUMA topology when removing memory (DLPAR)
2021-06-11 12:03:17 +02:00

439 lines
11 KiB
Diff

From 88caa91a4c8f0ac2376da433f697bc6845595dac Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Wed, 2 Dec 2020 16:10:57 +0100
Subject: [PATCH 2/3] drmgr: read the CPU NUMA topology
This will be used in the next commit to compute LMB removal based on the
NUMA topology.
The NUMA topology is read using the libnuma, so a dependency against it is
added in the configure file.
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
---
Makefile.am | 5 +-
configure.ac | 4 +
src/drmgr/common_numa.c | 268 ++++++++++++++++++++++++++++++++++++++++
src/drmgr/common_numa.h | 83 +++++++++++++
4 files changed, 359 insertions(+), 1 deletion(-)
create mode 100644 src/drmgr/common_numa.c
create mode 100644 src/drmgr/common_numa.h
diff --git a/Makefile.am b/Makefile.am
index 2ff2232537df..31baaa74b353 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -155,6 +155,7 @@ src_drmgr_drmgr_SOURCES = \
src/drmgr/common_cpu.c \
src/drmgr/common_ofdt.c \
src/drmgr/common_pci.c \
+ src/drmgr/common_numa.c \
src/drmgr/drmgr.c \
src/drmgr/drmig_chrp_pmig.c \
src/drmgr/drslot_chrp_cpu.c \
@@ -171,13 +172,14 @@ noinst_HEADERS += \
src/drmgr/drcpu.h \
src/drmgr/dr.h \
src/drmgr/drmem.h \
+	src/drmgr/common_numa.h \
src/drmgr/drpci.h \
src/drmgr/rtas_calls.h \
src/drmgr/ofdt.h \
src/drmgr/rtas_calls.h \
src/drmgr/options.c
-src_drmgr_drmgr_LDADD = -lrtas
+src_drmgr_drmgr_LDADD = -lrtas -lnuma
src_drmgr_lsslot_SOURCES = \
src/drmgr/lsslot.c \
@@ -186,6 +188,7 @@ src_drmgr_lsslot_SOURCES = \
src/drmgr/common_cpu.c \
src/drmgr/common_pci.c \
src/drmgr/common_ofdt.c \
+ src/drmgr/common_numa.c \
src/drmgr/rtas_calls.c \
src/drmgr/drslot_chrp_mem.c \
$(pseries_platform_SOURCES)
diff --git a/configure.ac b/configure.ac
index de3c6758389a..0239754cc4f4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -42,6 +42,10 @@ AC_CHECK_HEADER(zlib.h,
[AC_CHECK_LIB(z, inflate, [], [AC_MSG_FAILURE([zlib library is required for compilation])])],
[AC_MSG_FAILURE([zlib.h is required for compiliation])])
+# check for libnuma, used by drmgr to read the NUMA topology
+AC_CHECK_HEADER(numa.h,
+	[AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_FAILURE([numa library is required for compilation])])],
+	[AC_MSG_FAILURE([numa.h is required for compilation])])
+
# check for librtas
AC_ARG_WITH([librtas],
[AS_HELP_STRING([--without-librtas],
diff --git a/src/drmgr/common_numa.c b/src/drmgr/common_numa.c
new file mode 100644
index 000000000000..5778769b25b6
--- /dev/null
+++ b/src/drmgr/common_numa.c
@@ -0,0 +1,268 @@
+/**
+ * @file common_numa.c
+ *
+ * Copyright (C) IBM Corporation 2020
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <numa.h>
+
+#include "dr.h"
+#include "ofdt.h"
+#include "drmem.h" /* for DYNAMIC_RECONFIG_MEM */
+#include "common_numa.h"
+
+#define RTAS_DIRECTORY "/proc/device-tree/rtas"
+#define CHOSEN_DIRECTORY "/proc/device-tree/chosen"
+#define ASSOC_REF_POINTS "ibm,associativity-reference-points"
+#define ASSOC_LOOKUP_ARRAYS "ibm,associativity-lookup-arrays"
+#define ARCHITECTURE_VEC_5 "ibm,architecture-vec-5"
+
+/*
+ * Allocate a buffer and read a property into it.
+ *
+ * @param dir  directory holding the property
+ * @param prop name of the property in @dir
+ * @param buf  set to the newly allocated buffer on success, the caller is
+ *             then in charge of freeing it; set to NULL on failure
+ * @return the size of the property in bytes (always > 0) on success,
+ *         -ENOENT when get_property_size() reports no data,
+ *         -ENOMEM when the buffer can't be allocated,
+ *         -EINVAL when get_property() fails.
+ *
+ * The read property is not converted to the host endianness.
+ */
+static int load_property(char *dir, char *prop, uint32_t **buf)
+{
+	int size;
+
+	/* Never hand back an uninitialized or dangling pointer */
+	*buf = NULL;
+
+	size = get_property_size(dir, prop);
+	if (size <= 0)
+		return -ENOENT;
+
+	*buf = zalloc(size);
+	if (!*buf) {
+		say(ERROR, "Could not allocate buffer read %s (%d bytes)\n",
+		    prop, size);
+		return -ENOMEM;
+	}
+
+	if (get_property(dir, prop, *buf, size)) {
+		free(*buf);
+		*buf = NULL;
+		say(ERROR, "Can't retrieve %s/%s\n", dir, prop);
+		return -EINVAL;
+	}
+
+	return size;
+}
+
+/*
+ * Read the minimal common depth and store it in numa->min_common_depth.
+ *
+ * Only the form 1 of the ibm,associativity-reference-points property is
+ * supported: this is verified by checking that byte 5 bit 0 (mask 0x80) of
+ * the "ibm,architecture-vec-5" property is set.
+ *
+ * Return 0 on success, -ENOTSUP when that bit is clear, -EINVAL when a
+ * property is too short, or the error reported by load_property().
+ */
+static int get_min_common_depth(struct numa_topology *numa)
+{
+	uint32_t *prop;
+	unsigned char vec5_byte;
+	int len;
+
+	len = load_property(CHOSEN_DIRECTORY, ARCHITECTURE_VEC_5, &prop);
+	if (len < 0)
+		return len;
+
+	/* PAPR byte start at 1 (and not 0) but there is the length field */
+	if (len < 6) {
+		report_unknown_error(__FILE__, __LINE__);
+		free(prop);
+		return -EINVAL;
+	}
+	vec5_byte = ((unsigned char *)prop)[5];
+	free(prop);
+
+	if ((vec5_byte & 0x80) == 0)
+		return -ENOTSUP;
+
+	len = load_property(RTAS_DIRECTORY, ASSOC_REF_POINTS, &prop);
+	if (len <= 0)
+		return len;
+	if (len < sizeof(uint32_t)) {
+		report_unknown_error(__FILE__, __LINE__);
+		free(prop);
+		return -EINVAL;
+	}
+
+	/* Only the first entry of the reference points is kept */
+	numa->min_common_depth = be32toh(prop[0]);
+	free(prop);
+
+	return 0;
+}
+
+/*
+ * Read the "ibm,associativity-lookup-arrays" property and keep, for each
+ * associativity array, the entry found at the minimal common depth.
+ *
+ * The property is read as big endian 32-bit cells: the number of arrays,
+ * the number of entries per array, then the arrays themselves.
+ * numa->min_common_depth must have been set before calling this function.
+ *
+ * On success numa->aa.min_array points to a newly allocated array of
+ * numa->aa.n_arrays entries.
+ *
+ * Return 0 on success, a negative errno value otherwise.
+ */
+static int get_assoc_arrays(struct numa_topology *numa)
+{
+	int size;
+	int rc;
+	uint32_t *prop, i;
+	struct assoc_arrays *aa = &numa->aa;
+
+	size = load_property(DYNAMIC_RECONFIG_MEM, ASSOC_LOOKUP_ARRAYS, &prop);
+	if (size < 0)
+		return size;
+
+	/* From now on, size is a number of 32-bit cells */
+	size /= sizeof(uint32_t);
+	if (size < 2) {
+		say(ERROR, "Could not find the associativity lookup arrays\n");
+		free(prop);
+		return -EINVAL;
+	}
+
+	aa->n_arrays = be32toh(prop[0]);
+	aa->array_sz = be32toh(prop[1]);
+
+	rc = -EINVAL;
+	/*
+	 * The depth is used as a 1-based index in each array: 0 would pick a
+	 * cell located before the array, and a value above array_sz a cell
+	 * located after it.
+	 */
+	if (!numa->min_common_depth ||
+	    numa->min_common_depth > aa->array_sz) {
+		say(ERROR, "Bad min common depth or associativity array size\n");
+		goto out_free;
+	}
+
+	/* Sanity check, computed on 64 bits so that the product can't wrap */
+	if ((uint64_t)size != (uint64_t)aa->n_arrays * aa->array_sz + 2) {
+		say(ERROR, "Bad size of the associativity lookup arrays\n");
+		goto out_free;
+	}
+
+	aa->min_array = zalloc(aa->n_arrays * sizeof(uint32_t));
+	if (!aa->min_array && aa->n_arrays) {
+		say(ERROR, "Could not allocate the associativity array\n");
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	/* Keep only the most significant value */
+	for (i = 0; i < aa->n_arrays; i++) {
+		/* +2 to skip the two header cells, -1 as the depth is 1-based */
+		size_t prop_index = (size_t)i * aa->array_sz +
+				    numa->min_common_depth + 1;
+
+		aa->min_array[i] = be32toh(prop[prop_index]);
+	}
+	rc = 0;
+
+out_free:
+	free(prop);
+	return rc;
+}
+
+/*
+ * Return the descriptor of the node @nid, allocating and registering it in
+ * @numa the first time that node id is requested.
+ *
+ * When a node is created, numa->node_count is incremented and
+ * numa->node_min / numa->node_max are updated to include @nid.
+ *
+ * Return NULL if @nid is not in the range [0, MAX_NUMNODES) or if the
+ * allocation fails.
+ */
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int nid)
+{
+	struct numa_node *node;
+
+	/*
+	 * nodes[] has MAX_NUMNODES entries: MAX_NUMNODES itself and negative
+	 * ids like NUMA_NO_NODE would index outside of the array.
+	 */
+	if (nid < 0 || nid >= MAX_NUMNODES) {
+		report_unknown_error(__FILE__, __LINE__);
+		return NULL;
+	}
+
+	node = numa->nodes[nid];
+	if (node)
+		return node;
+
+	node = zalloc(sizeof(struct numa_node));
+	if (!node) {
+		say(ERROR, "Can't allocate a new node\n");
+		return NULL;
+	}
+
+	node->node_id = nid;
+
+	/* The first registered node initializes node_min */
+	if (!numa->node_count || nid < numa->node_min)
+		numa->node_min = nid;
+	if (nid > numa->node_max)
+		numa->node_max = nid;
+
+	numa->nodes[nid] = node;
+	numa->node_count++;
+
+	return node;
+}
+
+/*
+ * Read the number of CPU for each node using the libnuma to get the details
+ * from sysfs.
+ *
+ * A node descriptor is fetched for each node id set in numa_nodes_ptr, its
+ * n_cpus is increased by the number of CPUs reported for that node, and
+ * numa->cpu_count accumulates the total.
+ *
+ * Return 0 on success. On failure a non zero value is returned and the CPU
+ * counters (per node and total) are reset to 0; the node descriptors
+ * already registered are kept.
+ */
+static int read_numa_topology(struct numa_topology *numa)
+{
+	struct bitmask *cpus;
+	struct numa_node *node;
+	int rc, max_node, nid;
+	unsigned long cpu;
+
+	if (numa_available() < 0)
+		return -ENOENT;
+
+	max_node = numa_max_node();
+	if (max_node >= MAX_NUMNODES) {
+		say(ERROR, "Too many nodes %d (max:%d)\n",
+		    max_node, MAX_NUMNODES);
+		return -EINVAL;
+	}
+
+	rc = 0;
+
+	/* In case of allocation error, the libnuma is calling exit() */
+	cpus = numa_allocate_cpumask();
+
+	for (nid = 0; nid <= max_node; nid++) {
+
+		/* Skip the node ids which are not set in numa_nodes_ptr */
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, nid))
+			continue;
+
+		node = numa_fetch_node(numa, nid);
+		if (!node) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		rc = numa_node_to_cpus(nid, cpus);
+		if (rc < 0)
+			break;
+
+		/* Count the CPUs in that node */
+		for (cpu = 0; cpu < cpus->size; cpu++)
+			if (numa_bitmask_isbitset(cpus, cpu))
+				node->n_cpus++;
+
+		numa->cpu_count += node->n_cpus;
+	}
+
+	numa_bitmask_free(cpus);
+
+	if (rc) {
+		/*
+		 * numa_foreach_node() starts by reading nodes[node_min], which
+		 * is NULL while no node is registered: don't walk the nodes in
+		 * that case.
+		 */
+		if (numa->node_count) {
+			numa_foreach_node(numa, nid, node)
+				node->n_cpus = 0;
+		}
+		numa->cpu_count = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Fill @numa with the NUMA topology: the minimal common depth, the
+ * associativity lookup arrays, then the nodes and their CPU count.
+ *
+ * The steps are run in that order and the first failing one stops the
+ * sequence, its error code being returned.
+ *
+ * Return 0 on success, -1 when no node has been found.
+ */
+int numa_get_topology(struct numa_topology *numa)
+{
+	int err;
+
+	err = get_min_common_depth(numa);
+	if (!err)
+		err = get_assoc_arrays(numa);
+	if (!err)
+		err = read_numa_topology(numa);
+	if (err)
+		return err;
+
+	/* A topology without any node is not usable */
+	if (numa->node_count == 0)
+		return -1;
+
+	return 0;
+}
diff --git a/src/drmgr/common_numa.h b/src/drmgr/common_numa.h
new file mode 100644
index 000000000000..4d0054926819
--- /dev/null
+++ b/src/drmgr/common_numa.h
@@ -0,0 +1,83 @@
+/**
+ * @file common_numa.h
+ *
+ * Copyright (C) IBM Corporation 2020
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#ifndef _NUMA_H_
+#define _NUMA_H_
+
+/* Number of entries in the nodes[] array of struct numa_topology */
+#define MAX_NUMNODES 256
+/* Node id returned when there is no matching node */
+#define NUMA_NO_NODE (-1)
+
+/*
+ * Description of one NUMA node, stored in the nodes[] array of struct
+ * numa_topology at the index node_id.
+ */
+struct numa_node {
+ /* Id of the node, also its index in numa_topology.nodes[] */
+ int node_id;
+ /* Number of CPUs counted in this node */
+ unsigned int n_cpus;
+ /* Presumably the number of LMBs of this node; not set in common_numa.c, to be confirmed */
+ unsigned int n_lmbs;
+ /* Not set in common_numa.c; meaning to be confirmed against its users */
+ unsigned int ratio;
+ struct dr_node *lmbs; /* linked by lmb_numa_next */
+ /* Next node in the list walked by numa_foreach_node_by_ratio() */
+ struct numa_node *ratio_next;
+};
+
+/*
+ * Content of the "ibm,associativity-lookup-arrays" property, reduced to the
+ * entry found at the minimal common depth for each array.
+ */
+struct assoc_arrays {
+ /* Number of arrays (first cell of the property) */
+ uint32_t n_arrays;
+ /* Number of entries per array (second cell of the property) */
+ uint32_t array_sz;
+ /* Allocated array of n_arrays values, one per associativity array */
+ uint32_t *min_array;
+};
+
+/*
+ * NUMA topology as filled by numa_get_topology().
+ */
+struct numa_topology {
+ /* Sum of the n_cpus of all the nodes */
+ unsigned int cpu_count;
+ /* The three counters below are not set in common_numa.c; meaning to be confirmed against their users */
+ unsigned int lmb_count;
+ unsigned int cpuless_node_count;
+ unsigned int cpuless_lmb_count;
+ /* Number of registered nodes, and lowest / highest registered node id */
+ unsigned int node_count, node_min, node_max;
+ /* Registered nodes indexed by node id, NULL when the id is not registered */
+ struct numa_node *nodes[MAX_NUMNODES];
+ /* Head of the list walked by numa_foreach_node_by_ratio() */
+ struct numa_node *ratio;
+ /* First entry of the "ibm,associativity-reference-points" property */
+ uint32_t min_common_depth;
+ /* Data read from the "ibm,associativity-lookup-arrays" property */
+ struct assoc_arrays aa;
+};
+
+/*
+ * Read the NUMA topology into @numa. Return 0 on success, a non zero value
+ * otherwise.
+ * NOTE(review): @numa is presumably expected to be zeroed by the caller,
+ * to be confirmed.
+ */
+int numa_get_topology(struct numa_topology *numa);
+/*
+ * Return the node descriptor of @node_id, creating and registering it in
+ * @numa if needed. Return NULL on error.
+ */
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int node_id);
+
+/*
+ * Convert an index in the associativity lookup arrays into the value kept
+ * for that array in numa->aa.min_array.
+ * Return NUMA_NO_NODE when @aa_index is not below numa->aa.n_arrays.
+ */
+static inline int numa_aa_index_to_node(struct numa_topology *numa,
+					uint32_t aa_index)
+{
+	if (aa_index >= numa->aa.n_arrays)
+		return NUMA_NO_NODE;
+
+	return numa->aa.min_array[aa_index];
+}
+
+/*
+ * Look for the first registered node whose id is above @nid.
+ *
+ * When one is found, *@node is set to it and its id is returned. Otherwise
+ * *@node is left untouched and the first id tried which is not below or
+ * equal to numa->node_max is returned.
+ */
+static inline int next_node(struct numa_topology *numa, int nid,
+			    struct numa_node **node)
+{
+	int candidate = nid + 1;
+
+	while (candidate <= numa->node_max) {
+		struct numa_node *found = numa->nodes[candidate];
+
+		if (found) {
+			*node = found;
+			return candidate;
+		}
+		candidate++;
+	}
+
+	return candidate;
+}
+
+/*
+ * Walk the registered nodes of @numa from node_min to node_max, skipping
+ * the empty slots of nodes[]. @nid and @node are lvalues receiving the id
+ * and the descriptor of the current node.
+ *
+ * The walk stops as soon as the current slot is empty, so the body is never
+ * run with a NULL @node, e.g. when nodes[node_min] is empty because no node
+ * has been registered.
+ *
+ * @numa is evaluated several times and must be free of side effects.
+ */
+#define numa_foreach_node(numa, nid, node)				\
+	for ((nid) = (numa)->node_min, (node) = (numa)->nodes[nid];	\
+	     (nid) <= (numa)->node_max && (numa)->nodes[nid];		\
+	     (nid) = next_node(numa, nid, &(node)))
+
+/*
+ * Walk the list of nodes starting at numa->ratio and linked through
+ * ratio_next. @node is an lvalue receiving the current node.
+ */
+#define numa_foreach_node_by_ratio(numa, node)				\
+	for ((node) = (numa)->ratio; (node); (node) = (node)->ratio_next)
+
+#endif /* _NUMA_H_ */
--
2.29.2