merge upstream patches
This commit is contained in:
parent
e3f1a6d00a
commit
f50e4f6aef
87
0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
Normal file
87
0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
From 014e8ba4580c7917e258df084776c16079dc07ce Mon Sep 17 00:00:00 2001
|
||||||
|
From: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Date: Tue, 24 Nov 2020 19:28:48 +0100
|
||||||
|
Subject: [PATCH 1/3] drmgr: don't open sysfs file for each command
|
||||||
|
|
||||||
|
The new __do_kernel_dlpar() API will be used in a later commit to remove by
|
||||||
|
DRC Index LMB per LMB. This will avoid opening and closing the fd each
|
||||||
|
time.
|
||||||
|
|
||||||
|
The fd closing will now be done at the process exit time.
|
||||||
|
|
||||||
|
In addition, add an optional parameter to silently ignore some errors.
|
||||||
|
|
||||||
|
Also, change the log level of the "success" message to debug to match
|
||||||
|
the previous one saying "Trying.."
|
||||||
|
|
||||||
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
---
|
||||||
|
src/drmgr/common.c | 22 +++++++++++++---------
|
||||||
|
src/drmgr/dr.h | 3 ++-
|
||||||
|
2 files changed, 15 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/src/drmgr/common.c b/src/drmgr/common.c
|
||||||
|
index 5e8135bcf77e..25d244cb2f57 100644
|
||||||
|
--- a/src/drmgr/common.c
|
||||||
|
+++ b/src/drmgr/common.c
|
||||||
|
@@ -1469,32 +1469,36 @@ int kernel_dlpar_exists(void)
|
||||||
|
* @param cmd command string to write to sysfs
|
||||||
|
* @returns 0 on success, !0 otherwise
|
||||||
|
*/
|
||||||
|
-int do_kernel_dlpar(const char *cmd, int cmdlen)
|
||||||
|
+int __do_kernel_dlpar(const char *cmd, int cmdlen, int silent_error)
|
||||||
|
{
|
||||||
|
- int fd, rc;
|
||||||
|
+ static int fd = -1;
|
||||||
|
+ int rc;
|
||||||
|
int my_errno;
|
||||||
|
|
||||||
|
say(DEBUG, "Initiating kernel DLPAR \"%s\"\n", cmd);
|
||||||
|
|
||||||
|
/* write to file */
|
||||||
|
- fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
|
||||||
|
- if (fd <= 0) {
|
||||||
|
- say(ERROR, "Could not open %s to initiate DLPAR request\n",
|
||||||
|
- SYSFS_DLPAR_FILE);
|
||||||
|
- return -1;
|
||||||
|
+ if (fd == -1) {
|
||||||
|
+ fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
|
||||||
|
+ if (fd <= 0) {
|
||||||
|
+ say(ERROR, "Could not open %s to initiate DLPAR request\n",
|
||||||
|
+ SYSFS_DLPAR_FILE);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = write(fd, cmd, cmdlen);
|
||||||
|
my_errno = errno;
|
||||||
|
- close(fd);
|
||||||
|
if (rc <= 0) {
|
||||||
|
+ if (silent_error)
|
||||||
|
+ return (my_errno == 0) ? -1 : -my_errno;
|
||||||
|
/* write does not set errno for rc == 0 */
|
||||||
|
say(ERROR, "Failed to write to %s: %s\n", SYSFS_DLPAR_FILE,
|
||||||
|
(rc == 0) ? "wrote 0 bytes" : strerror(my_errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- say(INFO, "Success\n");
|
||||||
|
+ say(DEBUG, "Success\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/src/drmgr/dr.h b/src/drmgr/dr.h
|
||||||
|
index f171bfea73c3..00d2fffc9919 100644
|
||||||
|
--- a/src/drmgr/dr.h
|
||||||
|
+++ b/src/drmgr/dr.h
|
||||||
|
@@ -172,5 +172,6 @@ enum drc_type to_drc_type(const char *);
|
||||||
|
int handle_prrn(void);
|
||||||
|
|
||||||
|
int kernel_dlpar_exists(void);
|
||||||
|
-int do_kernel_dlpar(const char *, int);
|
||||||
|
+int __do_kernel_dlpar(const char *, int, int);
|
||||||
|
+#define do_kernel_dlpar(c, l) __do_kernel_dlpar(c, l, 0)
|
||||||
|
#endif
|
||||||
|
--
|
||||||
|
2.29.2
|
||||||
|
|
40
0001-drmgr-fix-remove-by-index-operation.patch
Normal file
40
0001-drmgr-fix-remove-by-index-operation.patch
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
From 16469b696959aee4ce32d9f77483e1e3f192e82d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Date: Fri, 16 Apr 2021 18:10:36 +0200
|
||||||
|
Subject: [PATCH] drmgr: fix remove by index operation
|
||||||
|
|
||||||
|
The commit e9f06531356f ("drmgr: introduce NUMA based LMB removal")
|
||||||
|
introduced special processing when NUMA is on and the remove by count
|
||||||
|
operation is done.
|
||||||
|
|
||||||
|
Unfortunately, that code is also triggered when doing a remove by index
|
||||||
|
operation (-s argument) because usr_drc_count is set to 1. As a
|
||||||
|
consequence the index constraint is not respected and any LMB can be
|
||||||
|
removed.
|
||||||
|
|
||||||
|
Add a check against usr_drc_index, which is set when a remove by index
|
||||||
|
operation is done to ensure the numa removal code is not triggered in that
|
||||||
|
case.
|
||||||
|
|
||||||
|
Fixes: e9f06531356f ("drmgr: introduce NUMA based LMB removal")
|
||||||
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
---
|
||||||
|
src/drmgr/drslot_chrp_mem.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
|
||||||
|
index f17c94adc270..8db98bb9e9ea 100644
|
||||||
|
--- a/src/drmgr/drslot_chrp_mem.c
|
||||||
|
+++ b/src/drmgr/drslot_chrp_mem.c
|
||||||
|
@@ -1749,7 +1749,7 @@ int do_mem_kernel_dlpar(void)
|
||||||
|
int rc, offset;
|
||||||
|
|
||||||
|
|
||||||
|
- if (usr_action == REMOVE && usr_drc_count) {
|
||||||
|
+ if (usr_action == REMOVE && usr_drc_count && !usr_drc_index) {
|
||||||
|
build_numa_topology();
|
||||||
|
if (numa_enabled) {
|
||||||
|
if (!numa_based_remove(usr_drc_count))
|
||||||
|
--
|
||||||
|
2.31.1
|
||||||
|
|
438
0002-drmgr-read-the-CPU-NUMA-topology.patch
Normal file
438
0002-drmgr-read-the-CPU-NUMA-topology.patch
Normal file
@ -0,0 +1,438 @@
|
|||||||
|
From 88caa91a4c8f0ac2376da433f697bc6845595dac Mon Sep 17 00:00:00 2001
|
||||||
|
From: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Date: Wed, 2 Dec 2020 16:10:57 +0100
|
||||||
|
Subject: [PATCH 2/3] drmgr: read the CPU NUMA topology
|
||||||
|
|
||||||
|
This will be used in the next commit to compute LMB removal based on the
|
||||||
|
NUMA topology.
|
||||||
|
|
||||||
|
The NUMA topology is read using the libnuma, so a dependency against it is
|
||||||
|
added in the configure file.
|
||||||
|
|
||||||
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
---
|
||||||
|
Makefile.am | 5 +-
|
||||||
|
configure.ac | 4 +
|
||||||
|
src/drmgr/common_numa.c | 268 ++++++++++++++++++++++++++++++++++++++++
|
||||||
|
src/drmgr/common_numa.h | 83 +++++++++++++
|
||||||
|
4 files changed, 359 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 src/drmgr/common_numa.c
|
||||||
|
create mode 100644 src/drmgr/common_numa.h
|
||||||
|
|
||||||
|
diff --git a/Makefile.am b/Makefile.am
|
||||||
|
index 2ff2232537df..31baaa74b353 100644
|
||||||
|
--- a/Makefile.am
|
||||||
|
+++ b/Makefile.am
|
||||||
|
@@ -155,6 +155,7 @@ src_drmgr_drmgr_SOURCES = \
|
||||||
|
src/drmgr/common_cpu.c \
|
||||||
|
src/drmgr/common_ofdt.c \
|
||||||
|
src/drmgr/common_pci.c \
|
||||||
|
+ src/drmgr/common_numa.c \
|
||||||
|
src/drmgr/drmgr.c \
|
||||||
|
src/drmgr/drmig_chrp_pmig.c \
|
||||||
|
src/drmgr/drslot_chrp_cpu.c \
|
||||||
|
@@ -171,13 +172,14 @@ noinst_HEADERS += \
|
||||||
|
src/drmgr/drcpu.h \
|
||||||
|
src/drmgr/dr.h \
|
||||||
|
src/drmgr/drmem.h \
|
||||||
|
+ src/drmgr/numa.h \
|
||||||
|
src/drmgr/drpci.h \
|
||||||
|
src/drmgr/rtas_calls.h \
|
||||||
|
src/drmgr/ofdt.h \
|
||||||
|
src/drmgr/rtas_calls.h \
|
||||||
|
src/drmgr/options.c
|
||||||
|
|
||||||
|
-src_drmgr_drmgr_LDADD = -lrtas
|
||||||
|
+src_drmgr_drmgr_LDADD = -lrtas -lnuma
|
||||||
|
|
||||||
|
src_drmgr_lsslot_SOURCES = \
|
||||||
|
src/drmgr/lsslot.c \
|
||||||
|
@@ -186,6 +188,7 @@ src_drmgr_lsslot_SOURCES = \
|
||||||
|
src/drmgr/common_cpu.c \
|
||||||
|
src/drmgr/common_pci.c \
|
||||||
|
src/drmgr/common_ofdt.c \
|
||||||
|
+ src/drmgr/common_numa.c \
|
||||||
|
src/drmgr/rtas_calls.c \
|
||||||
|
src/drmgr/drslot_chrp_mem.c \
|
||||||
|
$(pseries_platform_SOURCES)
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index de3c6758389a..0239754cc4f4 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -42,6 +42,10 @@ AC_CHECK_HEADER(zlib.h,
|
||||||
|
[AC_CHECK_LIB(z, inflate, [], [AC_MSG_FAILURE([zlib library is required for compilation])])],
|
||||||
|
[AC_MSG_FAILURE([zlib.h is required for compiliation])])
|
||||||
|
|
||||||
|
+AC_CHECK_HEADER(numa.h,
|
||||||
|
+ [AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_FAILURE([numa library is required for compilation])])],
|
||||||
|
+ [AC_MSG_FAILURE([numa.h is required for compiliation])])
|
||||||
|
+
|
||||||
|
# check for librtas
|
||||||
|
AC_ARG_WITH([librtas],
|
||||||
|
[AS_HELP_STRING([--without-librtas],
|
||||||
|
diff --git a/src/drmgr/common_numa.c b/src/drmgr/common_numa.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000000..5778769b25b6
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/src/drmgr/common_numa.c
|
||||||
|
@@ -0,0 +1,268 @@
|
||||||
|
+/**
|
||||||
|
+ * @file common_numa.c
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) IBM Corporation 2020
|
||||||
|
+ *
|
||||||
|
+ * This program is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU General Public License
|
||||||
|
+ * as published by the Free Software Foundation; either version 2
|
||||||
|
+ * of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * This program is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+ * GNU General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU General Public License
|
||||||
|
+ * along with this program; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <errno.h>
|
||||||
|
+#include <numa.h>
|
||||||
|
+
|
||||||
|
+#include "dr.h"
|
||||||
|
+#include "ofdt.h"
|
||||||
|
+#include "drmem.h" /* for DYNAMIC_RECONFIG_MEM */
|
||||||
|
+#include "common_numa.h"
|
||||||
|
+
|
||||||
|
+#define RTAS_DIRECTORY "/proc/device-tree/rtas"
|
||||||
|
+#define CHOSEN_DIRECTORY "/proc/device-tree/chosen"
|
||||||
|
+#define ASSOC_REF_POINTS "ibm,associativity-reference-points"
|
||||||
|
+#define ASSOC_LOOKUP_ARRAYS "ibm,associativity-lookup-arrays"
|
||||||
|
+#define ARCHITECTURE_VEC_5 "ibm,architecture-vec-5"
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Allocate and read a property, return the size.
|
||||||
|
+ * The read property is not converted to the host endianness.
|
||||||
|
+ */
|
||||||
|
+static int load_property(char *dir, char *prop, uint32_t **buf)
|
||||||
|
+{
|
||||||
|
+ int size;
|
||||||
|
+
|
||||||
|
+ size = get_property_size(dir, prop);
|
||||||
|
+ if (!size)
|
||||||
|
+ return -ENOENT;
|
||||||
|
+
|
||||||
|
+ *buf = zalloc(size);
|
||||||
|
+ if (!*buf) {
|
||||||
|
+ say(ERROR, "Could not allocate buffer read %s (%d bytes)\n",
|
||||||
|
+ prop, size);
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (get_property(dir, prop, *buf, size)) {
|
||||||
|
+ free(*buf);
|
||||||
|
+ say(ERROR, "Can't retrieve %s/%s\n", dir, prop);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return size;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Get the minimal common depth, based on the form 1 of the ibm,associativ-
|
||||||
|
+ * ity-reference-points property. We only support that form.
|
||||||
|
+ *
|
||||||
|
+ * We should check that the "ibm,architecture-vec-5" property byte 5 bit 0
|
||||||
|
+ * has the value of one.
|
||||||
|
+ */
|
||||||
|
+static int get_min_common_depth(struct numa_topology *numa)
|
||||||
|
+{
|
||||||
|
+ int size;
|
||||||
|
+ uint32_t *p;
|
||||||
|
+ unsigned char val;
|
||||||
|
+
|
||||||
|
+ size = load_property(CHOSEN_DIRECTORY, ARCHITECTURE_VEC_5, &p);
|
||||||
|
+ if (size < 0)
|
||||||
|
+ return size;
|
||||||
|
+
|
||||||
|
+ /* PAPR byte start at 1 (and not 0) but there is the length field */
|
||||||
|
+ if (size < 6) {
|
||||||
|
+ report_unknown_error(__FILE__, __LINE__);
|
||||||
|
+ free(p);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+ val = ((unsigned char *)p)[5];
|
||||||
|
+ free(p);
|
||||||
|
+
|
||||||
|
+ if (!(val & 0x80))
|
||||||
|
+ return -ENOTSUP;
|
||||||
|
+
|
||||||
|
+ size = load_property(RTAS_DIRECTORY, ASSOC_REF_POINTS, &p);
|
||||||
|
+ if (size <= 0)
|
||||||
|
+ return size;
|
||||||
|
+ if (size < sizeof(uint32_t)) {
|
||||||
|
+ report_unknown_error(__FILE__, __LINE__);
|
||||||
|
+ free(p);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Get the first entry */
|
||||||
|
+ numa->min_common_depth = be32toh(*p);
|
||||||
|
+ free(p);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int get_assoc_arrays(struct numa_topology *numa)
|
||||||
|
+{
|
||||||
|
+ int size;
|
||||||
|
+ int rc;
|
||||||
|
+ uint32_t *prop, i;
|
||||||
|
+ struct assoc_arrays *aa = &numa->aa;
|
||||||
|
+
|
||||||
|
+ size = load_property(DYNAMIC_RECONFIG_MEM, ASSOC_LOOKUP_ARRAYS, &prop);
|
||||||
|
+ if (size < 0)
|
||||||
|
+ return size;
|
||||||
|
+
|
||||||
|
+ size /= sizeof(uint32_t);
|
||||||
|
+ if (size < 2) {
|
||||||
|
+ say(ERROR, "Could not find the associativity lookup arrays\n");
|
||||||
|
+ free(prop);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ aa->n_arrays = be32toh(prop[0]);
|
||||||
|
+ aa->array_sz = be32toh(prop[1]);
|
||||||
|
+
|
||||||
|
+ rc = -EINVAL;
|
||||||
|
+ if (numa->min_common_depth > aa->array_sz) {
|
||||||
|
+ say(ERROR, "Bad min common depth or associativity array size\n");
|
||||||
|
+ goto out_free;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Sanity check */
|
||||||
|
+ if (size != (aa->n_arrays * aa->array_sz + 2)) {
|
||||||
|
+ say(ERROR, "Bad size of the associativity lookup arrays\n");
|
||||||
|
+ goto out_free;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ aa->min_array = zalloc(aa->n_arrays * sizeof(uint32_t));
|
||||||
|
+
|
||||||
|
+ /* Keep only the most significant value */
|
||||||
|
+ for (i = 0; i < aa->n_arrays; i++) {
|
||||||
|
+ int prop_index = i * aa->array_sz + numa->min_common_depth + 1;
|
||||||
|
+
|
||||||
|
+ aa->min_array[i] = be32toh(prop[prop_index]);
|
||||||
|
+ }
|
||||||
|
+ rc = 0;
|
||||||
|
+
|
||||||
|
+out_free:
|
||||||
|
+ free(prop);
|
||||||
|
+ return rc;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int nid)
|
||||||
|
+{
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+
|
||||||
|
+ if (nid > MAX_NUMNODES) {
|
||||||
|
+ report_unknown_error(__FILE__, __LINE__);
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ node = numa->nodes[nid];
|
||||||
|
+ if (node)
|
||||||
|
+ return node;
|
||||||
|
+
|
||||||
|
+ node = zalloc(sizeof(struct numa_node));
|
||||||
|
+ if (!node) {
|
||||||
|
+ say(ERROR, "Can't allocate a new node\n");
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ node->node_id = nid;
|
||||||
|
+
|
||||||
|
+ if (!numa->node_count || nid < numa->node_min)
|
||||||
|
+ numa->node_min = nid;
|
||||||
|
+ if (nid > numa->node_max)
|
||||||
|
+ numa->node_max = nid;
|
||||||
|
+
|
||||||
|
+ numa->nodes[nid] = node;
|
||||||
|
+ numa->node_count++;
|
||||||
|
+
|
||||||
|
+ return node;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Read the number of CPU for each node using the libnuma to get the details
|
||||||
|
+ * from sysfs.
|
||||||
|
+ */
|
||||||
|
+static int read_numa_topology(struct numa_topology *numa)
|
||||||
|
+{
|
||||||
|
+ struct bitmask *cpus;
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+ int rc, max_node, nid, i;
|
||||||
|
+
|
||||||
|
+ if (numa_available() < 0)
|
||||||
|
+ return -ENOENT;
|
||||||
|
+
|
||||||
|
+ max_node = numa_max_node();
|
||||||
|
+ if (max_node >= MAX_NUMNODES) {
|
||||||
|
+ say(ERROR, "Too many nodes %d (max:%d)\n",
|
||||||
|
+ max_node, MAX_NUMNODES);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rc = 0;
|
||||||
|
+
|
||||||
|
+ /* In case of allocation error, the libnuma is calling exit() */
|
||||||
|
+ cpus = numa_allocate_cpumask();
|
||||||
|
+
|
||||||
|
+ for (nid = 0; nid <= max_node; nid++) {
|
||||||
|
+
|
||||||
|
+ if (!numa_bitmask_isbitset(numa_nodes_ptr, nid))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ node = numa_fetch_node(numa, nid);
|
||||||
|
+ if (!node) {
|
||||||
|
+ rc = -ENOMEM;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rc = numa_node_to_cpus(nid, cpus);
|
||||||
|
+ if (rc < 0)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ /* Count the CPUs in that node */
|
||||||
|
+ for (i = 0; i < cpus->size; i++)
|
||||||
|
+ if (numa_bitmask_isbitset(cpus, i))
|
||||||
|
+ node->n_cpus++;
|
||||||
|
+
|
||||||
|
+ numa->cpu_count += node->n_cpus;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ numa_bitmask_free(cpus);
|
||||||
|
+
|
||||||
|
+ if (rc) {
|
||||||
|
+ numa_foreach_node(numa, nid, node)
|
||||||
|
+ node->n_cpus = 0;
|
||||||
|
+ numa->cpu_count = 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return rc;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int numa_get_topology(struct numa_topology *numa)
|
||||||
|
+{
|
||||||
|
+ int rc;
|
||||||
|
+
|
||||||
|
+ rc = get_min_common_depth(numa);
|
||||||
|
+ if (rc)
|
||||||
|
+ return rc;
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+ rc = get_assoc_arrays(numa);
|
||||||
|
+ if (rc)
|
||||||
|
+ return rc;
|
||||||
|
+
|
||||||
|
+ rc = read_numa_topology(numa);
|
||||||
|
+ if (rc)
|
||||||
|
+ return rc;
|
||||||
|
+
|
||||||
|
+ if (!numa->node_count)
|
||||||
|
+ return -1;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/src/drmgr/common_numa.h b/src/drmgr/common_numa.h
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000000..4d0054926819
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/src/drmgr/common_numa.h
|
||||||
|
@@ -0,0 +1,83 @@
|
||||||
|
+/**
|
||||||
|
+ * @file numa.h
|
||||||
|
+ *
|
||||||
|
+ * Copyright (C) IBM Corporation 2020
|
||||||
|
+ *
|
||||||
|
+ * This program is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU General Public License
|
||||||
|
+ * as published by the Free Software Foundation; either version 2
|
||||||
|
+ * of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * This program is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+ * GNU General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU General Public License
|
||||||
|
+ * along with this program; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
+ */
|
||||||
|
+#ifndef _NUMA_H_
|
||||||
|
+#define _NUMA_H_
|
||||||
|
+
|
||||||
|
+#define MAX_NUMNODES 256
|
||||||
|
+#define NUMA_NO_NODE -1
|
||||||
|
+
|
||||||
|
+struct numa_node {
|
||||||
|
+ int node_id;
|
||||||
|
+ unsigned int n_cpus;
|
||||||
|
+ unsigned int n_lmbs;
|
||||||
|
+ unsigned int ratio;
|
||||||
|
+ struct dr_node *lmbs; /* linked by lmb_numa_next */
|
||||||
|
+ struct numa_node *ratio_next;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct assoc_arrays {
|
||||||
|
+ uint32_t n_arrays;
|
||||||
|
+ uint32_t array_sz;
|
||||||
|
+ uint32_t *min_array;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+struct numa_topology {
|
||||||
|
+ unsigned int cpu_count;
|
||||||
|
+ unsigned int lmb_count;
|
||||||
|
+ unsigned int cpuless_node_count;
|
||||||
|
+ unsigned int cpuless_lmb_count;
|
||||||
|
+ unsigned int node_count, node_min, node_max;
|
||||||
|
+ struct numa_node *nodes[MAX_NUMNODES];
|
||||||
|
+ struct numa_node *ratio;
|
||||||
|
+ uint32_t min_common_depth;
|
||||||
|
+ struct assoc_arrays aa;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+int numa_get_topology(struct numa_topology *numa);
|
||||||
|
+struct numa_node *numa_fetch_node(struct numa_topology *numa, int node_id);
|
||||||
|
+
|
||||||
|
+static inline int numa_aa_index_to_node(struct numa_topology *numa,
|
||||||
|
+ uint32_t aa_index)
|
||||||
|
+{
|
||||||
|
+ if (aa_index < numa->aa.n_arrays)
|
||||||
|
+ return numa->aa.min_array[aa_index];
|
||||||
|
+ return NUMA_NO_NODE;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static inline int next_node(struct numa_topology *numa, int nid,
|
||||||
|
+ struct numa_node **node)
|
||||||
|
+{
|
||||||
|
+ for (nid++; nid <= numa->node_max; nid++)
|
||||||
|
+ if (numa->nodes[nid]) {
|
||||||
|
+ *node = numa->nodes[nid];
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ return nid;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#define numa_foreach_node(numa, nid, node) \
|
||||||
|
+ for (nid = (numa)->node_min, node = (numa)->nodes[nid]; \
|
||||||
|
+ nid <= (numa)->node_max; \
|
||||||
|
+ nid = next_node(numa, nid, &(node)))
|
||||||
|
+
|
||||||
|
+#define numa_foreach_node_by_ratio(numa, node) \
|
||||||
|
+ for (node = (numa)->ratio; node; node = node->ratio_next)
|
||||||
|
+
|
||||||
|
+#endif /* _NUMA_H_ */
|
||||||
|
--
|
||||||
|
2.29.2
|
||||||
|
|
443
0003-drmgr-introduce-NUMA-based-LMB-removal.patch
Normal file
443
0003-drmgr-introduce-NUMA-based-LMB-removal.patch
Normal file
@ -0,0 +1,443 @@
|
|||||||
|
From 3c549c7494e729a68b64ac5519bcf1506b24f945 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Date: Wed, 25 Nov 2020 18:03:45 +0100
|
||||||
|
Subject: [PATCH 3/3] drmgr: introduce NUMA based LMB removal
|
||||||
|
|
||||||
|
When the NUMA topology can be read, all the LMBs found in the Device Tree
|
||||||
|
are linked to the corresponding node. LMBs not associated with a node are
|
||||||
|
considered as not used.
|
||||||
|
|
||||||
|
LMBs associated with CPU-less nodes are accounted separately because they will
|
||||||
|
be targeted first for removal. The LMBs are removed from the CPU-less nodes
|
||||||
|
to reach an average number of LMBs per CPU-less node.
|
||||||
|
|
||||||
|
Nodes with CPUs have a ratio indexed on their number of CPUs. The higher a
|
||||||
|
node's CPU count, the fewer LMBs will be removed. This way, nodes with a
|
||||||
|
high number of CPUs will get a higher amount of memory.
|
||||||
|
|
||||||
|
When a LMB can't be removed (because its memory can't be offlined by the
|
||||||
|
kernel), the LMB count for the node is decremented and the LMB is removed from
|
||||||
|
the node's LMB list. This way, it is no longer accounted as 'active' and the
|
||||||
|
removal operation will continue without taking it into account anymore.
|
||||||
|
|
||||||
|
The removal is done through the remove by DRC index API, allowing to remove
|
||||||
|
a LMB at a time. One future optimization would be to extend that API to
|
||||||
|
remove a linear range of LMBs each time.
|
||||||
|
|
||||||
|
If the NUMA topology can't be read, we fall back to the legacy remove
|
||||||
|
way.
|
||||||
|
|
||||||
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
---
|
||||||
|
src/drmgr/drslot_chrp_mem.c | 335 +++++++++++++++++++++++++++++++++++-
|
||||||
|
src/drmgr/ofdt.h | 2 +
|
||||||
|
2 files changed, 336 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
|
||||||
|
index 502aa3e9fff0..47d9f7b8ed90 100644
|
||||||
|
--- a/src/drmgr/drslot_chrp_mem.c
|
||||||
|
+++ b/src/drmgr/drslot_chrp_mem.c
|
||||||
|
@@ -31,12 +31,16 @@
|
||||||
|
#include "dr.h"
|
||||||
|
#include "ofdt.h"
|
||||||
|
#include "drmem.h"
|
||||||
|
+#include "common_numa.h"
|
||||||
|
|
||||||
|
static int block_sz_bytes = 0;
|
||||||
|
static char *state_strs[] = {"offline", "online"};
|
||||||
|
|
||||||
|
static char *usagestr = "-c mem {-a | -r} {-q <quantity> -p {variable_weight | ent_capacity} | {-q <quantity> | -s [<drc_name> | <drc_index>]}}";
|
||||||
|
|
||||||
|
+static struct numa_topology numa;
|
||||||
|
+static int numa_enabled = 0;
|
||||||
|
+
|
||||||
|
/**
|
||||||
|
* mem_usage
|
||||||
|
* @brief return usage string
|
||||||
|
@@ -306,6 +310,31 @@ get_mem_node_lmbs(struct lmb_list_head *lmb_list)
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int link_lmb_to_numa_node(struct dr_node *lmb)
|
||||||
|
+{
|
||||||
|
+ int nid;
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+
|
||||||
|
+ nid = numa_aa_index_to_node(&numa, lmb->lmb_aa_index);
|
||||||
|
+ if (nid == NUMA_NO_NODE)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ node = numa_fetch_node(&numa, nid);
|
||||||
|
+ if (!node)
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
+ lmb->lmb_numa_next = node->lmbs;
|
||||||
|
+ node->lmbs = lmb;
|
||||||
|
+ node->n_lmbs++;
|
||||||
|
+
|
||||||
|
+ if (node->n_cpus)
|
||||||
|
+ numa.lmb_count++;
|
||||||
|
+ else
|
||||||
|
+ numa.cpuless_lmb_count++;
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int add_lmb(struct lmb_list_head *lmb_list, uint32_t drc_index,
|
||||||
|
uint64_t address, uint64_t lmb_sz, uint32_t aa_index,
|
||||||
|
uint32_t flags)
|
||||||
|
@@ -324,6 +353,9 @@ int add_lmb(struct lmb_list_head *lmb_list, uint32_t drc_index,
|
||||||
|
lmb->lmb_address = address;
|
||||||
|
lmb->lmb_aa_index = aa_index;
|
||||||
|
|
||||||
|
+ if (numa_enabled && link_lmb_to_numa_node(lmb))
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
if (flags & DRMEM_ASSIGNED) {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
@@ -490,7 +522,7 @@ get_dynamic_reconfig_lmbs(struct lmb_list_head *lmb_list)
|
||||||
|
|
||||||
|
if (stat(DYNAMIC_RECONFIG_MEM_V1, &sbuf) == 0) {
|
||||||
|
rc = get_dynamic_reconfig_lmbs_v1(lmb_sz, lmb_list);
|
||||||
|
- } else if (is_lsslot_cmd &&
|
||||||
|
+ } else if ((is_lsslot_cmd || numa_enabled) &&
|
||||||
|
stat(DYNAMIC_RECONFIG_MEM_V2, &sbuf) == 0) {
|
||||||
|
rc = get_dynamic_reconfig_lmbs_v2(lmb_sz, lmb_list);
|
||||||
|
} else {
|
||||||
|
@@ -1424,11 +1456,312 @@ int valid_mem_options(void)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int remove_lmb_by_index(uint32_t drc_index)
|
||||||
|
+{
|
||||||
|
+ char cmdbuf[128];
|
||||||
|
+ int offset;
|
||||||
|
+
|
||||||
|
+ offset = sprintf(cmdbuf, "memory remove index 0x%x", drc_index);
|
||||||
|
+
|
||||||
|
+ return __do_kernel_dlpar(cmdbuf, offset, 1 /* Don't report error */);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int remove_lmb_from_node(struct numa_node *node, uint32_t count)
|
||||||
|
+{
|
||||||
|
+ struct dr_node *lmb;
|
||||||
|
+ int err, done = 0, unlinked = 0;
|
||||||
|
+
|
||||||
|
+ say(DEBUG, "Try removing %d / %d LMBs from node %d\n",
|
||||||
|
+ count, node->n_lmbs, node->node_id);
|
||||||
|
+
|
||||||
|
+ for (lmb = node->lmbs; lmb && done < count; lmb = lmb->lmb_numa_next) {
|
||||||
|
+ unlinked ++;
|
||||||
|
+ err = remove_lmb_by_index(lmb->drc_index);
|
||||||
|
+ if (err)
|
||||||
|
+ say(WARN,"Can't remove LMB node:%d index:0x%x: %s\n",
|
||||||
|
+ node->node_id, lmb->drc_index, strerror(-err));
|
||||||
|
+ else
|
||||||
|
+ done++;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Decrement the node LMB's count since whatever is the success
|
||||||
|
+ * of the removal operation, it will not be tried again on that
|
||||||
|
+ * LMB.
|
||||||
|
+ */
|
||||||
|
+ node->n_lmbs -= unlinked;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Update the node's list of LMB to not process the one we removed or
|
||||||
|
+	 * tried to remove again.
|
||||||
|
+ */
|
||||||
|
+ node->lmbs = lmb;
|
||||||
|
+
|
||||||
|
+ /* Update numa's counters */
|
||||||
|
+ if (node->n_cpus)
|
||||||
|
+ numa.lmb_count -= unlinked;
|
||||||
|
+ else
|
||||||
|
+ numa.cpuless_node_count -= unlinked;
|
||||||
|
+
|
||||||
|
+ if (!node->n_lmbs) {
|
||||||
|
+ node->ratio = 0; /* for sanity only */
|
||||||
|
+ if (node->n_cpus)
|
||||||
|
+ numa.cpu_count -= node->n_cpus;
|
||||||
|
+ else
|
||||||
|
+ numa.cpuless_node_count--;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ say(INFO, "Removed %d LMBs from node %d\n", done, node->node_id);
|
||||||
|
+ return done;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#define min(a,b) ((a < b) ? a : b)
|
||||||
|
+
|
||||||
|
+static void update_cpuless_node_ratio(void)
|
||||||
|
+{
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+ int nid;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Assumptions:
|
||||||
|
+ * 1. numa->cpuless_node_count is up to date
|
||||||
|
+ * 2. numa->cpuless_lmb_count is up to date
|
||||||
|
+ * Nodes with no memory and nodes with CPUs are ignored here.
|
||||||
|
+ */
|
||||||
|
+ numa_foreach_node(&numa, nid, node) {
|
||||||
|
+ if (node->n_cpus ||!node->n_lmbs)
|
||||||
|
+ continue;
|
||||||
|
+ node->ratio = (node->n_lmbs * 100) / numa.cpuless_lmb_count;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Remove LMBs from node without CPUs only.
|
||||||
|
+ * The more the node has LMBs, the more LMBs will be removed from it.
|
||||||
|
+ *
|
||||||
|
+ * We have to retry the operation multiple times because some LMB cannot be
|
||||||
|
+ * removed due to the page usage in the kernel. In that case, that LMB is no
|
||||||
|
+ * more taken in account and the node's LMB count is decremented, assuming that
|
||||||
|
+ * LMB is unremovable at this time. Thus each node's ratio has to be computed on
|
||||||
|
+ * each iteration. This is not a big deal, usually, there are not so many nodes.
|
||||||
|
+ */
|
||||||
|
+static int remove_cpuless_lmbs(uint32_t count)
|
||||||
|
+{
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+ int nid;
|
||||||
|
+ uint32_t total = count, todo, done = 0, this_loop;
|
||||||
|
+
|
||||||
|
+ while (count) {
|
||||||
|
+ count = min(count, numa.cpuless_lmb_count);
|
||||||
|
+ if (!count)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ update_cpuless_node_ratio();
|
||||||
|
+
|
||||||
|
+ this_loop = 0;
|
||||||
|
+ numa_foreach_node(&numa, nid, node) {
|
||||||
|
+ if (!node->n_lmbs || node->n_cpus)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ todo = (count * node->ratio) / 100;
|
||||||
|
+ todo = min(todo, node->n_lmbs);
|
||||||
|
+ /* Fix rounded value to 0 */
|
||||||
|
+ if (!todo && node->n_lmbs)
|
||||||
|
+ todo = (count - this_loop);
|
||||||
|
+
|
||||||
|
+ if (todo)
|
||||||
|
+ todo = remove_lmb_from_node(node, todo);
|
||||||
|
+
|
||||||
|
+ this_loop += todo;
|
||||||
|
+ done += todo;
|
||||||
|
+ if (done >= total)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Don't continue if we didn't make any progress. */
|
||||||
|
+ if (!this_loop)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ count -= this_loop;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ say(DEBUG, "%d / %d LMBs removed from the CPU less nodes\n",
|
||||||
|
+ done, total);
|
||||||
|
+ return done;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void update_node_ratio(void)
|
||||||
|
+{
|
||||||
|
+ int nid;
|
||||||
|
+ struct numa_node *node, *n, **p;
|
||||||
|
+ uint32_t cpu_ratio, mem_ratio;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Assumptions:
|
||||||
|
+ * 1. numa->cpu_count is up to date
|
||||||
|
+ * 2. numa->lmb_count is up to date
|
||||||
|
+ * Nodes with no memory and nodes with no CPU are ignored here.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+ numa.ratio = NULL;
|
||||||
|
+ numa_foreach_node(&numa, nid, node) {
|
||||||
|
+ if (!node->n_lmbs || !node->n_cpus)
|
||||||
|
+ continue;
|
||||||
|
+ cpu_ratio = (node->n_cpus * 100) / numa.cpu_count;
|
||||||
|
+ mem_ratio = (node->n_lmbs * 100) / numa.lmb_count;
|
||||||
|
+
|
||||||
|
+ /* Say that CPU ratio is 90% of the ratio */
|
||||||
|
+ node->ratio = (cpu_ratio * 9 + mem_ratio) / 10;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Create an ordered link of the nodes */
|
||||||
|
+ numa_foreach_node(&numa, nid, node) {
|
||||||
|
+ if (!node->n_lmbs || !node->n_cpus)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ p = &numa.ratio;
|
||||||
|
+ for (n = numa.ratio;
|
||||||
|
+ n && n->ratio < node->ratio; n = n->ratio_next)
|
||||||
|
+ p = &n->ratio_next;
|
||||||
|
+ *p = node;
|
||||||
|
+ node->ratio_next = n;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+ * Remove LMBs from node with CPUs.
|
||||||
|
+ *
|
||||||
|
+ * The less a node has CPU, the more memory will be removed from it.
|
||||||
|
+ *
|
||||||
|
+ * As for the CPU less nodes, we must iterate because some LMBs may not be
|
||||||
|
+ * removable at this time.
|
||||||
|
+ */
|
||||||
|
+static int remove_cpu_lmbs(uint32_t count)
|
||||||
|
+{
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+ uint32_t total = count, todo, done = 0, this_loop;
|
||||||
|
+ uint32_t new_lmb_count;
|
||||||
|
+
|
||||||
|
+ while(count) {
|
||||||
|
+ count = min(count, numa.lmb_count);
|
||||||
|
+ if (!count)
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ update_node_ratio();
|
||||||
|
+
|
||||||
|
+ new_lmb_count = numa.lmb_count - count;
|
||||||
|
+
|
||||||
|
+ this_loop = 0;
|
||||||
|
+ numa_foreach_node_by_ratio(&numa, node) {
|
||||||
|
+ if (!node->n_lmbs || !node->n_cpus)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ todo = (new_lmb_count * node->ratio) / 100;
|
||||||
|
+ todo = node->n_lmbs - min(todo, node->n_lmbs);
|
||||||
|
+ todo = min(count, todo);
|
||||||
|
+
|
||||||
|
+ if (todo) {
|
||||||
|
+ todo = remove_lmb_from_node(node, todo);
|
||||||
|
+ count -= todo;
|
||||||
|
+ this_loop += todo;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!count)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Don't continue if we didn't make any progress. */
|
||||||
|
+ if (!this_loop)
|
||||||
|
+ break;
|
||||||
|
+ done += this_loop;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ say(DEBUG, "%d / %d LMBs removed from the CPU nodes\n",
|
||||||
|
+ done, total);
|
||||||
|
+ return done;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void build_numa_topology(void)
|
||||||
|
+{
|
||||||
|
+ int rc;
|
||||||
|
+
|
||||||
|
+ rc = numa_get_topology(&numa);
|
||||||
|
+ if (rc)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ numa_enabled = 1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void clear_numa_lmb_links(void)
|
||||||
|
+{
|
||||||
|
+ int nid;
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+
|
||||||
|
+ numa_foreach_node(&numa, nid, node)
|
||||||
|
+ node->lmbs = NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int numa_based_remove(uint32_t count)
|
||||||
|
+{
|
||||||
|
+ struct lmb_list_head *lmb_list;
|
||||||
|
+ struct numa_node *node;
|
||||||
|
+ int nid;
|
||||||
|
+ uint32_t done = 0;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * Read the LMBs
|
||||||
|
+ * Link the LMBs to their node
|
||||||
|
+ * Update global counter
|
||||||
|
+ */
|
||||||
|
+ lmb_list = get_lmbs(LMB_NORMAL_SORT);
|
||||||
|
+ if (lmb_list == NULL) {
|
||||||
|
+ clear_numa_lmb_links();
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!numa.node_count) {
|
||||||
|
+ clear_numa_lmb_links();
|
||||||
|
+ free_lmbs(lmb_list);
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ numa_foreach_node(&numa, nid, node) {
|
||||||
|
+ say(INFO, "node %4d %4d CPUs %8d LMBs\n",
|
||||||
|
+ nid, node->n_cpus, node->n_lmbs);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ done += remove_cpuless_lmbs(count);
|
||||||
|
+ count -= done;
|
||||||
|
+
|
||||||
|
+ done += remove_cpu_lmbs(count);
|
||||||
|
+
|
||||||
|
+ report_resource_count(done);
|
||||||
|
+
|
||||||
|
+ clear_numa_lmb_links();
|
||||||
|
+ free_lmbs(lmb_list);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int do_mem_kernel_dlpar(void)
|
||||||
|
{
|
||||||
|
char cmdbuf[128];
|
||||||
|
int rc, offset;
|
||||||
|
|
||||||
|
+
|
||||||
|
+ if (usr_action == REMOVE && usr_drc_count) {
|
||||||
|
+ build_numa_topology();
|
||||||
|
+ if (numa_enabled) {
|
||||||
|
+ if (!numa_based_remove(usr_drc_count))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * If the NUMA based removal failed, lets try the legacy
|
||||||
|
+ * way.
|
||||||
|
+ */
|
||||||
|
+ say(WARN, "Can't do NUMA based removal operation.\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
offset = sprintf(cmdbuf, "%s ", "memory");
|
||||||
|
|
||||||
|
switch (usr_action) {
|
||||||
|
diff --git a/src/drmgr/ofdt.h b/src/drmgr/ofdt.h
|
||||||
|
index 3850a77229b4..3c2840b2e0ee 100644
|
||||||
|
--- a/src/drmgr/ofdt.h
|
||||||
|
+++ b/src/drmgr/ofdt.h
|
||||||
|
@@ -92,6 +92,7 @@ struct dr_node {
|
||||||
|
uint32_t _lmb_aa_index;
|
||||||
|
struct mem_scn *_mem_scns;
|
||||||
|
struct of_node *_of_node;
|
||||||
|
+ struct dr_node *_numa_next;
|
||||||
|
} _smem;
|
||||||
|
|
||||||
|
#define lmb_address _node_u._smem._address
|
||||||
|
@@ -99,6 +100,7 @@ struct dr_node {
|
||||||
|
#define lmb_aa_index _node_u._smem._lmb_aa_index
|
||||||
|
#define lmb_mem_scns _node_u._smem._mem_scns
|
||||||
|
#define lmb_of_node _node_u._smem._of_node
|
||||||
|
+#define lmb_numa_next _node_u._smem._numa_next
|
||||||
|
|
||||||
|
struct hea_info {
|
||||||
|
uint _port_no;
|
||||||
|
--
|
||||||
|
2.29.2
|
||||||
|
|
@ -0,0 +1,33 @@
|
|||||||
|
commit 0b59d4a372aa266caa75f3b6a253b8f5aeaf3802
|
||||||
|
Author: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
Date: Mon Mar 1 19:34:29 2021 -0800
|
||||||
|
|
||||||
|
hcnmgr: Avoid cleanup of bond interface at boot time when no HNV exists
|
||||||
|
|
||||||
|
At boot time, hcn scans the device tree and discovers if there was a new
|
||||||
|
HNV being added while lpar was inactive. It also cleans up the old hnv
|
||||||
|
interfaces. This patch avoids cleaning up bonding interface when no HNV
|
||||||
|
network devices exist.
|
||||||
|
|
||||||
|
Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
[tyreld: fixup commit log]
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/scripts/hcnmgr b/scripts/hcnmgr
|
||||||
|
index a76505e..c95edba 100644
|
||||||
|
--- a/scripts/hcnmgr
|
||||||
|
+++ b/scripts/hcnmgr
|
||||||
|
@@ -575,7 +575,13 @@ scanhcn() {
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
+ if [ ${#HcnIds[@]} -eq 0 ]; then # test the element COUNT; ${HcnIds[@]} would expand to the IDs themselves
|
||||||
|
+ hcnlog DEBUG "scanhcn: scan for hybrid virtual network finished"
|
||||||
|
+ return $E_SUCCESS
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
# Next clean up dead connections left from orgitinal LPAR after inactive miration
|
||||||
|
+ # Only do this when the HNV ID array is not empty
|
||||||
|
|
||||||
|
# list of all HCN ids
|
||||||
|
ids="${HcnIds[*]}"
|
@ -0,0 +1,32 @@
|
|||||||
|
commit 1cb8bd89d6386c60e75c47d4a4452d3f130d5138
|
||||||
|
Author: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
Date: Fri Mar 12 14:18:18 2021 -0800
|
||||||
|
|
||||||
|
hcnmgr: Avoid using xargs to process NM show connections
|
||||||
|
|
||||||
|
When removing HNV bonding connections xargs can fail to process the output of
|
||||||
|
nmcli show properly.
|
||||||
|
|
||||||
|
Instead of piping into xargs fix this by using a loop to check for all related
|
||||||
|
bonding connections and remove them explicitly one by one.
|
||||||
|
|
||||||
|
Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
[tyreld: fixed up commit log]
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/scripts/hcnmgr b/scripts/hcnmgr
|
||||||
|
index d66b5d1..30d31e7 100644
|
||||||
|
--- a/scripts/hcnmgr
|
||||||
|
+++ b/scripts/hcnmgr
|
||||||
|
@@ -377,7 +377,10 @@ rmhcn() {
|
||||||
|
fi
|
||||||
|
|
||||||
|
hcnlog INFO "rmhcn: delete bond $BONDNAME and slaves "
|
||||||
|
- nmcli -f NAME con show | grep "$BONDNAME" | xargs sudo nmcli con delete
|
||||||
|
+ for connection in $(nmcli -f NAME con show | grep "$BONDNAME"); do
|
||||||
|
+ hcnlog INFO "Delete bonding connection $connection"
|
||||||
|
+ nmcli con delete "$connection"
|
||||||
|
+ done
|
||||||
|
hcnlog DEBUG "rmhcn: exit"
|
||||||
|
return $E_SUCCESS
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
commit 366e17553ed647613668678c2d301d369038f41b
|
||||||
|
Author: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
|
||||||
|
Date: Thu Nov 12 19:00:47 2020 +0530
|
||||||
|
|
||||||
|
Update ppc64-cpu usage
|
||||||
|
|
||||||
|
'ppc64_cpu --help' doesn't list '--version' as an option. This patch
|
||||||
|
adds the option in the usage information of ppc64-cpu command.
|
||||||
|
|
||||||
|
Signed-off-by: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
|
||||||
|
index 71f4720..2b0f66c 100644
|
||||||
|
--- a/src/ppc64_cpu.c
|
||||||
|
+++ b/src/ppc64_cpu.c
|
||||||
|
@@ -1195,7 +1195,8 @@ static void usage(void)
|
||||||
|
"ppc64_cpu --subcores-per-core # Get number of subcores per core\n"
|
||||||
|
"ppc64_cpu --subcores-per-core=X # Set subcores per core to X (1 or 4)\n"
|
||||||
|
"ppc64_cpu --threads-per-core # Get threads per core\n"
|
||||||
|
-"ppc64_cpu --info # Display system state information)\n");
|
||||||
|
+"ppc64_cpu --info # Display system state information\n"
|
||||||
|
+"ppc64_cpu --version # Display version of ppc64-cpu\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
struct option longopts[] = {
|
@ -0,0 +1,30 @@
|
|||||||
|
commit d9bcb21179ccfea122f326aca4690afe0f7de0c6
|
||||||
|
Author: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
Date: Mon Mar 1 21:34:34 2021 -0800
|
||||||
|
|
||||||
|
hcnmgr: Wait for sysfs device ready when looking up device name
|
||||||
|
|
||||||
|
At the time of calling ofpathname to look up for devicename, wait
|
||||||
|
for sysfs device ready. Otherwise, the OS may be in the middle of device
|
||||||
|
renaming.
|
||||||
|
|
||||||
|
Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
[tyreld: fixed up commit log]
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/scripts/hcnmgr b/scripts/hcnmgr
|
||||||
|
index c95edba..0d20e7d 100644
|
||||||
|
--- a/scripts/hcnmgr
|
||||||
|
+++ b/scripts/hcnmgr
|
||||||
|
@@ -241,7 +241,10 @@ get_dev_hcn() {
|
||||||
|
# Let's retry a few times.
|
||||||
|
while [ $wait != 0 ]; do
|
||||||
|
if DEVNAME=$(ofpathname -l "$(echo "$1" | sed -e "s/\/proc\/device-tree//")" 2>/dev/null); then
|
||||||
|
- break
|
||||||
|
+ if [ -e /sys/class/net/"$DEVNAME" ]; then
|
||||||
|
+ hcnlog DEBUG "ofpathname waiting for /sys/class/net device $DEVNAME ready"
|
||||||
|
+ break
|
||||||
|
+ fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
hcnlog DEBUG "ofpathname return $?, devname is $DEVNAME rety counter $wait"
|
@ -0,0 +1,30 @@
|
|||||||
|
commit e25d71be411b610e5e889f8efaaf04b38c2d9ecb
|
||||||
|
Author: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
Date: Fri Mar 12 13:50:33 2021 -0800
|
||||||
|
|
||||||
|
hcnmgr: Avoid using ifcfg file for checking bonding interface status
|
||||||
|
|
||||||
|
When configuring migratable sr_iov into hybrid network, it checks if
|
||||||
|
there is an existing HNV using the presence of ifcfg file location. This
|
||||||
|
is not preferred as the location can be different on distros.
|
||||||
|
|
||||||
|
This patch fixes this by using NetworkManager nmcli.
|
||||||
|
|
||||||
|
Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
|
||||||
|
[tyreld: fixed spelling]
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/scripts/hcnmgr b/scripts/hcnmgr
|
||||||
|
index 0d20e7d..d66b5d1 100644
|
||||||
|
--- a/scripts/hcnmgr
|
||||||
|
+++ b/scripts/hcnmgr
|
||||||
|
@@ -282,8 +282,7 @@ do_config_vdevice() {
|
||||||
|
|
||||||
|
hcnlog DEBUG "Check if there is bond $BONDNAME with hcn id $HCNID"
|
||||||
|
|
||||||
|
- hcnlog DEBUG "ifconfig file $IFCONFIG_PATH/ifconfig-$BONDNAME"
|
||||||
|
- if [ ! -e "$IFCONFIG_PATH/ifcfg-$BONDNAME" ]; then
|
||||||
|
+ if ! nmcli -f NAME con show --active | grep -q "$BONDNAME\s"; then
|
||||||
|
hcnlog INFO "nmcli con add type bond con-name $BONDNAME ifname $BONDNAME"
|
||||||
|
nmcli con add type bond con-name "$BONDNAME" ifname "$BONDNAME"
|
||||||
|
|
@ -0,0 +1,132 @@
|
|||||||
|
commit 97269d301797e23b75d0c7a5cb63ce280783f615
|
||||||
|
Author: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Date: Thu Mar 4 14:51:38 2021 +0100
|
||||||
|
|
||||||
|
lparstat: add -x option for the security flavor
|
||||||
|
|
||||||
|
This allows the user to get the security flavor settings for the LPAR.
|
||||||
|
|
||||||
|
The output is :
|
||||||
|
|
||||||
|
$ lparstat -x
|
||||||
|
Speculative Execution Mode : 1
|
||||||
|
|
||||||
|
Where the output number means
|
||||||
|
0 = Speculative execution fully enabled
|
||||||
|
1 = Speculative execution controls to mitigate user-to-kernel side-channel
|
||||||
|
attacks
|
||||||
|
2 = Speculative execution controls to mitigate user-to-kernel and
|
||||||
|
user-to-user side-channel attacks
|
||||||
|
|
||||||
|
In the case the running kernel is not exposing the security flavor in
|
||||||
|
/proc/powerpc/lparcfg, the output is:
|
||||||
|
|
||||||
|
$ lparstat -x
|
||||||
|
Speculative Execution Mode : -
|
||||||
|
|
||||||
|
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
|
||||||
|
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/src/lparstat.c b/src/lparstat.c
|
||||||
|
index 23e4b85..00922c4 100644
|
||||||
|
--- a/src/lparstat.c
|
||||||
|
+++ b/src/lparstat.c
|
||||||
|
@@ -42,6 +42,7 @@
|
||||||
|
|
||||||
|
static bool o_legacy = false;
|
||||||
|
static bool o_scaled = false;
|
||||||
|
+static bool o_security = false;
|
||||||
|
|
||||||
|
static int threads_per_cpu;
|
||||||
|
static int cpus_in_system;
|
||||||
|
@@ -1152,6 +1153,15 @@ void print_scaled_output(int interval, int count)
|
||||||
|
} while (--count > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void print_security_flavor(void)
|
||||||
|
+{
|
||||||
|
+ char value[64];
|
||||||
|
+ char *descr;
|
||||||
|
+
|
||||||
|
+ get_sysdata("security_flavor", &descr, value);
|
||||||
|
+ fprintf(stdout, "%-45s: %s\n", descr, value);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static void usage(void)
|
||||||
|
{
|
||||||
|
printf("Usage: lparstat [ options ]\n\tlparstat <interval> [ count ]\n\n"
|
||||||
|
@@ -1159,6 +1169,7 @@ static void usage(void)
|
||||||
|
"\t-h, --help Show this message and exit.\n"
|
||||||
|
"\t-V, --version \tDisplay lparstat version information.\n"
|
||||||
|
"\t-i Lists details on the LPAR configuration.\n"
|
||||||
|
+ "\t-x Print the security mode settings for the LPAR.\n"
|
||||||
|
"\t-E Print SPURR metrics.\n"
|
||||||
|
"\t-l, --legacy Print the report in legacy format.\n"
|
||||||
|
"interval The interval parameter specifies the amount of time between each report.\n"
|
||||||
|
@@ -1184,7 +1195,7 @@ int main(int argc, char *argv[])
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
- while ((c = getopt_long(argc, argv, "iEVhl",
|
||||||
|
+ while ((c = getopt_long(argc, argv, "iEVhlx",
|
||||||
|
long_opts, &opt_index)) != -1) {
|
||||||
|
switch(c) {
|
||||||
|
case 'i':
|
||||||
|
@@ -1199,6 +1210,9 @@ int main(int argc, char *argv[])
|
||||||
|
case 'V':
|
||||||
|
printf("lparstat - %s\n", VERSION);
|
||||||
|
return 0;
|
||||||
|
+ case 'x':
|
||||||
|
+ o_security = true;
|
||||||
|
+ break;
|
||||||
|
case 'h':
|
||||||
|
usage();
|
||||||
|
return 0;
|
||||||
|
@@ -1223,6 +1237,8 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
|
if (i_option)
|
||||||
|
print_iflag_data();
|
||||||
|
+ else if (o_security)
|
||||||
|
+ print_security_flavor();
|
||||||
|
else if (o_scaled) {
|
||||||
|
print_scaled_output(interval, count);
|
||||||
|
close_cpu_sysfs_fds(threads_in_system);
|
||||||
|
diff --git a/src/lparstat.h b/src/lparstat.h
|
||||||
|
index 9b7117f..26ed4ba 100644
|
||||||
|
--- a/src/lparstat.h
|
||||||
|
+++ b/src/lparstat.h
|
||||||
|
@@ -302,6 +302,10 @@ struct sysentry system_data[] = {
|
||||||
|
.descr = "Idle CPU value - SPURR",
|
||||||
|
.get = &get_cpu_idle_spurr},
|
||||||
|
|
||||||
|
+ /* Security flavor */
|
||||||
|
+ {.name = "security_flavor",
|
||||||
|
+ .descr = "Speculative Execution Mode"},
|
||||||
|
+
|
||||||
|
{.name[0] = '\0'},
|
||||||
|
};
|
||||||
|
|
||||||
|
diff -up powerpc-utils-1.3.8/man/lparstat.8.me powerpc-utils-1.3.8/man/lparstat.8
|
||||||
|
--- powerpc-utils-1.3.8/man/lparstat.8.me 2021-04-20 15:49:18.305532697 +0200
|
||||||
|
+++ powerpc-utils-1.3.8/man/lparstat.8 2021-04-20 15:52:04.703021972 +0200
|
||||||
|
@@ -209,6 +209,20 @@ The variable memory capacity weight of t
|
||||||
|
.TP
|
||||||
|
.SH
|
||||||
|
.TP
|
||||||
|
+\fB\-x\fR
|
||||||
|
+Display the LPAR security flavor mode
|
||||||
|
+.RS
|
||||||
|
+.TP
|
||||||
|
+.B 0
|
||||||
|
+Speculative execution fully enabled
|
||||||
|
+.TP
|
||||||
|
+.B 1
|
||||||
|
+Speculative execution controls to mitigate user-to-kernel side-channel attacks
|
||||||
|
+.TP
|
||||||
|
+.B 2
|
||||||
|
+Speculative execution controls to mitigate user-to-kernel and user-to-user side-channel attacks
|
||||||
|
+.RE
|
||||||
|
+.TP
|
||||||
|
\fB\-E\fR
|
||||||
|
Display Scaled Processor Utilization Resource Register(SPURR) based CPU utilization.
|
||||||
|
.RS
|
Loading…
Reference in New Issue
Block a user