merge upstream patches
This commit is contained in:
		
							parent
							
								
									e3f1a6d00a
								
							
						
					
					
						commit
						f50e4f6aef
					
				
							
								
								
									
										87
									
								
								0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								0001-drmgr-don-t-open-sysfs-file-for-each-command.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,87 @@
 | 
				
			|||||||
 | 
					From 014e8ba4580c7917e258df084776c16079dc07ce Mon Sep 17 00:00:00 2001
 | 
				
			||||||
 | 
					From: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					Date: Tue, 24 Nov 2020 19:28:48 +0100
 | 
				
			||||||
 | 
					Subject: [PATCH 1/3] drmgr: don't open sysfs file for each command
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The new __do_kernel_dlpar() API will be used in a later commit to remove by
 | 
				
			||||||
 | 
					DRC Index LMB per LMB. This will avoid opening and closing the fd each
 | 
				
			||||||
 | 
					time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The fd closing will now be done at the process exit time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In addition, add an optional parameter to silently ignore some errors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Also, change the log level of the "success" message to debug to match
 | 
				
			||||||
 | 
					the previous one saying "Trying.."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 src/drmgr/common.c | 22 +++++++++++++---------
 | 
				
			||||||
 | 
					 src/drmgr/dr.h     |  3 ++-
 | 
				
			||||||
 | 
					 2 files changed, 15 insertions(+), 10 deletions(-)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/common.c b/src/drmgr/common.c
 | 
				
			||||||
 | 
					index 5e8135bcf77e..25d244cb2f57 100644
 | 
				
			||||||
 | 
					--- a/src/drmgr/common.c
 | 
				
			||||||
 | 
					+++ b/src/drmgr/common.c
 | 
				
			||||||
 | 
					@@ -1469,32 +1469,36 @@ int kernel_dlpar_exists(void)
 | 
				
			||||||
 | 
					  * @param cmd command string to write to sysfs
 | 
				
			||||||
 | 
					  * @returns 0 on success, !0 otherwise
 | 
				
			||||||
 | 
					  */
 | 
				
			||||||
 | 
					-int do_kernel_dlpar(const char *cmd, int cmdlen)
 | 
				
			||||||
 | 
					+int __do_kernel_dlpar(const char *cmd, int cmdlen, int silent_error)
 | 
				
			||||||
 | 
					 {
 | 
				
			||||||
 | 
					-	int fd, rc;
 | 
				
			||||||
 | 
					+	static int fd = -1;
 | 
				
			||||||
 | 
					+	int rc;
 | 
				
			||||||
 | 
					 	int my_errno;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	say(DEBUG, "Initiating kernel DLPAR \"%s\"\n", cmd);
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	/* write to file */
 | 
				
			||||||
 | 
					-	fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
 | 
				
			||||||
 | 
					-	if (fd <= 0) {
 | 
				
			||||||
 | 
					-		say(ERROR, "Could not open %s to initiate DLPAR request\n",
 | 
				
			||||||
 | 
					-		    SYSFS_DLPAR_FILE);
 | 
				
			||||||
 | 
					-		return -1;
 | 
				
			||||||
 | 
					+	if (fd == -1) {
 | 
				
			||||||
 | 
					+		fd = open(SYSFS_DLPAR_FILE, O_WRONLY);
 | 
				
			||||||
 | 
					+		if (fd <= 0) {
 | 
				
			||||||
 | 
					+			say(ERROR, "Could not open %s to initiate DLPAR request\n",
 | 
				
			||||||
 | 
					+			    SYSFS_DLPAR_FILE);
 | 
				
			||||||
 | 
					+			return -1;
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					 	}
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	rc = write(fd, cmd, cmdlen);
 | 
				
			||||||
 | 
					 	my_errno = errno;
 | 
				
			||||||
 | 
					-	close(fd);
 | 
				
			||||||
 | 
					 	if (rc <= 0) {
 | 
				
			||||||
 | 
					+		if (silent_error)
 | 
				
			||||||
 | 
					+			return (my_errno == 0) ? -1 : -my_errno;
 | 
				
			||||||
 | 
					 		/* write does not set errno for rc == 0 */
 | 
				
			||||||
 | 
					 		say(ERROR, "Failed to write to %s: %s\n", SYSFS_DLPAR_FILE,
 | 
				
			||||||
 | 
					 		    (rc == 0) ? "wrote 0 bytes" : strerror(my_errno));
 | 
				
			||||||
 | 
					 		return -1;
 | 
				
			||||||
 | 
					 	}
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					-	say(INFO, "Success\n");
 | 
				
			||||||
 | 
					+	say(DEBUG, "Success\n");
 | 
				
			||||||
 | 
					 	return 0;
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/dr.h b/src/drmgr/dr.h
 | 
				
			||||||
 | 
					index f171bfea73c3..00d2fffc9919 100644
 | 
				
			||||||
 | 
					--- a/src/drmgr/dr.h
 | 
				
			||||||
 | 
					+++ b/src/drmgr/dr.h
 | 
				
			||||||
 | 
					@@ -172,5 +172,6 @@ enum drc_type to_drc_type(const char *);
 | 
				
			||||||
 | 
					 int handle_prrn(void);
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 int kernel_dlpar_exists(void);
 | 
				
			||||||
 | 
					-int do_kernel_dlpar(const char *, int);
 | 
				
			||||||
 | 
					+int __do_kernel_dlpar(const char *, int, int);
 | 
				
			||||||
 | 
					+#define do_kernel_dlpar(c, l)	__do_kernel_dlpar(c, l, 0)
 | 
				
			||||||
 | 
					 #endif
 | 
				
			||||||
 | 
					-- 
 | 
				
			||||||
 | 
					2.29.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										40
									
								
								0001-drmgr-fix-remove-by-index-operation.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								0001-drmgr-fix-remove-by-index-operation.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,40 @@
 | 
				
			|||||||
 | 
					From 16469b696959aee4ce32d9f77483e1e3f192e82d Mon Sep 17 00:00:00 2001
 | 
				
			||||||
 | 
					From: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					Date: Fri, 16 Apr 2021 18:10:36 +0200
 | 
				
			||||||
 | 
					Subject: [PATCH] drmgr: fix remove by index operation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The commit e9f06531356f ("drmgr: introduce NUMA based LMB removal")
 | 
				
			||||||
 | 
					introduced special processing when NUMA is on and the remove by count
 | 
				
			||||||
 | 
					operation is done.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unfortunately, that code is also triggered when doing a remove by index
 | 
				
			||||||
 | 
					operation (-s argument) because usr_drc_count is set to 1. As a
 | 
				
			||||||
 | 
					consequence the index constraint is not respected and any LMB can be
 | 
				
			||||||
 | 
					removed.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Add a check against usr_drc_index which is set when a remove by index
 | 
				
			||||||
 | 
					operation is done to ensure the numa removal code is not triggered in that
 | 
				
			||||||
 | 
					case.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Fixes: e9f06531356f ("drmgr: introduce NUMA based LMB removal")
 | 
				
			||||||
 | 
					Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 src/drmgr/drslot_chrp_mem.c | 2 +-
 | 
				
			||||||
 | 
					 1 file changed, 1 insertion(+), 1 deletion(-)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					index f17c94adc270..8db98bb9e9ea 100644
 | 
				
			||||||
 | 
					--- a/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					+++ b/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					@@ -1749,7 +1749,7 @@ int do_mem_kernel_dlpar(void)
 | 
				
			||||||
 | 
					 	int rc, offset;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					-	if (usr_action == REMOVE && usr_drc_count) {
 | 
				
			||||||
 | 
					+	if (usr_action == REMOVE && usr_drc_count && !usr_drc_index) {
 | 
				
			||||||
 | 
					 		build_numa_topology();
 | 
				
			||||||
 | 
					 		if (numa_enabled) {
 | 
				
			||||||
 | 
					 			if (!numa_based_remove(usr_drc_count))
 | 
				
			||||||
 | 
					-- 
 | 
				
			||||||
 | 
					2.31.1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										438
									
								
								0002-drmgr-read-the-CPU-NUMA-topology.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										438
									
								
								0002-drmgr-read-the-CPU-NUMA-topology.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,438 @@
 | 
				
			|||||||
 | 
					From 88caa91a4c8f0ac2376da433f697bc6845595dac Mon Sep 17 00:00:00 2001
 | 
				
			||||||
 | 
					From: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					Date: Wed, 2 Dec 2020 16:10:57 +0100
 | 
				
			||||||
 | 
					Subject: [PATCH 2/3] drmgr: read the CPU NUMA topology
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					This will be used in the next commit to compute LMB removal based on the
 | 
				
			||||||
 | 
					NUMA topology.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The NUMA topology is read using libnuma, so a dependency on it is
 | 
				
			||||||
 | 
					added in the configure file.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 Makefile.am             |   5 +-
 | 
				
			||||||
 | 
					 configure.ac            |   4 +
 | 
				
			||||||
 | 
					 src/drmgr/common_numa.c | 268 ++++++++++++++++++++++++++++++++++++++++
 | 
				
			||||||
 | 
					 src/drmgr/common_numa.h |  83 +++++++++++++
 | 
				
			||||||
 | 
					 4 files changed, 359 insertions(+), 1 deletion(-)
 | 
				
			||||||
 | 
					 create mode 100644 src/drmgr/common_numa.c
 | 
				
			||||||
 | 
					 create mode 100644 src/drmgr/common_numa.h
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/Makefile.am b/Makefile.am
 | 
				
			||||||
 | 
					index 2ff2232537df..31baaa74b353 100644
 | 
				
			||||||
 | 
					--- a/Makefile.am
 | 
				
			||||||
 | 
					+++ b/Makefile.am
 | 
				
			||||||
 | 
					@@ -155,6 +155,7 @@ src_drmgr_drmgr_SOURCES = \
 | 
				
			||||||
 | 
					 	src/drmgr/common_cpu.c \
 | 
				
			||||||
 | 
					 	src/drmgr/common_ofdt.c \
 | 
				
			||||||
 | 
					 	src/drmgr/common_pci.c \
 | 
				
			||||||
 | 
					+	src/drmgr/common_numa.c \
 | 
				
			||||||
 | 
					 	src/drmgr/drmgr.c \
 | 
				
			||||||
 | 
					 	src/drmgr/drmig_chrp_pmig.c \
 | 
				
			||||||
 | 
					 	src/drmgr/drslot_chrp_cpu.c \
 | 
				
			||||||
 | 
					@@ -171,13 +172,14 @@ noinst_HEADERS += \
 | 
				
			||||||
 | 
					 	src/drmgr/drcpu.h \
 | 
				
			||||||
 | 
					 	src/drmgr/dr.h \
 | 
				
			||||||
 | 
					 	src/drmgr/drmem.h \
 | 
				
			||||||
 | 
					+	src/drmgr/numa.h \
 | 
				
			||||||
 | 
					 	src/drmgr/drpci.h \
 | 
				
			||||||
 | 
					 	src/drmgr/rtas_calls.h \
 | 
				
			||||||
 | 
					 	src/drmgr/ofdt.h \
 | 
				
			||||||
 | 
					 	src/drmgr/rtas_calls.h \
 | 
				
			||||||
 | 
					 	src/drmgr/options.c
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					-src_drmgr_drmgr_LDADD = -lrtas
 | 
				
			||||||
 | 
					+src_drmgr_drmgr_LDADD = -lrtas -lnuma
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 src_drmgr_lsslot_SOURCES = \
 | 
				
			||||||
 | 
					 	src/drmgr/lsslot.c \
 | 
				
			||||||
 | 
					@@ -186,6 +188,7 @@ src_drmgr_lsslot_SOURCES = \
 | 
				
			||||||
 | 
					 	src/drmgr/common_cpu.c \
 | 
				
			||||||
 | 
					 	src/drmgr/common_pci.c \
 | 
				
			||||||
 | 
					 	src/drmgr/common_ofdt.c \
 | 
				
			||||||
 | 
					+	src/drmgr/common_numa.c \
 | 
				
			||||||
 | 
					 	src/drmgr/rtas_calls.c \
 | 
				
			||||||
 | 
					 	src/drmgr/drslot_chrp_mem.c \
 | 
				
			||||||
 | 
					 	$(pseries_platform_SOURCES)
 | 
				
			||||||
 | 
					diff --git a/configure.ac b/configure.ac
 | 
				
			||||||
 | 
					index de3c6758389a..0239754cc4f4 100644
 | 
				
			||||||
 | 
					--- a/configure.ac
 | 
				
			||||||
 | 
					+++ b/configure.ac
 | 
				
			||||||
 | 
					@@ -42,6 +42,10 @@ AC_CHECK_HEADER(zlib.h,
 | 
				
			||||||
 | 
					 		[AC_CHECK_LIB(z, inflate, [], [AC_MSG_FAILURE([zlib library is required for compilation])])],
 | 
				
			||||||
 | 
					 		[AC_MSG_FAILURE([zlib.h is required for compiliation])])
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+AC_CHECK_HEADER(numa.h,
 | 
				
			||||||
 | 
					+		[AC_CHECK_LIB(numa, numa_available, [], [AC_MSG_FAILURE([numa library is required for compilation])])],
 | 
				
			||||||
 | 
					+		[AC_MSG_FAILURE([numa.h is required for compiliation])])
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 # check for librtas
 | 
				
			||||||
 | 
					 AC_ARG_WITH([librtas],
 | 
				
			||||||
 | 
					     [AS_HELP_STRING([--without-librtas],
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/common_numa.c b/src/drmgr/common_numa.c
 | 
				
			||||||
 | 
					new file mode 100644
 | 
				
			||||||
 | 
					index 000000000000..5778769b25b6
 | 
				
			||||||
 | 
					--- /dev/null
 | 
				
			||||||
 | 
					+++ b/src/drmgr/common_numa.c
 | 
				
			||||||
 | 
					@@ -0,0 +1,268 @@
 | 
				
			||||||
 | 
					+/**
 | 
				
			||||||
 | 
					+ * @file common_numa.c
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * Copyright (C) IBM Corporation 2020
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * This program is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					+ * modify it under the terms of the GNU General Public License
 | 
				
			||||||
 | 
					+ * as published by the Free Software Foundation; either version 2
 | 
				
			||||||
 | 
					+ * of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					+ * GNU General Public License for more details.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					+ * along with this program; if not, write to the Free Software
 | 
				
			||||||
 | 
					+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+#include <stdio.h>
 | 
				
			||||||
 | 
					+#include <errno.h>
 | 
				
			||||||
 | 
					+#include <numa.h>
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+#include "dr.h"
 | 
				
			||||||
 | 
					+#include "ofdt.h"
 | 
				
			||||||
 | 
					+#include "drmem.h"		/* for DYNAMIC_RECONFIG_MEM */
 | 
				
			||||||
 | 
					+#include "common_numa.h"
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+#define RTAS_DIRECTORY		"/proc/device-tree/rtas"
 | 
				
			||||||
 | 
					+#define CHOSEN_DIRECTORY	"/proc/device-tree/chosen"
 | 
				
			||||||
 | 
					+#define ASSOC_REF_POINTS	"ibm,associativity-reference-points"
 | 
				
			||||||
 | 
					+#define ASSOC_LOOKUP_ARRAYS	"ibm,associativity-lookup-arrays"
 | 
				
			||||||
 | 
					+#define ARCHITECTURE_VEC_5	"ibm,architecture-vec-5"
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+/*
 | 
				
			||||||
 | 
					+ * Allocate and read a property, return the size.
 | 
				
			||||||
 | 
					+ * The read property is not converted to the host endianess.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+static int load_property(char *dir, char *prop, uint32_t **buf)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int size;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	size = get_property_size(dir, prop);
 | 
				
			||||||
 | 
					+	if (!size)
 | 
				
			||||||
 | 
					+		return -ENOENT;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	*buf = zalloc(size);
 | 
				
			||||||
 | 
					+	if (!*buf) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Could not allocate buffer read %s (%d bytes)\n",
 | 
				
			||||||
 | 
					+		    prop, size);
 | 
				
			||||||
 | 
					+		return -ENOMEM;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (get_property(dir, prop, *buf, size)) {
 | 
				
			||||||
 | 
					+		free(*buf);
 | 
				
			||||||
 | 
					+		say(ERROR, "Can't retrieve %s/%s\n", dir, prop);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	return size;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+/*
 | 
				
			||||||
 | 
					+ * Get the minimal common depth, based on the form 1 of the ibm,associativ-
 | 
				
			||||||
 | 
					+ * ity-reference-points property. We only support that form.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * We should check that the "ibm,architecture-vec-5" property byte 5 bit 0
 | 
				
			||||||
 | 
					+ * has the value of one.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+static int get_min_common_depth(struct numa_topology *numa)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int size;
 | 
				
			||||||
 | 
					+	uint32_t *p;
 | 
				
			||||||
 | 
					+	unsigned char val;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	size = load_property(CHOSEN_DIRECTORY, ARCHITECTURE_VEC_5, &p);
 | 
				
			||||||
 | 
					+	if (size < 0)
 | 
				
			||||||
 | 
					+		return size;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* PAPR byte start at 1 (and not 0) but there is the length field */
 | 
				
			||||||
 | 
					+	if (size < 6) {
 | 
				
			||||||
 | 
					+		report_unknown_error(__FILE__, __LINE__);
 | 
				
			||||||
 | 
					+		free(p);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+	val = ((unsigned char *)p)[5];
 | 
				
			||||||
 | 
					+	free(p);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (!(val & 0x80))
 | 
				
			||||||
 | 
					+		return -ENOTSUP;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	size = load_property(RTAS_DIRECTORY, ASSOC_REF_POINTS, &p);
 | 
				
			||||||
 | 
					+	if (size <= 0)
 | 
				
			||||||
 | 
					+		return size;
 | 
				
			||||||
 | 
					+	if (size < sizeof(uint32_t)) {
 | 
				
			||||||
 | 
					+		report_unknown_error(__FILE__, __LINE__);
 | 
				
			||||||
 | 
					+		free(p);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* Get the first entry */
 | 
				
			||||||
 | 
					+	numa->min_common_depth = be32toh(*p);
 | 
				
			||||||
 | 
					+	free(p);
 | 
				
			||||||
 | 
					+	return 0;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static int get_assoc_arrays(struct numa_topology *numa)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int size;
 | 
				
			||||||
 | 
					+	int rc;
 | 
				
			||||||
 | 
					+	uint32_t *prop, i;
 | 
				
			||||||
 | 
					+	struct assoc_arrays *aa = &numa->aa;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	size = load_property(DYNAMIC_RECONFIG_MEM, ASSOC_LOOKUP_ARRAYS, &prop);
 | 
				
			||||||
 | 
					+	if (size < 0)
 | 
				
			||||||
 | 
					+		return size;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	size /= sizeof(uint32_t);
 | 
				
			||||||
 | 
					+	if (size < 2) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Could not find the associativity lookup arrays\n");
 | 
				
			||||||
 | 
					+		free(prop);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	aa->n_arrays = be32toh(prop[0]);
 | 
				
			||||||
 | 
					+	aa->array_sz = be32toh(prop[1]);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = -EINVAL;
 | 
				
			||||||
 | 
					+	if (numa->min_common_depth > aa->array_sz) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Bad min common depth or associativity array size\n");
 | 
				
			||||||
 | 
					+		goto out_free;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* Sanity check */
 | 
				
			||||||
 | 
					+	if (size != (aa->n_arrays * aa->array_sz + 2)) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Bad size of the associativity lookup arrays\n");
 | 
				
			||||||
 | 
					+		goto out_free;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	aa->min_array = zalloc(aa->n_arrays * sizeof(uint32_t));
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* Keep only the most significant value */
 | 
				
			||||||
 | 
					+	for (i = 0; i < aa->n_arrays; i++) {
 | 
				
			||||||
 | 
					+		int prop_index = i * aa->array_sz + numa->min_common_depth + 1;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		aa->min_array[i] = be32toh(prop[prop_index]);
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+	rc = 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+out_free:
 | 
				
			||||||
 | 
					+	free(prop);
 | 
				
			||||||
 | 
					+	return rc;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+struct numa_node *numa_fetch_node(struct numa_topology *numa, int nid)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (nid > MAX_NUMNODES) {
 | 
				
			||||||
 | 
					+		report_unknown_error(__FILE__, __LINE__);
 | 
				
			||||||
 | 
					+		return NULL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	node = numa->nodes[nid];
 | 
				
			||||||
 | 
					+	if (node)
 | 
				
			||||||
 | 
					+		return node;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	node = zalloc(sizeof(struct numa_node));
 | 
				
			||||||
 | 
					+	if (!node) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Can't allocate a new node\n");
 | 
				
			||||||
 | 
					+		return NULL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	node->node_id = nid;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (!numa->node_count || nid < numa->node_min)
 | 
				
			||||||
 | 
					+		numa->node_min = nid;
 | 
				
			||||||
 | 
					+	if (nid > numa->node_max)
 | 
				
			||||||
 | 
					+		numa->node_max = nid;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa->nodes[nid] = node;
 | 
				
			||||||
 | 
					+	numa->node_count++;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	return node;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+/*
 | 
				
			||||||
 | 
					+ * Read the number of CPU for each node using the libnuma to get the details
 | 
				
			||||||
 | 
					+ * from sysfs.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+static int read_numa_topology(struct numa_topology *numa)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct bitmask *cpus;
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+	int rc, max_node, nid, i;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (numa_available() < 0)
 | 
				
			||||||
 | 
					+		return -ENOENT;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	max_node = numa_max_node();
 | 
				
			||||||
 | 
					+	if (max_node >= MAX_NUMNODES) {
 | 
				
			||||||
 | 
					+		say(ERROR, "Too many nodes %d (max:%d)\n",
 | 
				
			||||||
 | 
					+		    max_node, MAX_NUMNODES);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* In case of allocation error, the libnuma is calling exit() */
 | 
				
			||||||
 | 
					+	cpus = numa_allocate_cpumask();
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	for (nid = 0; nid <= max_node; nid++) {
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		if (!numa_bitmask_isbitset(numa_nodes_ptr, nid))
 | 
				
			||||||
 | 
					+			continue;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		node = numa_fetch_node(numa, nid);
 | 
				
			||||||
 | 
					+		if (!node) {
 | 
				
			||||||
 | 
					+			rc = -ENOMEM;
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		rc = numa_node_to_cpus(nid, cpus);
 | 
				
			||||||
 | 
					+		if (rc < 0)
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		/* Count the CPUs in that node */
 | 
				
			||||||
 | 
					+		for (i = 0; i < cpus->size; i++)
 | 
				
			||||||
 | 
					+			if (numa_bitmask_isbitset(cpus, i))
 | 
				
			||||||
 | 
					+				node->n_cpus++;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		numa->cpu_count += node->n_cpus;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa_bitmask_free(cpus);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (rc) {
 | 
				
			||||||
 | 
					+		numa_foreach_node(numa, nid, node)
 | 
				
			||||||
 | 
					+			node->n_cpus = 0;
 | 
				
			||||||
 | 
					+		numa->cpu_count = 0;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	return rc;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+int numa_get_topology(struct numa_topology *numa)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int rc;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = get_min_common_depth(numa);
 | 
				
			||||||
 | 
					+	if (rc)
 | 
				
			||||||
 | 
					+		return rc;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = get_assoc_arrays(numa);
 | 
				
			||||||
 | 
					+	if (rc)
 | 
				
			||||||
 | 
					+		return rc;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = read_numa_topology(numa);
 | 
				
			||||||
 | 
					+	if (rc)
 | 
				
			||||||
 | 
					+		return rc;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (!numa->node_count)
 | 
				
			||||||
 | 
					+		return -1;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	return 0;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/common_numa.h b/src/drmgr/common_numa.h
 | 
				
			||||||
 | 
					new file mode 100644
 | 
				
			||||||
 | 
					index 000000000000..4d0054926819
 | 
				
			||||||
 | 
					--- /dev/null
 | 
				
			||||||
 | 
					+++ b/src/drmgr/common_numa.h
 | 
				
			||||||
 | 
					@@ -0,0 +1,83 @@
 | 
				
			||||||
 | 
					+/**
 | 
				
			||||||
 | 
					+ * @file numa.h
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * Copyright (C) IBM Corporation 2020
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * This program is free software; you can redistribute it and/or
 | 
				
			||||||
 | 
					+ * modify it under the terms of the GNU General Public License
 | 
				
			||||||
 | 
					+ * as published by the Free Software Foundation; either version 2
 | 
				
			||||||
 | 
					+ * of the License, or (at your option) any later version.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * This program is distributed in the hope that it will be useful,
 | 
				
			||||||
 | 
					+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
				
			||||||
 | 
					+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
				
			||||||
 | 
					+ * GNU General Public License for more details.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * You should have received a copy of the GNU General Public License
 | 
				
			||||||
 | 
					+ * along with this program; if not, write to the Free Software
 | 
				
			||||||
 | 
					+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+#ifndef _NUMA_H_
 | 
				
			||||||
 | 
					+#define _NUMA_H_
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+#define MAX_NUMNODES	256
 | 
				
			||||||
 | 
					+#define NUMA_NO_NODE	-1
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+struct numa_node {
 | 
				
			||||||
 | 
					+	int		node_id;
 | 
				
			||||||
 | 
					+	unsigned int	n_cpus;
 | 
				
			||||||
 | 
					+	unsigned int	n_lmbs;
 | 
				
			||||||
 | 
					+	unsigned int	ratio;
 | 
				
			||||||
 | 
					+	struct dr_node	*lmbs;			/* linked by lmb_numa_next */
 | 
				
			||||||
 | 
					+	struct numa_node *ratio_next;
 | 
				
			||||||
 | 
					+};
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+struct assoc_arrays {
 | 
				
			||||||
 | 
					+	uint32_t        n_arrays;
 | 
				
			||||||
 | 
					+	uint32_t        array_sz;
 | 
				
			||||||
 | 
					+	uint32_t        *min_array;
 | 
				
			||||||
 | 
					+};
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+struct numa_topology {
 | 
				
			||||||
 | 
					+	unsigned int		cpu_count;
 | 
				
			||||||
 | 
					+	unsigned int		lmb_count;
 | 
				
			||||||
 | 
					+	unsigned int		cpuless_node_count;
 | 
				
			||||||
 | 
					+	unsigned int		cpuless_lmb_count;
 | 
				
			||||||
 | 
					+	unsigned int		node_count, node_min, node_max;
 | 
				
			||||||
 | 
					+	struct numa_node	*nodes[MAX_NUMNODES];
 | 
				
			||||||
 | 
					+	struct numa_node	*ratio;
 | 
				
			||||||
 | 
					+	uint32_t		min_common_depth;
 | 
				
			||||||
 | 
					+	struct assoc_arrays	aa;
 | 
				
			||||||
 | 
					+};
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+int numa_get_topology(struct numa_topology *numa);
 | 
				
			||||||
 | 
					+struct numa_node *numa_fetch_node(struct numa_topology *numa, int node_id);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static inline int numa_aa_index_to_node(struct numa_topology *numa,
 | 
				
			||||||
 | 
					+					uint32_t aa_index)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	if (aa_index < numa->aa.n_arrays)
 | 
				
			||||||
 | 
					+		return numa->aa.min_array[aa_index];
 | 
				
			||||||
 | 
					+	return NUMA_NO_NODE;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static inline int next_node(struct numa_topology *numa, int nid,
 | 
				
			||||||
 | 
					+			    struct numa_node **node)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	for (nid++; nid <= numa->node_max; nid++)
 | 
				
			||||||
 | 
					+		if (numa->nodes[nid]) {
 | 
				
			||||||
 | 
					+			*node = numa->nodes[nid];
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					+	return nid;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					/*
					 * Iterate over the populated entries of (numa)->nodes[], from node_min
					 * to node_max. nid receives the node id and node the matching entry.
					 * The first entry, nodes[node_min], is handed to the body without a NULL
					 * check; later entries are filtered by next_node().
					 * Every macro parameter is parenthesized so that any lvalue or pointer
					 * expression can be passed safely.
					 */
					#define numa_foreach_node(numa, nid, node)				\
						for ((nid) = (numa)->node_min, (node) = (numa)->nodes[(nid)];	\
						     (nid) <= (numa)->node_max;					\
						     (nid) = next_node((numa), (nid), &(node)))

					/*
					 * Iterate over the list starting at (numa)->ratio and chained through
					 * the ratio_next field of each node.
					 */
					#define numa_foreach_node_by_ratio(numa, node)				\
						for ((node) = (numa)->ratio; (node); (node) = (node)->ratio_next)
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+#endif /* _NUMA_H_ */
 | 
				
			||||||
 | 
					-- 
 | 
				
			||||||
 | 
					2.29.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										443
									
								
								0003-drmgr-introduce-NUMA-based-LMB-removal.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										443
									
								
								0003-drmgr-introduce-NUMA-based-LMB-removal.patch
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,443 @@
 | 
				
			|||||||
 | 
					From 3c549c7494e729a68b64ac5519bcf1506b24f945 Mon Sep 17 00:00:00 2001
 | 
				
			||||||
 | 
					From: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					Date: Wed, 25 Nov 2020 18:03:45 +0100
 | 
				
			||||||
 | 
					Subject: [PATCH 3/3] drmgr: introduce NUMA based LMB removal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When the NUMA topology can be read, all the LMBs found in the Device Tree
 | 
				
			||||||
 | 
					are linked to the corresponding node. LMBs not associated with a node are
 | 
				
			||||||
 | 
					considered as not used.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LMB associated to CPU less node are accounted separately because they will
 | 
				
			||||||
 | 
					be targeted first for removal. The LMBs are removed from the CPU less nodes
 | 
				
			||||||
 | 
					to reach an average number LMBs per CPU less node.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Nodes with CPUs have a ratio indexed on their number of CPUs. The higher a
 | 
				
			||||||
 | 
					node have CPU the lower number LMB will be removed. This way node with a
 | 
				
			||||||
 | 
					high number of CPU will get a higher amount of memory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					When a LMB can't be removed (because its memory can't be offlined by the
 | 
				
			||||||
 | 
					kernel), the LMB count for node is decremented and the LMB is removed from
 | 
				
			||||||
 | 
					the node's LMB list. This way, it is no more accounted as 'active' and the
 | 
				
			||||||
 | 
					removal operation will continue without taking it in account anymore.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The removal is done through the remove by DRC index API, allowing to remove
 | 
				
			||||||
 | 
					a LMB at a time. One future optimization would be to extend that API to
 | 
				
			||||||
 | 
					remove a linear range of LMB each time.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the NUMA topology can't be read, we fallback using the legacy remove
 | 
				
			||||||
 | 
					way.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 src/drmgr/drslot_chrp_mem.c | 335 +++++++++++++++++++++++++++++++++++-
 | 
				
			||||||
 | 
					 src/drmgr/ofdt.h            |   2 +
 | 
				
			||||||
 | 
					 2 files changed, 336 insertions(+), 1 deletion(-)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/drslot_chrp_mem.c b/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					index 502aa3e9fff0..47d9f7b8ed90 100644
 | 
				
			||||||
 | 
					--- a/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					+++ b/src/drmgr/drslot_chrp_mem.c
 | 
				
			||||||
 | 
					@@ -31,12 +31,16 @@
 | 
				
			||||||
 | 
					 #include "dr.h"
 | 
				
			||||||
 | 
					 #include "ofdt.h"
 | 
				
			||||||
 | 
					 #include "drmem.h"
 | 
				
			||||||
 | 
					+#include "common_numa.h"
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 static int block_sz_bytes = 0;
 | 
				
			||||||
 | 
					 static char *state_strs[] = {"offline", "online"};
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 static char *usagestr = "-c mem {-a | -r} {-q <quantity> -p {variable_weight | ent_capacity} | {-q <quantity> | -s [<drc_name> | <drc_index>]}}";
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+static struct numa_topology numa;
 | 
				
			||||||
 | 
					+static int numa_enabled = 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 /**
 | 
				
			||||||
 | 
					  * mem_usage
 | 
				
			||||||
 | 
					  * @brief return usage string
 | 
				
			||||||
 | 
					@@ -306,6 +310,31 @@ get_mem_node_lmbs(struct lmb_list_head *lmb_list)
 | 
				
			||||||
 | 
					 	return rc;
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+static int link_lmb_to_numa_node(struct dr_node *lmb)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	nid = numa_aa_index_to_node(&numa, lmb->lmb_aa_index);
 | 
				
			||||||
 | 
					+	if (nid == NUMA_NO_NODE)
 | 
				
			||||||
 | 
					+		return 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	node = numa_fetch_node(&numa, nid);
 | 
				
			||||||
 | 
					+	if (!node)
 | 
				
			||||||
 | 
					+		return -ENOMEM;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	lmb->lmb_numa_next = node->lmbs;
 | 
				
			||||||
 | 
					+	node->lmbs = lmb;
 | 
				
			||||||
 | 
					+	node->n_lmbs++;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (node->n_cpus)
 | 
				
			||||||
 | 
					+		numa.lmb_count++;
 | 
				
			||||||
 | 
					+	else
 | 
				
			||||||
 | 
					+		numa.cpuless_lmb_count++;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	return 0;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 int add_lmb(struct lmb_list_head *lmb_list, uint32_t drc_index,
 | 
				
			||||||
 | 
					 	    uint64_t address, uint64_t lmb_sz, uint32_t aa_index,
 | 
				
			||||||
 | 
					 	    uint32_t flags)
 | 
				
			||||||
 | 
					@@ -324,6 +353,9 @@ int add_lmb(struct lmb_list_head *lmb_list, uint32_t drc_index,
 | 
				
			||||||
 | 
					 	lmb->lmb_address = address;
 | 
				
			||||||
 | 
					 	lmb->lmb_aa_index = aa_index;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+	if (numa_enabled && link_lmb_to_numa_node(lmb))
 | 
				
			||||||
 | 
					+		return -ENOMEM;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 	if (flags & DRMEM_ASSIGNED) {
 | 
				
			||||||
 | 
					 		int rc;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					@@ -490,7 +522,7 @@ get_dynamic_reconfig_lmbs(struct lmb_list_head *lmb_list)
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	if (stat(DYNAMIC_RECONFIG_MEM_V1, &sbuf) == 0) {
 | 
				
			||||||
 | 
					 		rc = get_dynamic_reconfig_lmbs_v1(lmb_sz, lmb_list);
 | 
				
			||||||
 | 
					-	} else if (is_lsslot_cmd &&
 | 
				
			||||||
 | 
					+	} else if ((is_lsslot_cmd || numa_enabled) &&
 | 
				
			||||||
 | 
					 		   stat(DYNAMIC_RECONFIG_MEM_V2, &sbuf) == 0) {
 | 
				
			||||||
 | 
					 		rc = get_dynamic_reconfig_lmbs_v2(lmb_sz, lmb_list);
 | 
				
			||||||
 | 
					 	} else {
 | 
				
			||||||
 | 
					@@ -1424,11 +1456,312 @@ int valid_mem_options(void)
 | 
				
			||||||
 | 
					 	return 0;
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					/*
					 * Ask the kernel to remove the LMB identified by drc_index.
					 *
					 * Builds the "memory remove index 0x<drc_index>" command and hands it,
					 * with its length, to __do_kernel_dlpar(). Returns whatever
					 * __do_kernel_dlpar() returns.
					 */
					static int remove_lmb_by_index(uint32_t drc_index)
					{
						char cmdbuf[128];
						int offset;

						/* Bounded formatting: never write past the end of cmdbuf. */
						offset = snprintf(cmdbuf, sizeof(cmdbuf),
								  "memory remove index 0x%x", drc_index);

						return __do_kernel_dlpar(cmdbuf, offset, 1 /* Don't report error */);
					}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static int remove_lmb_from_node(struct numa_node *node, uint32_t count)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct dr_node *lmb;
 | 
				
			||||||
 | 
					+	int err, done = 0, unlinked = 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	say(DEBUG, "Try removing %d / %d LMBs from node %d\n",
 | 
				
			||||||
 | 
					+	    count, node->n_lmbs, node->node_id);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	for (lmb = node->lmbs; lmb && done < count; lmb = lmb->lmb_numa_next) {
 | 
				
			||||||
 | 
					+		unlinked ++;
 | 
				
			||||||
 | 
					+		err = remove_lmb_by_index(lmb->drc_index);
 | 
				
			||||||
 | 
					+		if (err)
 | 
				
			||||||
 | 
					+			say(WARN,"Can't remove LMB node:%d index:0x%x: %s\n",
 | 
				
			||||||
 | 
					+			    node->node_id, lmb->drc_index, strerror(-err));
 | 
				
			||||||
 | 
					+		else
 | 
				
			||||||
 | 
					+			done++;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/*
 | 
				
			||||||
 | 
					+	 * Decrement the node LMB's count since whatever is the success
 | 
				
			||||||
 | 
					+	 * of the removal operation, it will not be tried again on that
 | 
				
			||||||
 | 
					+	 * LMB.
 | 
				
			||||||
 | 
					+	 */
 | 
				
			||||||
 | 
					+	node->n_lmbs -= unlinked;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/*
 | 
				
			||||||
 | 
					+	 * Update the node's list of LMB to not process the one we removed or
 | 
				
			||||||
 | 
					+	 * tried to removed again.
 | 
				
			||||||
 | 
					+	 */
 | 
				
			||||||
 | 
					+	node->lmbs = lmb;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* Update numa's counters */
 | 
				
			||||||
 | 
					+	if (node->n_cpus)
 | 
				
			||||||
 | 
					+		numa.lmb_count -= unlinked;
 | 
				
			||||||
 | 
					+	else
 | 
				
			||||||
 | 
					+		numa.cpuless_node_count -= unlinked;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (!node->n_lmbs) {
 | 
				
			||||||
 | 
					+		node->ratio = 0; /* for sanity only */
 | 
				
			||||||
 | 
					+		if (node->n_cpus)
 | 
				
			||||||
 | 
					+			numa.cpu_count -= node->n_cpus;
 | 
				
			||||||
 | 
					+		else
 | 
				
			||||||
 | 
					+			numa.cpuless_node_count--;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	say(INFO, "Removed %d LMBs from node %d\n", done, node->node_id);
 | 
				
			||||||
 | 
					+	return done;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					/*
					 * Smaller of a and b. Arguments are fully parenthesized so that
					 * expressions using low-precedence operators are compared as a whole.
					 * Each argument may be evaluated twice: do not pass side effects.
					 */
					#define min(a, b) (((a) < (b)) ? (a) : (b))
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static void update_cpuless_node_ratio(void)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/*
 | 
				
			||||||
 | 
					+	 * Assumptions:
 | 
				
			||||||
 | 
					+	 * 1. numa->cpuless_node_count is up to date
 | 
				
			||||||
 | 
					+	 * 2. numa->cpuless_lmb_count is up to date
 | 
				
			||||||
 | 
					+	 * Nodes with no memory and nodes with CPUs are ignored here.
 | 
				
			||||||
 | 
					+	 */
 | 
				
			||||||
 | 
					+	numa_foreach_node(&numa, nid, node) {
 | 
				
			||||||
 | 
					+		if (node->n_cpus ||!node->n_lmbs)
 | 
				
			||||||
 | 
					+			continue;
 | 
				
			||||||
 | 
					+		node->ratio = (node->n_lmbs * 100) / numa.cpuless_lmb_count;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+/*
 | 
				
			||||||
 | 
					+ * Remove LMBs from node without CPUs only.
 | 
				
			||||||
 | 
					+ * The more the node has LMBs, the more LMBs will be removed from it.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * We have to retry the operation multiple times because some LMB cannot be
 | 
				
			||||||
 | 
					+ * removed due to the page usage in the kernel. In that case, that LMB is no
 | 
				
			||||||
 | 
					+ * more taken in account and the node's LMB count is decremented, assuming that
 | 
				
			||||||
 | 
					+ * LMB is unremovable at this time. Thus each node's ratio has to be computed on
 | 
				
			||||||
 | 
					+ * each iteration. This is not a big deal, usually, there are not so much nodes.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+static int remove_cpuless_lmbs(uint32_t count)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+	uint32_t total = count, todo, done = 0, this_loop;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	while (count) {
 | 
				
			||||||
 | 
					+		count = min(count, numa.cpuless_lmb_count);
 | 
				
			||||||
 | 
					+		if (!count)
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		update_cpuless_node_ratio();
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		this_loop = 0;
 | 
				
			||||||
 | 
					+		numa_foreach_node(&numa, nid, node) {
 | 
				
			||||||
 | 
					+			if (!node->n_lmbs || node->n_cpus)
 | 
				
			||||||
 | 
					+				continue;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			todo = (count * node->ratio) / 100;
 | 
				
			||||||
 | 
					+			todo = min(todo, node->n_lmbs);
 | 
				
			||||||
 | 
					+			/* Fix rounded value to 0 */
 | 
				
			||||||
 | 
					+			if (!todo && node->n_lmbs)
 | 
				
			||||||
 | 
					+				todo = (count - this_loop);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			if (todo)
 | 
				
			||||||
 | 
					+				todo = remove_lmb_from_node(node, todo);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			this_loop += todo;
 | 
				
			||||||
 | 
					+			done += todo;
 | 
				
			||||||
 | 
					+			if (done >= total)
 | 
				
			||||||
 | 
					+				break;
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		/* Don't continue if we didn't make any progress. */
 | 
				
			||||||
 | 
					+		if (!this_loop)
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		count -= this_loop;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	say(DEBUG, "%d / %d LMBs removed from the CPU less nodes\n",
 | 
				
			||||||
 | 
					+	    done, total);
 | 
				
			||||||
 | 
					+	return done;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static void update_node_ratio(void)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+	struct numa_node *node, *n, **p;
 | 
				
			||||||
 | 
					+	uint32_t cpu_ratio, mem_ratio;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/*
 | 
				
			||||||
 | 
					+	 * Assumptions:
 | 
				
			||||||
 | 
					+	 * 1. numa->cpu_count is up to date
 | 
				
			||||||
 | 
					+	 * 2. numa->lmb_count is up to date
 | 
				
			||||||
 | 
					+	 * Nodes with no memory and nodes with no CPU are ignored here.
 | 
				
			||||||
 | 
					+	 */
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa.ratio = NULL;
 | 
				
			||||||
 | 
					+	numa_foreach_node(&numa, nid, node) {
 | 
				
			||||||
 | 
					+		if (!node->n_lmbs || !node->n_cpus)
 | 
				
			||||||
 | 
					+			continue;
 | 
				
			||||||
 | 
					+		cpu_ratio = (node->n_cpus * 100) / numa.cpu_count;
 | 
				
			||||||
 | 
					+		mem_ratio = (node->n_lmbs * 100) / numa.lmb_count;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		/* Say that CPU ratio is 90% of the ratio */
 | 
				
			||||||
 | 
					+		node->ratio = (cpu_ratio * 9 + mem_ratio) / 10;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/* Create an ordered link of the nodes */
 | 
				
			||||||
 | 
					+	numa_foreach_node(&numa, nid, node) {
 | 
				
			||||||
 | 
					+		if (!node->n_lmbs || !node->n_cpus)
 | 
				
			||||||
 | 
					+			continue;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		p = &numa.ratio;
 | 
				
			||||||
 | 
					+		for (n = numa.ratio;
 | 
				
			||||||
 | 
					+		     n && n->ratio < node->ratio; n = n->ratio_next)
 | 
				
			||||||
 | 
					+			p = &n->ratio_next;
 | 
				
			||||||
 | 
					+		*p = node;
 | 
				
			||||||
 | 
					+		node->ratio_next = n;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+/*
 | 
				
			||||||
 | 
					+ * Remove LMBs from node with CPUs.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * The less a node has CPU, the more memory will be removed from it.
 | 
				
			||||||
 | 
					+ *
 | 
				
			||||||
 | 
					+ * As for the CPU less nodes, we must iterate because some LMBs may not be
 | 
				
			||||||
 | 
					+ * removable at this time.
 | 
				
			||||||
 | 
					+ */
 | 
				
			||||||
 | 
					+static int remove_cpu_lmbs(uint32_t count)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+	uint32_t total = count, todo, done = 0, this_loop;
 | 
				
			||||||
 | 
					+	uint32_t new_lmb_count;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	while(count) {
 | 
				
			||||||
 | 
					+		count = min(count, numa.lmb_count);
 | 
				
			||||||
 | 
					+		if (!count)
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		update_node_ratio();
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		new_lmb_count = numa.lmb_count - count;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		this_loop = 0;
 | 
				
			||||||
 | 
					+		numa_foreach_node_by_ratio(&numa, node) {
 | 
				
			||||||
 | 
					+			if (!node->n_lmbs || !node->n_cpus)
 | 
				
			||||||
 | 
					+				continue;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			todo = (new_lmb_count * node->ratio)  / 100;
 | 
				
			||||||
 | 
					+			todo = node->n_lmbs - min(todo, node->n_lmbs);
 | 
				
			||||||
 | 
					+			todo = min(count, todo);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			if (todo) {
 | 
				
			||||||
 | 
					+				todo = remove_lmb_from_node(node, todo);
 | 
				
			||||||
 | 
					+				count -= todo;
 | 
				
			||||||
 | 
					+				this_loop += todo;
 | 
				
			||||||
 | 
					+			}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			if (!count)
 | 
				
			||||||
 | 
					+				break;
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+		/* Don't continue if we didn't make any progress. */
 | 
				
			||||||
 | 
					+		if (!this_loop)
 | 
				
			||||||
 | 
					+			break;
 | 
				
			||||||
 | 
					+		done += this_loop;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	say(DEBUG, "%d / %d LMBs removed from the CPU nodes\n",
 | 
				
			||||||
 | 
					+	    done, total);
 | 
				
			||||||
 | 
					+	return done;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static void build_numa_topology(void)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int rc;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	rc = numa_get_topology(&numa);
 | 
				
			||||||
 | 
					+	if (rc)
 | 
				
			||||||
 | 
					+		return;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa_enabled = 1;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static void clear_numa_lmb_links(void)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa_foreach_node(&numa, nid, node)
 | 
				
			||||||
 | 
					+		node->lmbs = NULL;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+static int numa_based_remove(uint32_t count)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	struct lmb_list_head *lmb_list;
 | 
				
			||||||
 | 
					+	struct numa_node *node;
 | 
				
			||||||
 | 
					+	int nid;
 | 
				
			||||||
 | 
					+	uint32_t done = 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	/*
 | 
				
			||||||
 | 
					+	 * Read the LMBs
 | 
				
			||||||
 | 
					+	 * Link the LMBs to their node
 | 
				
			||||||
 | 
					+	 * Update global counter
 | 
				
			||||||
 | 
					+	 */
 | 
				
			||||||
 | 
					+	lmb_list = get_lmbs(LMB_NORMAL_SORT);
 | 
				
			||||||
 | 
					+	if (lmb_list == NULL) {
 | 
				
			||||||
 | 
					+		clear_numa_lmb_links();
 | 
				
			||||||
 | 
					+		return -1;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (!numa.node_count) {
 | 
				
			||||||
 | 
					+		clear_numa_lmb_links();
 | 
				
			||||||
 | 
					+		free_lmbs(lmb_list);
 | 
				
			||||||
 | 
					+		return -EINVAL;
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	numa_foreach_node(&numa, nid, node) {
 | 
				
			||||||
 | 
					+		say(INFO, "node %4d %4d CPUs %8d LMBs\n",
 | 
				
			||||||
 | 
					+		    nid, node->n_cpus, node->n_lmbs);
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	done += remove_cpuless_lmbs(count);
 | 
				
			||||||
 | 
					+	count -= done;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	done += remove_cpu_lmbs(count);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	report_resource_count(done);
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	clear_numa_lmb_links();
 | 
				
			||||||
 | 
					+	free_lmbs(lmb_list);
 | 
				
			||||||
 | 
					+	return 0;
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 int do_mem_kernel_dlpar(void)
 | 
				
			||||||
 | 
					 {
 | 
				
			||||||
 | 
					 	char cmdbuf[128];
 | 
				
			||||||
 | 
					 	int rc, offset;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	if (usr_action == REMOVE && usr_drc_count) {
 | 
				
			||||||
 | 
					+		build_numa_topology();
 | 
				
			||||||
 | 
					+		if (numa_enabled) {
 | 
				
			||||||
 | 
					+			if (!numa_based_remove(usr_drc_count))
 | 
				
			||||||
 | 
					+				return 0;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+			/*
 | 
				
			||||||
 | 
					+			 * If the NUMA based removal failed, lets try the legacy
 | 
				
			||||||
 | 
					+			 * way.
 | 
				
			||||||
 | 
					+			 */
 | 
				
			||||||
 | 
					+			say(WARN, "Can't do NUMA based removal operation.\n");
 | 
				
			||||||
 | 
					+		}
 | 
				
			||||||
 | 
					+	}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 	offset = sprintf(cmdbuf, "%s ", "memory");
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	switch (usr_action) {
 | 
				
			||||||
 | 
					diff --git a/src/drmgr/ofdt.h b/src/drmgr/ofdt.h
 | 
				
			||||||
 | 
					index 3850a77229b4..3c2840b2e0ee 100644
 | 
				
			||||||
 | 
					--- a/src/drmgr/ofdt.h
 | 
				
			||||||
 | 
					+++ b/src/drmgr/ofdt.h
 | 
				
			||||||
 | 
					@@ -92,6 +92,7 @@ struct dr_node {
 | 
				
			||||||
 | 
					 			uint32_t	_lmb_aa_index;
 | 
				
			||||||
 | 
					 			struct mem_scn	*_mem_scns;
 | 
				
			||||||
 | 
					 			struct of_node	*_of_node;
 | 
				
			||||||
 | 
					+			struct dr_node	*_numa_next;
 | 
				
			||||||
 | 
					 		} _smem;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 #define lmb_address	_node_u._smem._address
 | 
				
			||||||
 | 
					@@ -99,6 +100,7 @@ struct dr_node {
 | 
				
			||||||
 | 
					 #define lmb_aa_index	_node_u._smem._lmb_aa_index
 | 
				
			||||||
 | 
					 #define lmb_mem_scns	_node_u._smem._mem_scns
 | 
				
			||||||
 | 
					 #define lmb_of_node	_node_u._smem._of_node
 | 
				
			||||||
 | 
					+#define lmb_numa_next	_node_u._smem._numa_next
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 		struct hea_info {
 | 
				
			||||||
 | 
					 			uint		_port_no;
 | 
				
			||||||
 | 
					-- 
 | 
				
			||||||
 | 
					2.29.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -0,0 +1,33 @@
 | 
				
			|||||||
 | 
					commit 0b59d4a372aa266caa75f3b6a253b8f5aeaf3802
 | 
				
			||||||
 | 
					Author: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					Date:   Mon Mar 1 19:34:29 2021 -0800
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    hcnmgr: Avoid cleanup of bond interface at boot time when no HNV exists
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    At boot time, hcn scans the device tree and discovers if there was a new
 | 
				
			||||||
 | 
					    HNV being added while lpar was inactive. It also cleans up the old hnv
 | 
				
			||||||
 | 
					    interfaces. This patch avoids cleaning up bonding interface when no HNV
 | 
				
			||||||
 | 
					    network devices exists.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					    [tyreld: fixup commit log]
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/scripts/hcnmgr b/scripts/hcnmgr
 | 
				
			||||||
 | 
					index a76505e..c95edba 100644
 | 
				
			||||||
 | 
					--- a/scripts/hcnmgr
 | 
				
			||||||
 | 
					+++ b/scripts/hcnmgr
 | 
				
			||||||
 | 
					@@ -575,7 +575,13 @@ scanhcn() {
 | 
				
			||||||
 | 
					 		done
 | 
				
			||||||
 | 
					 	fi
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+	if [ ${HcnIds[@]} -eq 0 ]; then
 | 
				
			||||||
 | 
					+		hcnlog DEBUG "scanhcn: scan for hybrid virtual network finished"
 | 
				
			||||||
 | 
					+		return $E_SUCCESS
 | 
				
			||||||
 | 
					+	fi
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 	# Next clean up dead connections left from orgitinal LPAR after inactive miration
 | 
				
			||||||
 | 
					+	# Only do this when the HNV ID array is not empty
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	# list of all HCN ids
 | 
				
			||||||
 | 
					 	ids="${HcnIds[*]}"
 | 
				
			||||||
@ -0,0 +1,32 @@
 | 
				
			|||||||
 | 
					commit 1cb8bd89d6386c60e75c47d4a4452d3f130d5138
 | 
				
			||||||
 | 
					Author: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					Date:   Fri Mar 12 14:18:18 2021 -0800
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    hcnmgr: Avoid using xargs to process NM show connections
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    When removing HNV bonding connections xargs can fail to process the output of
 | 
				
			||||||
 | 
					    nmcli show properly.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Instead of piping into xargs fix this by using a loop to check for all related
 | 
				
			||||||
 | 
					    bonding connections and remove them explicitly one by one.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					    [tyreld: fixed up commit log]
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/scripts/hcnmgr b/scripts/hcnmgr
 | 
				
			||||||
 | 
					index d66b5d1..30d31e7 100644
 | 
				
			||||||
 | 
					--- a/scripts/hcnmgr
 | 
				
			||||||
 | 
					+++ b/scripts/hcnmgr
 | 
				
			||||||
 | 
					@@ -377,7 +377,10 @@ rmhcn() {
 | 
				
			||||||
 | 
					 	fi
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	hcnlog INFO "rmhcn: delete bond $BONDNAME and slaves "
 | 
				
			||||||
 | 
					-	nmcli -f NAME con show | grep "$BONDNAME" | xargs sudo nmcli con delete
 | 
				
			||||||
 | 
					+	for connection in $(nmcli -f NAME con show | grep "$BONDNAME"); do
 | 
				
			||||||
 | 
					+		hcnlog INFO "Delete bonding connection $connection"
 | 
				
			||||||
 | 
					+		nmcli con delete "$connection"
 | 
				
			||||||
 | 
					+	done
 | 
				
			||||||
 | 
					 	hcnlog DEBUG "rmhcn: exit"
 | 
				
			||||||
 | 
					 	return $E_SUCCESS
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
@ -0,0 +1,26 @@
 | 
				
			|||||||
 | 
					commit 366e17553ed647613668678c2d301d369038f41b
 | 
				
			||||||
 | 
					Author: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					Date:   Thu Nov 12 19:00:47 2020 +0530
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Update ppc64-cpu usage
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    'ppc64_cpu --help' doesn't list '--version' as an option. This patch
 | 
				
			||||||
 | 
					    adds the option in the usage information of ppc64-cpu command.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Brahadambal Srinivasan <latha@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/src/ppc64_cpu.c b/src/ppc64_cpu.c
 | 
				
			||||||
 | 
					index 71f4720..2b0f66c 100644
 | 
				
			||||||
 | 
					--- a/src/ppc64_cpu.c
 | 
				
			||||||
 | 
					+++ b/src/ppc64_cpu.c
 | 
				
			||||||
 | 
					@@ -1195,7 +1195,8 @@ static void usage(void)
 | 
				
			||||||
 | 
					 "ppc64_cpu --subcores-per-core       # Get number of subcores per core\n"
 | 
				
			||||||
 | 
					 "ppc64_cpu --subcores-per-core=X     # Set subcores per core to X (1 or 4)\n"
 | 
				
			||||||
 | 
					 "ppc64_cpu --threads-per-core        # Get threads per core\n"
 | 
				
			||||||
 | 
					-"ppc64_cpu --info                    # Display system state information)\n");
 | 
				
			||||||
 | 
					+"ppc64_cpu --info                    # Display system state information\n"
 | 
				
			||||||
 | 
					+"ppc64_cpu --version                 # Display version of ppc64-cpu\n");
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 struct option longopts[] = {
 | 
				
			||||||
@ -0,0 +1,30 @@
 | 
				
			|||||||
 | 
					commit d9bcb21179ccfea122f326aca4690afe0f7de0c6
 | 
				
			||||||
 | 
					Author: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					Date:   Mon Mar 1 21:34:34 2021 -0800
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    hcnmgr: Wait for sysfs device ready when looking up device name
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    At the time of calling ofpathname to look up for devicename, wait
 | 
				
			||||||
 | 
					    for sysfs device ready. Otherwise, the OS may be in the middle of device
 | 
				
			||||||
 | 
					    renaming.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					    [tyreld: fixed up commit log]
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/scripts/hcnmgr b/scripts/hcnmgr
 | 
				
			||||||
 | 
					index c95edba..0d20e7d 100644
 | 
				
			||||||
 | 
					--- a/scripts/hcnmgr
 | 
				
			||||||
 | 
					+++ b/scripts/hcnmgr
 | 
				
			||||||
 | 
					@@ -241,7 +241,10 @@ get_dev_hcn() {
 | 
				
			||||||
 | 
					 	# Let's retry a few times.
 | 
				
			||||||
 | 
					 	while [ $wait != 0 ]; do
 | 
				
			||||||
 | 
					 		if DEVNAME=$(ofpathname -l "$(echo "$1" | sed -e "s/\/proc\/device-tree//")" 2>/dev/null); then
 | 
				
			||||||
 | 
					-			break
 | 
				
			||||||
 | 
					+			if [ -e /sys/class/net/"$DEVNAME" ]; then
 | 
				
			||||||
 | 
					+				hcnlog DEBUG "ofpathname waiting for /sys/class/net device $DEVNAME ready"
 | 
				
			||||||
 | 
					+				break
 | 
				
			||||||
 | 
					+			fi
 | 
				
			||||||
 | 
					 		fi
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 		hcnlog DEBUG "ofpathname return $?, devname is $DEVNAME rety counter $wait"
 | 
				
			||||||
@ -0,0 +1,30 @@
 | 
				
			|||||||
 | 
					commit e25d71be411b610e5e889f8efaaf04b38c2d9ecb
 | 
				
			||||||
 | 
					Author: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					Date:   Fri Mar 12 13:50:33 2021 -0800
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    hcnmgr: Avoid using ifcfg file for checking bonding interface status
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    When configuring migratable sr_iov into hybrid network, it checks if
 | 
				
			||||||
 | 
					    there is an existing HNV using the presence of ifcfg file location. This
 | 
				
			||||||
 | 
					    is not preferred as the location can be different on distros.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    This patch fixes this by using NetworkManager nmcli.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Mingming Cao <mmc@linux.vnet.ibm.com>
 | 
				
			||||||
 | 
					    [tyreld: fixed spelling]
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/scripts/hcnmgr b/scripts/hcnmgr
 | 
				
			||||||
 | 
					index 0d20e7d..d66b5d1 100644
 | 
				
			||||||
 | 
					--- a/scripts/hcnmgr
 | 
				
			||||||
 | 
					+++ b/scripts/hcnmgr
 | 
				
			||||||
 | 
					@@ -282,8 +282,7 @@ do_config_vdevice() {
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	hcnlog DEBUG "Check if there is bond $BONDNAME with hcn id $HCNID"
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					-	hcnlog DEBUG "ifconfig file $IFCONFIG_PATH/ifconfig-$BONDNAME"
 | 
				
			||||||
 | 
					-	if [ ! -e "$IFCONFIG_PATH/ifcfg-$BONDNAME" ]; then
 | 
				
			||||||
 | 
					+	if ! nmcli -f NAME con show --active | grep -q "$BONDNAME\s"; then
 | 
				
			||||||
 | 
					 		hcnlog INFO "nmcli con add type bond con-name $BONDNAME ifname $BONDNAME"
 | 
				
			||||||
 | 
					 		nmcli con add type bond con-name "$BONDNAME" ifname "$BONDNAME"
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
@ -0,0 +1,132 @@
 | 
				
			|||||||
 | 
					commit 97269d301797e23b75d0c7a5cb63ce280783f615
 | 
				
			||||||
 | 
					Author: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					Date:   Thu Mar 4 14:51:38 2021 +0100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    lparstat: add -x option for the security flavor
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    This allows the user to get the security flavor settings for the LPAR.
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    The output is :
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    $ lparstat -x
 | 
				
			||||||
 | 
					    Speculative Execution Mode                   : 1
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Where the output number means
 | 
				
			||||||
 | 
					     0 = Speculative execution fully enabled
 | 
				
			||||||
 | 
					     1 = Speculative execution controls to mitigate user-to-kernel side-channel
 | 
				
			||||||
 | 
					         attacks
 | 
				
			||||||
 | 
					     2 = Speculative execution controls to mitigate user-to-kernel and
 | 
				
			||||||
 | 
					         user-to-user side-channel attacks
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    In the case the running kernel is not exposing the security flavor in
 | 
				
			||||||
 | 
					    /proc/powerpc/lparcfg, the output is:
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    $ lparstat -x
 | 
				
			||||||
 | 
					    Speculative Execution Mode                   : -
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
 | 
				
			||||||
 | 
					    Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diff --git a/src/lparstat.c b/src/lparstat.c
 | 
				
			||||||
 | 
					index 23e4b85..00922c4 100644
 | 
				
			||||||
 | 
					--- a/src/lparstat.c
 | 
				
			||||||
 | 
					+++ b/src/lparstat.c
 | 
				
			||||||
 | 
					@@ -42,6 +42,7 @@
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 static bool o_legacy = false;
 | 
				
			||||||
 | 
					 static bool o_scaled = false;
 | 
				
			||||||
 | 
					+static bool o_security = false;
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 static int threads_per_cpu;
 | 
				
			||||||
 | 
					 static int cpus_in_system;
 | 
				
			||||||
 | 
					@@ -1152,6 +1153,15 @@ void print_scaled_output(int interval, int count)
 | 
				
			||||||
 | 
					 	} while (--count > 0);
 | 
				
			||||||
 | 
					 }
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+static void print_security_flavor(void)
 | 
				
			||||||
 | 
					+{
 | 
				
			||||||
 | 
					+	char value[64];
 | 
				
			||||||
 | 
					+	char *descr;
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					+	get_sysdata("security_flavor", &descr, value);
 | 
				
			||||||
 | 
					+	fprintf(stdout, "%-45s: %s\n", descr, value);
 | 
				
			||||||
 | 
					+}
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 static void usage(void)
 | 
				
			||||||
 | 
					 {
 | 
				
			||||||
 | 
					 	printf("Usage:  lparstat [ options ]\n\tlparstat <interval> [ count ]\n\n"
 | 
				
			||||||
 | 
					@@ -1159,6 +1169,7 @@ static void usage(void)
 | 
				
			||||||
 | 
					 	       "\t-h, --help		Show this message and exit.\n"
 | 
				
			||||||
 | 
					 	       "\t-V, --version	\tDisplay lparstat version information.\n"
 | 
				
			||||||
 | 
					 	       "\t-i			Lists details on the LPAR configuration.\n"
 | 
				
			||||||
 | 
					+	       "\t-x			Print the security mode settings for the LPAR.\n"
 | 
				
			||||||
 | 
					 	       "\t-E			Print SPURR metrics.\n"
 | 
				
			||||||
 | 
					 	       "\t-l, --legacy		Print the report in legacy format.\n"
 | 
				
			||||||
 | 
					 	       "interval		The interval parameter specifies the amount of time between each report.\n"
 | 
				
			||||||
 | 
					@@ -1184,7 +1195,7 @@ int main(int argc, char *argv[])
 | 
				
			||||||
 | 
					 		exit(1);
 | 
				
			||||||
 | 
					 	}
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					-	while ((c = getopt_long(argc, argv, "iEVhl",
 | 
				
			||||||
 | 
					+	while ((c = getopt_long(argc, argv, "iEVhlx",
 | 
				
			||||||
 | 
					 				long_opts, &opt_index)) != -1) {
 | 
				
			||||||
 | 
					 		switch(c) {
 | 
				
			||||||
 | 
					 			case 'i':
 | 
				
			||||||
 | 
					@@ -1199,6 +1210,9 @@ int main(int argc, char *argv[])
 | 
				
			||||||
 | 
					 			case 'V':
 | 
				
			||||||
 | 
					 				printf("lparstat - %s\n", VERSION);
 | 
				
			||||||
 | 
					 				return 0;
 | 
				
			||||||
 | 
					+			case 'x':
 | 
				
			||||||
 | 
					+				o_security = true;
 | 
				
			||||||
 | 
					+				break;
 | 
				
			||||||
 | 
					 			case 'h':
 | 
				
			||||||
 | 
					 				usage();
 | 
				
			||||||
 | 
					 				return 0;
 | 
				
			||||||
 | 
					@@ -1223,6 +1237,8 @@ int main(int argc, char *argv[])
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					 	if (i_option)
 | 
				
			||||||
 | 
					 		print_iflag_data();
 | 
				
			||||||
 | 
					+	else if (o_security)
 | 
				
			||||||
 | 
					+		print_security_flavor();
 | 
				
			||||||
 | 
					 	else if (o_scaled) {
 | 
				
			||||||
 | 
					 		print_scaled_output(interval, count);
 | 
				
			||||||
 | 
					 		close_cpu_sysfs_fds(threads_in_system);
 | 
				
			||||||
 | 
					diff --git a/src/lparstat.h b/src/lparstat.h
 | 
				
			||||||
 | 
					index 9b7117f..26ed4ba 100644
 | 
				
			||||||
 | 
					--- a/src/lparstat.h
 | 
				
			||||||
 | 
					+++ b/src/lparstat.h
 | 
				
			||||||
 | 
					@@ -302,6 +302,10 @@ struct sysentry system_data[] = {
 | 
				
			||||||
 | 
					 	 .descr = "Idle CPU value - SPURR",
 | 
				
			||||||
 | 
					 	 .get = &get_cpu_idle_spurr},
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					+	/* Security flavor */
 | 
				
			||||||
 | 
					+	{.name = "security_flavor",
 | 
				
			||||||
 | 
					+	 .descr = "Speculative Execution Mode"},
 | 
				
			||||||
 | 
					+
 | 
				
			||||||
 | 
					 	{.name[0] = '\0'},
 | 
				
			||||||
 | 
					 };
 | 
				
			||||||
 | 
					 
 | 
				
			||||||
 | 
					diff -up powerpc-utils-1.3.8/man/lparstat.8.me powerpc-utils-1.3.8/man/lparstat.8
 | 
				
			||||||
 | 
					--- powerpc-utils-1.3.8/man/lparstat.8.me	2021-04-20 15:49:18.305532697 +0200
 | 
				
			||||||
 | 
					+++ powerpc-utils-1.3.8/man/lparstat.8	2021-04-20 15:52:04.703021972 +0200
 | 
				
			||||||
 | 
					@@ -209,6 +209,20 @@ The variable memory capacity weight of t
 | 
				
			||||||
 | 
					 .TP
 | 
				
			||||||
 | 
					 .SH
 | 
				
			||||||
 | 
					 .TP
 | 
				
			||||||
 | 
					+\fB\-x\fR
 | 
				
			||||||
 | 
					+Display the LPAR security flavor mode
 | 
				
			||||||
 | 
					+.RS
 | 
				
			||||||
 | 
					+.TP
 | 
				
			||||||
 | 
					+.B 0
 | 
				
			||||||
 | 
					+Speculative execution fully enabled
 | 
				
			||||||
 | 
					+.TP
 | 
				
			||||||
 | 
					+.B 1
 | 
				
			||||||
 | 
					+Speculative execution controls to mitigate user-to-kernel side-channel attacks
 | 
				
			||||||
 | 
					+.TP
 | 
				
			||||||
 | 
					+.B 2
 | 
				
			||||||
 | 
					+Speculative execution controls to mitigate user-to-kernel and user-to-user side-channel attacks
 | 
				
			||||||
 | 
					+.RE
 | 
				
			||||||
 | 
					+.TP
 | 
				
			||||||
 | 
					 \fB\-E\fR
 | 
				
			||||||
 | 
					 Display Scaled Processor Utilization Resource Register(SPURR) based CPU utilization.
 | 
				
			||||||
 | 
					 .RS
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user