148 lines
4.4 KiB
Diff
148 lines
4.4 KiB
Diff
commit 8cbd0de88d162e387f11569eee1bdecef8fad2e3
|
|
Author: Oliver O'Halloran <oohall@gmail.com>
|
|
Date: Wed Sep 23 16:12:20 2020 +1000
|
|
|
|
opal-prd: Have a worker process handle page offlining
|
|
|
|
The memory_error() hservice interface expects the memory_error() call to
|
|
just accept the offline request and return without actually offlining the
|
|
memory. Currently we will attempt to offline the marked pages before
|
|
returning to HBRT which can result in an excessively long time spent in the
|
|
memory_error() hservice call which blocks HBRT from processing other
|
|
errors. Fix this by adding a worker process which performs the page
|
|
offlining via the sysfs memory error interfaces.
|
|
|
|
Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
|
|
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
|
|
|
|
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
|
index 40e5a984..d74d8039 100644
|
|
--- a/external/opal-prd/opal-prd.c
|
|
+++ b/external/opal-prd/opal-prd.c
|
|
@@ -27,6 +27,7 @@
|
|
#include <stdarg.h>
|
|
#include <time.h>
|
|
#include <poll.h>
|
|
+#include <signal.h>
|
|
#include <dirent.h>
|
|
|
|
#include <endian.h>
|
|
@@ -696,13 +697,42 @@ out:
|
|
return rc;
|
|
}
|
|
|
|
+/*
+ * Offline every page in the inclusive range [i_start_addr, i_endAddr] by
+ * writing each page address, one per write(), to the sysfs node sysfsfile.
+ * Pages are stepped by ctx->page_size.
+ *
+ * sysfsfile: path of the sysfs node the page addresses are written to
+ * type:      error type name, only used in log messages
+ *
+ * A failed write is logged and the remaining pages are still attempted.
+ *
+ * Returns 0 if every write succeeded, 1 if at least one write failed and
+ * -1 if the sysfs node could not be opened. The caller passes this value
+ * to exit(), so it becomes the worker's exit status.
+ */
+static int memory_error_worker(const char *sysfsfile, const char *type,
+			       uint64_t i_start_addr, uint64_t i_endAddr)
+{
+	char buf[ADDR_STRING_SZ];
+	uint64_t addr = i_start_addr;
+	int memfd, n, ret = 0;
+	ssize_t rc;
+
+	memfd = open(sysfsfile, O_WRONLY);
+	if (memfd < 0) {
+		pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+				 "Unable to open sysfs node %s: %m", sysfsfile);
+		return -1;
+	}
+
+	while (addr <= i_endAddr) {
+		n = snprintf(buf, sizeof(buf), "0x%lx", addr);
+		rc = write(memfd, buf, n);
+		if (rc != n) {
+			pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+					 "page addr: %016lx type: %s: %m",
+				addr, type);
+			ret = 1;
+		}
+
+		/*
+		 * Stop before stepping past the end of the range. Checking
+		 * the remaining distance (rather than comparing addr after
+		 * the add) keeps the loop finite when the range ends near
+		 * UINT64_MAX or the page size is zero.
+		 */
+		if (ctx->page_size == 0 || i_endAddr - addr < ctx->page_size)
+			break;
+		addr += ctx->page_size;
+	}
+
+	close(memfd);
+
+	/* No %m here: errno is not meaningful for the summary line. */
+	pr_log(LOG_CRIT, "MEM: %s %016lx-%016lx, type %s\n",
+		ret ? "Failed to fully offline" : "Offlined",
+		i_start_addr, i_endAddr, type);
+
+	return ret;
+}
|
|
+
|
|
int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
|
enum MemoryError_t i_errorType)
|
|
{
|
|
const char *sysfsfile, *typestr;
|
|
- char buf[ADDR_STRING_SZ];
|
|
- int memfd, rc, n, ret = 0;
|
|
- uint64_t addr;
|
|
+ pid_t pid;
|
|
|
|
switch(i_errorType) {
|
|
case MEMORY_ERROR_CE:
|
|
@@ -722,26 +752,21 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
|
pr_log(LOG_ERR, "MEM: Memory error: range %016lx-%016lx, type: %s",
|
|
i_start_addr, i_endAddr, typestr);
|
|
|
|
+ /*
|
|
+ * HBRT expects the memory offlining process to happen in the background
|
|
+ * after the notification is delivered.
|
|
+ */
|
|
+ pid = fork();
|
|
+ if (pid > 0)
|
|
+ exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
|
|
|
|
- memfd = open(sysfsfile, O_WRONLY);
|
|
- if (memfd < 0) {
|
|
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
|
- "Unable to open sysfs node %s: %m", sysfsfile);
|
|
+ if (pid < 0) {
|
|
+ perror("MEM: unable to fork worker to offline memory!\n");
|
|
return -1;
|
|
}
|
|
|
|
- for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
|
|
- n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
|
|
- rc = write(memfd, buf, n);
|
|
- if (rc != n) {
|
|
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
|
- "page addr: %016lx type: %d: %m",
|
|
- addr, i_errorType);
|
|
- ret = rc;
|
|
- }
|
|
- }
|
|
-
|
|
- return ret;
|
|
+ pr_log(LOG_INFO, "MEM: forked off %d to handle mem error\n", pid);
|
|
+ return 0;
|
|
}
|
|
|
|
uint64_t hservice_get_interface_capabilities(uint64_t set)
|
|
@@ -2112,6 +2137,10 @@ static int init_control_socket(struct opal_prd_ctx *ctx)
|
|
return 0;
|
|
}
|
|
|
|
+/*
+ * SIGCHLD disposition passed to sigaction() before entering the attn loop:
+ * default handler, with SA_NOCLDWAIT so exited children are reaped
+ * automatically and SA_RESTART so interrupted system calls are restarted.
+ */
+static struct sigaction sigchild_action = {
+	.sa_handler = SIG_DFL,
+	.sa_flags = SA_RESTART | SA_NOCLDWAIT,
+};
|
|
|
|
static int run_prd_daemon(struct opal_prd_ctx *ctx)
|
|
{
|
|
@@ -2243,6 +2272,22 @@ static int run_prd_daemon(struct opal_prd_ctx *ctx)
|
|
pr_debug("SCOM: f00f: %lx", be64toh(val));
|
|
}
|
|
|
|
+ /*
|
|
+ * Setup the SIGCHLD handler to automatically reap the worker processes
|
|
+ * we use for memory offlining. We can't do this earlier since the
|
|
+ * modprobe helper spawns workers and wants to check their exit status
|
|
+ * with waitpid(). Auto-reaping breaks that so enable it just before
|
|
+ * entering the attn loop.
|
|
+ *
|
|
+ * We also setup system call restarting on SIGCHLD since opal-prd
|
|
+ * doesn't make any real attempt to handle blocking functions exiting
|
|
+ * due to EINTR.
|
|
+ */
|
|
+ if (sigaction(SIGCHLD, &sigchild_action, NULL)) {
|
|
+ pr_log(LOG_ERR, "CTRL: Failed to register signal handler %m\n");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
run_attn_loop(ctx);
|
|
rc = 0;
|
|
|