import opal-prd-6.6.3-2.el8
This commit is contained in:
parent
9093333aba
commit
ba6f8bff7e
2
.gitignore
vendored
2
.gitignore
vendored
@ -1 +1 @@
|
|||||||
SOURCES/skiboot-6.6.tar.gz
|
SOURCES/skiboot-6.6.3.tar.gz
|
||||||
|
@ -1 +1 @@
|
|||||||
97a6f924c558a9c8315333b591eae4d4ea3c9f9d SOURCES/skiboot-6.6.tar.gz
|
7ba62e1904d77dee4d9b38aad0d4ad273cf0a651 SOURCES/skiboot-6.6.3.tar.gz
|
||||||
|
@ -0,0 +1,147 @@
|
|||||||
|
commit 8cbd0de88d162e387f11569eee1bdecef8fad2e3
|
||||||
|
Author: Oliver O'Halloran <oohall@gmail.com>
|
||||||
|
Date: Wed Sep 23 16:12:20 2020 +1000
|
||||||
|
|
||||||
|
opal-prd: Have a worker process handle page offlining
|
||||||
|
|
||||||
|
The memory_error() hservice interface expects the memory_error() call to
|
||||||
|
just accept the offline request and return without actually offlining the
|
||||||
|
memory. Currently we will attempt to offline the marked pages before
|
||||||
|
returning to HBRT which can result in an excessively long time spent in the
|
||||||
|
memory_error() hservice call which blocks HBRT from processing other
|
||||||
|
errors. Fix this by adding a worker process which performs the page
|
||||||
|
offlining via the sysfs memory error interfaces.
|
||||||
|
|
||||||
|
Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
|
||||||
|
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
|
||||||
|
|
||||||
|
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
||||||
|
index 40e5a984..d74d8039 100644
|
||||||
|
--- a/external/opal-prd/opal-prd.c
|
||||||
|
+++ b/external/opal-prd/opal-prd.c
|
||||||
|
@@ -27,6 +27,7 @@
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <poll.h>
|
||||||
|
+#include <signal.h>
|
||||||
|
#include <dirent.h>
|
||||||
|
|
||||||
|
#include <endian.h>
|
||||||
|
@@ -696,13 +697,42 @@ out:
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static int memory_error_worker(const char *sysfsfile, const char *type,
|
||||||
|
+ uint64_t i_start_addr, uint64_t i_endAddr)
|
||||||
|
+{
|
||||||
|
+ int memfd, rc, n, ret = 0;
|
||||||
|
+ char buf[ADDR_STRING_SZ];
|
||||||
|
+ uint64_t addr;
|
||||||
|
+
|
||||||
|
+ memfd = open(sysfsfile, O_WRONLY);
|
||||||
|
+ if (memfd < 0) {
|
||||||
|
+ pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
||||||
|
+ "Unable to open sysfs node %s: %m", sysfsfile);
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
|
||||||
|
+ n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
|
||||||
|
+ rc = write(memfd, buf, n);
|
||||||
|
+ if (rc != n) {
|
||||||
|
+ pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
||||||
|
+ "page addr: %016lx type: %s: %m",
|
||||||
|
+ addr, type);
|
||||||
|
+ ret = 1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n",
|
||||||
|
+ i_start_addr, addr, type);
|
||||||
|
+
|
||||||
|
+ close(memfd);
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
||||||
|
enum MemoryError_t i_errorType)
|
||||||
|
{
|
||||||
|
const char *sysfsfile, *typestr;
|
||||||
|
- char buf[ADDR_STRING_SZ];
|
||||||
|
- int memfd, rc, n, ret = 0;
|
||||||
|
- uint64_t addr;
|
||||||
|
+ pid_t pid;
|
||||||
|
|
||||||
|
switch(i_errorType) {
|
||||||
|
case MEMORY_ERROR_CE:
|
||||||
|
@@ -722,26 +752,21 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
||||||
|
pr_log(LOG_ERR, "MEM: Memory error: range %016lx-%016lx, type: %s",
|
||||||
|
i_start_addr, i_endAddr, typestr);
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * HBRT expects the memory offlining process to happen in the background
|
||||||
|
+ * after the notification is delivered.
|
||||||
|
+ */
|
||||||
|
+ pid = fork();
|
||||||
|
+ if (pid > 0)
|
||||||
|
+ exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
|
||||||
|
|
||||||
|
- memfd = open(sysfsfile, O_WRONLY);
|
||||||
|
- if (memfd < 0) {
|
||||||
|
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
||||||
|
- "Unable to open sysfs node %s: %m", sysfsfile);
|
||||||
|
+ if (pid < 0) {
|
||||||
|
+ perror("MEM: unable to fork worker to offline memory!\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
- for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
|
||||||
|
- n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
|
||||||
|
- rc = write(memfd, buf, n);
|
||||||
|
- if (rc != n) {
|
||||||
|
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
|
||||||
|
- "page addr: %016lx type: %d: %m",
|
||||||
|
- addr, i_errorType);
|
||||||
|
- ret = rc;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return ret;
|
||||||
|
+ pr_log(LOG_INFO, "MEM: forked off %d to handle mem error\n", pid);
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t hservice_get_interface_capabilities(uint64_t set)
|
||||||
|
@@ -2112,6 +2137,10 @@ static int init_control_socket(struct opal_prd_ctx *ctx)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static struct sigaction sigchild_action = {
|
||||||
|
+ .sa_flags = SA_NOCLDWAIT | SA_RESTART,
|
||||||
|
+ .sa_handler = SIG_DFL,
|
||||||
|
+};
|
||||||
|
|
||||||
|
static int run_prd_daemon(struct opal_prd_ctx *ctx)
|
||||||
|
{
|
||||||
|
@@ -2243,6 +2272,22 @@ static int run_prd_daemon(struct opal_prd_ctx *ctx)
|
||||||
|
pr_debug("SCOM: f00f: %lx", be64toh(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /*
|
||||||
|
+ * Set up the SIGCHLD handler to automatically reap the worker processes
|
||||||
|
+ * we use for memory offlining. We can't do this earlier since the
|
||||||
|
+ * modprobe helper spawns workers and wants to check their exit status
|
||||||
|
+ * with waitpid(). Auto-reaping breaks that so enable it just before
|
||||||
|
+ * entering the attn loop.
|
||||||
|
+ *
|
||||||
|
+ * We also set up system call restarting on SIGCHLD since opal-prd
|
||||||
|
+ * doesn't make any real attempt to handle blocking functions exiting
|
||||||
|
+ * due to EINTR.
|
||||||
|
+ */
|
||||||
|
+ if (sigaction(SIGCHLD, &sigchild_action, NULL)) {
|
||||||
|
+ pr_log(LOG_ERR, "CTRL: Failed to register signal handler %m\n");
|
||||||
|
+ return -1;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
run_attn_loop(ctx);
|
||||||
|
rc = 0;
|
||||||
|
|
25
SOURCES/skiboot-6.6.2-ffspart.patch
Normal file
25
SOURCES/skiboot-6.6.2-ffspart.patch
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
commit 6278c6df4ff2123725efc10e5e6ea48d02fda55a
|
||||||
|
Author: Dan Horák <dan@danny.cz>
|
||||||
|
Date: Mon Aug 10 12:59:04 2020 +0200
|
||||||
|
|
||||||
|
external/ffspart: define $(sbindir) for Makefile
|
||||||
|
|
||||||
|
Right now the $(sbindir) variable isn't defined, so the binary gets installed
|
||||||
|
directly into $(DESTDIR).
|
||||||
|
|
||||||
|
Signed-off-by: Dan Horák <dan@danny.cz>
|
||||||
|
|
||||||
|
diff --git a/external/ffspart/rules.mk b/external/ffspart/rules.mk
|
||||||
|
index 40972c688..e006dc5b7 100644
|
||||||
|
--- a/external/ffspart/rules.mk
|
||||||
|
+++ b/external/ffspart/rules.mk
|
||||||
|
@@ -10,6 +10,9 @@ LIBFLASH_SRC := $(addprefix libflash/,$(LIBFLASH_FILES))
|
||||||
|
OBJS += $(LIBFLASH_OBJS)
|
||||||
|
OBJS += common-arch_flash.o
|
||||||
|
|
||||||
|
+prefix = /usr/local/
|
||||||
|
+sbindir = $(prefix)/sbin
|
||||||
|
+
|
||||||
|
CC = $(CROSS_COMPILE)gcc
|
||||||
|
|
||||||
|
FFSPART_VERSION ?= $(shell ./make_version.sh $(EXE))
|
@ -1,8 +1,8 @@
|
|||||||
%global project skiboot
|
%global project skiboot
|
||||||
|
|
||||||
Name: opal-prd
|
Name: opal-prd
|
||||||
Version: 6.6
|
Version: 6.6.3
|
||||||
Release: 1%{?dist}
|
Release: 2%{?dist}
|
||||||
Summary: OPAL Processor Recovery Diagnostics Daemon
|
Summary: OPAL Processor Recovery Diagnostics Daemon
|
||||||
|
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
@ -27,6 +27,10 @@ Source0: https://github.com/open-power/%{project}/archive/v%{version}/%{project}
|
|||||||
Source1: opal-prd-rsyslog
|
Source1: opal-prd-rsyslog
|
||||||
Source2: opal-prd-logrotate
|
Source2: opal-prd-logrotate
|
||||||
|
|
||||||
|
Patch0: skiboot-6.6.2-ffspart.patch
|
||||||
|
# upstream fix
|
||||||
|
Patch1: opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch
|
||||||
|
|
||||||
%description
|
%description
|
||||||
This package provides a daemon to load and run the OpenPower firmware's
|
This package provides a daemon to load and run the OpenPower firmware's
|
||||||
Processor Recovery Diagnostics binary. This is responsible for run time
|
Processor Recovery Diagnostics binary. This is responsible for run time
|
||||||
@ -56,6 +60,8 @@ services to the OS (Linux) on IBM Power and OpenPower systems.
|
|||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n %{project}-%{version}
|
%setup -q -n %{project}-%{version}
|
||||||
|
%patch0 -p1 -b .build
|
||||||
|
%patch1 -p1 -b .8cbd0de88d162e387f11569eee1bdecef8fad2e3
|
||||||
|
|
||||||
%build
|
%build
|
||||||
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
|
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
|
||||||
@ -123,6 +129,12 @@ install -m 644 %{SOURCE2} %{buildroot}/%{_sysconfdir}/logrotate.d/opal-prd
|
|||||||
%{_datadir}/qemu/
|
%{_datadir}/qemu/
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Oct 05 2020 Than Ngo <than@redhat.com> - 6.6.3-2
|
||||||
|
- Resolves: #1885134, fix which makes the actual page offlining asynchronous
|
||||||
|
|
||||||
|
* Thu Oct 01 2020 Than Ngo <than@redhat.com> - 6.6.3-1
|
||||||
|
- Resolves: #1844427, rebase to 6.6.3
|
||||||
|
|
||||||
* Fri Apr 24 2020 Than Ngo <than@redhat.com> - 6.6-1
|
* Fri Apr 24 2020 Than Ngo <than@redhat.com> - 6.6-1
|
||||||
- Resolves: #1779211, rebase to 6.6
|
- Resolves: #1779211, rebase to 6.6
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user