import opal-prd-6.6.3-2.el8

This commit is contained in:
CentOS Sources 2021-03-30 15:45:09 -04:00 committed by Stepan Oksanichenko
parent 9093333aba
commit ba6f8bff7e
5 changed files with 188 additions and 4 deletions

2
.gitignore vendored
View File

@ -1 +1 @@
SOURCES/skiboot-6.6.tar.gz
SOURCES/skiboot-6.6.3.tar.gz

View File

@ -1 +1 @@
97a6f924c558a9c8315333b591eae4d4ea3c9f9d SOURCES/skiboot-6.6.tar.gz
7ba62e1904d77dee4d9b38aad0d4ad273cf0a651 SOURCES/skiboot-6.6.3.tar.gz

View File

@ -0,0 +1,147 @@
commit 8cbd0de88d162e387f11569eee1bdecef8fad2e3
Author: Oliver O'Halloran <oohall@gmail.com>
Date: Wed Sep 23 16:12:20 2020 +1000
opal-prd: Have a worker process handle page offlining
The memory_error() hservice interface expects the memory_error() call to
just accept the offline request and return without actually offlining the
memory. Currently we will attempt to offline the marked pages before
returning to HBRT which can result in an excessively long time spent in the
memory_error() hservice call which blocks HBRT from processing other
errors. Fix this by adding a worker process which performs the page
offlining via the sysfs memory error interfaces.
Reviewed-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
index 40e5a984..d74d8039 100644
--- a/external/opal-prd/opal-prd.c
+++ b/external/opal-prd/opal-prd.c
@@ -27,6 +27,7 @@
#include <stdarg.h>
#include <time.h>
#include <poll.h>
+#include <signal.h>
#include <dirent.h>
#include <endian.h>
@@ -696,13 +697,42 @@ out:
return rc;
}
+static int memory_error_worker(const char *sysfsfile, const char *type,
+ uint64_t i_start_addr, uint64_t i_endAddr)
+{
+ int memfd, rc, n, ret = 0;
+ char buf[ADDR_STRING_SZ];
+ uint64_t addr;
+
+ memfd = open(sysfsfile, O_WRONLY);
+ if (memfd < 0) {
+ pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+ "Unable to open sysfs node %s: %m", sysfsfile);
+ return -1;
+ }
+
+ for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
+ n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
+ rc = write(memfd, buf, n);
+ if (rc != n) {
+ pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
+ "page addr: %016lx type: %s: %m",
+ addr, type);
+ ret = 1;
+ }
+ }
+ pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n",
+ i_start_addr, addr, type);
+
+ close(memfd);
+ return ret;
+}
+
int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
enum MemoryError_t i_errorType)
{
const char *sysfsfile, *typestr;
- char buf[ADDR_STRING_SZ];
- int memfd, rc, n, ret = 0;
- uint64_t addr;
+ pid_t pid;
switch(i_errorType) {
case MEMORY_ERROR_CE:
@@ -722,26 +752,21 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
pr_log(LOG_ERR, "MEM: Memory error: range %016lx-%016lx, type: %s",
i_start_addr, i_endAddr, typestr);
+ /*
+ * HBRT expects the memory offlining process to happen in the background
+ * after the notification is delivered.
+ */
+ pid = fork();
+ if (pid > 0)
+ exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
- memfd = open(sysfsfile, O_WRONLY);
- if (memfd < 0) {
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
- "Unable to open sysfs node %s: %m", sysfsfile);
+ if (pid < 0) {
+ perror("MEM: unable to fork worker to offline memory!\n");
return -1;
}
- for (addr = i_start_addr; addr <= i_endAddr; addr += ctx->page_size) {
- n = snprintf(buf, ADDR_STRING_SZ, "0x%lx", addr);
- rc = write(memfd, buf, n);
- if (rc != n) {
- pr_log(LOG_CRIT, "MEM: Failed to offline memory! "
- "page addr: %016lx type: %d: %m",
- addr, i_errorType);
- ret = rc;
- }
- }
-
- return ret;
+ pr_log(LOG_INFO, "MEM: forked off %d to handle mem error\n", pid);
+ return 0;
}
uint64_t hservice_get_interface_capabilities(uint64_t set)
@@ -2112,6 +2137,10 @@ static int init_control_socket(struct opal_prd_ctx *ctx)
return 0;
}
+static struct sigaction sigchild_action = {
+ .sa_flags = SA_NOCLDWAIT | SA_RESTART,
+ .sa_handler = SIG_DFL,
+};
static int run_prd_daemon(struct opal_prd_ctx *ctx)
{
@@ -2243,6 +2272,22 @@ static int run_prd_daemon(struct opal_prd_ctx *ctx)
pr_debug("SCOM: f00f: %lx", be64toh(val));
}
+ /*
+ * Setup the SIGCHLD handler to automatically reap the worker processes
+ * we use for memory offlining. We can't do this earlier since the
+ * modprobe helper spawns workers and wants to check their exit status
+ * with waitpid(). Auto-reaping breaks that so enable it just before
+ * entering the attn loop.
+ *
+ * We also setup system call restarting on SIGCHLD since opal-prd
+ * doesn't make any real attempt to handle blocking functions exiting
+ * due to EINTR.
+ */
+ if (sigaction(SIGCHLD, &sigchild_action, NULL)) {
+ pr_log(LOG_ERR, "CTRL: Failed to register signal handler %m\n");
+ return -1;
+ }
+
run_attn_loop(ctx);
rc = 0;

View File

@ -0,0 +1,25 @@
commit 6278c6df4ff2123725efc10e5e6ea48d02fda55a
Author: Dan Horák <dan@danny.cz>
Date: Mon Aug 10 12:59:04 2020 +0200
external/ffspart: define $(sbindir) for Makefile
Right now the $(sbindir) variable isn't defined, so the binary gets installed
directly into $(DESTDIR).
Signed-off-by: Dan Horák <dan@danny.cz>
diff --git a/external/ffspart/rules.mk b/external/ffspart/rules.mk
index 40972c688..e006dc5b7 100644
--- a/external/ffspart/rules.mk
+++ b/external/ffspart/rules.mk
@@ -10,6 +10,9 @@ LIBFLASH_SRC := $(addprefix libflash/,$(LIBFLASH_FILES))
OBJS += $(LIBFLASH_OBJS)
OBJS += common-arch_flash.o
+prefix = /usr/local/
+sbindir = $(prefix)/sbin
+
CC = $(CROSS_COMPILE)gcc
FFSPART_VERSION ?= $(shell ./make_version.sh $(EXE))

View File

@ -1,8 +1,8 @@
%global project skiboot
Name: opal-prd
Version: 6.6
Release: 1%{?dist}
Version: 6.6.3
Release: 2%{?dist}
Summary: OPAL Processor Recovery Diagnostics Daemon
Group: System Environment/Daemons
@ -27,6 +27,10 @@ Source0: https://github.com/open-power/%{project}/archive/v%{version}/%{project}
Source1: opal-prd-rsyslog
Source2: opal-prd-logrotate
Patch0: skiboot-6.6.2-ffspart.patch
# upstream fix
Patch1: opal-prd-6.6.3-8cbd0de88d162e387f11569eee1bdecef8fad2e3.patch
%description
This package provides a daemon to load and run the OpenPower firmware's
Processor Recovery Diagnostics binary. This is responsible for run time
@ -56,6 +60,8 @@ services to the OS (Linux) on IBM Power and OpenPower systems.
%prep
%setup -q -n %{project}-%{version}
%patch0 -p1 -b .build
%patch1 -p1 -b .8cbd0de88d162e387f11569eee1bdecef8fad2e3
%build
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
@ -123,6 +129,12 @@ install -m 644 %{SOURCE2} %{buildroot}/%{_sysconfdir}/logrotate.d/opal-prd
%{_datadir}/qemu/
%changelog
* Mon Oct 05 2020 Than Ngo <than@redhat.com> - 6.6.3-2
- Resolves: #1885134, fix which makes the actual page offlining asynchronous
* Thu Oct 01 2020 Than Ngo <than@redhat.com> - 6.6.3-1
- Resolves: #1844427, rebase to 6.6.3
* Fri Apr 24 2020 Than Ngo <than@redhat.com> - 6.6-1
- Resolves: #1779211, rebase to 6.6