Compare commits
No commits in common. "c8" and "c8s-private-than" have entirely different histories.
c8
...
c8s-privat
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
SOURCES/skiboot-6.7.1.tar.gz
|
||||
/skiboot-6.7.1.tar.gz
|
||||
|
||||
@ -1 +0,0 @@
|
||||
1fddbe662d1e08e35640203876328fad6f70f392 SOURCES/skiboot-6.7.1.tar.gz
|
||||
6
gating.yaml
Normal file
6
gating.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
--- !Policy
|
||||
product_versions:
|
||||
- rhel-8
|
||||
decision_context: osci_compose_gate
|
||||
rules:
|
||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier1.functional}
|
||||
39
opal-prd-remove-misleading-errno-error-message.patch
Normal file
39
opal-prd-remove-misleading-errno-error-message.patch
Normal file
@ -0,0 +1,39 @@
|
||||
commit 5240ab5f83f16cee96ae92cd5f701dd65e6c003c
|
||||
Author: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||
Date: Fri Sep 19 22:49:45 2025 +0530
|
||||
|
||||
external/opal-prd: remove misleading errno error message
|
||||
|
||||
While handling memory error opal-prd displays misleading errno error
|
||||
messages even when the handler was successfully able to soft/hard offline the
|
||||
requested memory page.
|
||||
|
||||
opal-prd[49096]: MEM: Memory error: range 0000000eeb445700-0000000eeb445700, type: correctable
|
||||
opal-prd[49096]: MEM: Offlined 0000000eeb445700,0000000eeb455700, type correctable: No such file or directory
|
||||
|
||||
In the above example, an error message 'No such file or directory' was
|
||||
displayed even after successfully offlining memory. This is because
|
||||
printf in success case was using '%m' which prints errno based error
|
||||
message. The value in errno is significant only when the return value of
|
||||
the call indicated an error. The value of errno is never set to zero by
|
||||
any system call or library function.
|
||||
|
||||
Hence, in the success case, do not use '%m' in printf to avoid a misleading
|
||||
error message
|
||||
|
||||
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
|
||||
|
||||
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
||||
index da947c827..d85e3e9bd 100644
|
||||
--- a/external/opal-prd/opal-prd.c
|
||||
+++ b/external/opal-prd/opal-prd.c
|
||||
@@ -721,7 +721,7 @@ static int memory_error_worker(const char *sysfsfile, const char *type,
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
- pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n",
|
||||
+ pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s\n",
|
||||
i_start_addr, addr, type);
|
||||
|
||||
close(memfd);
|
||||
58
opal-prd-service-shutdown-on-memory-errors.patch
Normal file
58
opal-prd-service-shutdown-on-memory-errors.patch
Normal file
@ -0,0 +1,58 @@
|
||||
commit 00416008b8ce018dd149182bf54a650eb95f9309
|
||||
Author: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||
Date: Fri Sep 19 22:49:44 2025 +0530
|
||||
|
||||
external/opal-prd: Fix opal-prd service shutdown on memory errors
|
||||
|
||||
Whenever there is a memory error reported, opal-prd tries to spawn a
|
||||
child process using fork to delegate the memory offline work to child
|
||||
process. After handling the memory error, the child process is supposed to exit.
|
||||
However, instead of delegating the task to child process the main thread
|
||||
itself handles the memory error and exits. Thus causing opal-prd service
|
||||
to go into a stop/restart loop and eventually hit the systemd restart
|
||||
limit, leaving the opal-prd service unavailable.
|
||||
|
||||
opal-prd[49096]: MEM: Memory error: range 0000000eeb445700-0000000eeb445700, type: correctable
|
||||
opal-prd[49096]: MEM: Offlined 0000000eeb445700,0000000eeb455700, type correctable: No such file or directory
|
||||
systemd[1]: opal-prd.service: Service RestartSec=100ms expired, scheduling restart.
|
||||
systemd[1]: opal-prd.service: Scheduled restart job, restart counter is at 7.
|
||||
systemd[1]: opal-prd.service: Start request repeated too quickly.
|
||||
systemd[1]: opal-prd.service: Failed with result 'start-limit-hit'.
|
||||
systemd[1]: Failed to start OPAL PRD daemon
|
||||
|
||||
The fork() function, on success, returns pid of child process (pid > 0)
|
||||
in the parent and 0 in the child. Instead of invoking memory worker
|
||||
when return value pid == 0, it invokes worker when pid > 0 which is
|
||||
parent process itself.
|
||||
|
||||
pid = fork();
|
||||
if (pid > 0)
|
||||
exit(memory_error_worker(sysfsfile, typestr, i_start_addr,
|
||||
i_endAddr));
|
||||
|
||||
The above logic causes the parent thread to exit after handling memory
|
||||
error. Fix this by changing the if condition to (pid == 0).
|
||||
|
||||
Fixes: 8cbd0de88d16 ("opal-prd: Have a worker process handle page offlining")
|
||||
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
|
||||
|
||||
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
||||
index 1c610da4c..da947c827 100644
|
||||
--- a/external/opal-prd/opal-prd.c
|
||||
+++ b/external/opal-prd/opal-prd.c
|
||||
@@ -755,9 +755,13 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
||||
/*
|
||||
* HBRT expects the memory offlining process to happen in the background
|
||||
* after the notification is delivered.
|
||||
+ *
|
||||
+ * fork() return value:
|
||||
+ * On success, the PID of the child process is returned in the parent,
|
||||
+ * and 0 is returned in the child.
|
||||
*/
|
||||
pid = fork();
|
||||
- if (pid > 0)
|
||||
+ if (pid == 0)
|
||||
exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
|
||||
|
||||
if (pid < 0) {
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
Name: opal-prd
|
||||
Version: 6.7.1
|
||||
Release: 1%{?dist}
|
||||
Release: 1%{?dist}.1
|
||||
Summary: OPAL Processor Recovery Diagnostics Daemon
|
||||
|
||||
Group: System Environment/Daemons
|
||||
@ -30,6 +30,8 @@ Source1: opal-prd-rsyslog
|
||||
Source2: opal-prd-logrotate
|
||||
|
||||
# upstream fix
|
||||
Patch1: opal-prd-remove-misleading-errno-error-message.patch
|
||||
Patch2: opal-prd-service-shutdown-on-memory-errors.patch
|
||||
|
||||
%description
|
||||
This package provides a daemon to load and run the OpenPower firmware's
|
||||
@ -60,6 +62,8 @@ services to the OS (Linux) on IBM Power and OpenPower systems.
|
||||
|
||||
%prep
|
||||
%setup -q -n %{project}-%{version}
|
||||
%patch -P1 -p1 -b .remove-misleading-errno-error-message
|
||||
%patch -P2 -p1 -b .service-shutdown-on-memory-errors
|
||||
|
||||
%build
|
||||
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
|
||||
@ -136,6 +140,9 @@ install -D -p -m 755 external/pci-scripts/phberr.py %{buildroot}%{_bindir}/phber
|
||||
%{_datadir}/qemu/
|
||||
|
||||
%changelog
|
||||
* Tue Nov 18 2025 Than Ngo <than@redhat.com> - 6.7.1-1.1
|
||||
- Resolves: RHEL-125568, Fix opal-prd service shutdown on memory errors
|
||||
|
||||
* Mon May 10 2021 Than Ngo <than@redhat.com> - 6.7.1-1
|
||||
- Resolves: #1921665, rebase to 6.7.1
|
||||
|
||||
Loading…
Reference in New Issue
Block a user