Compare commits
No commits in common. "c8" and "c8s-private-than" have entirely different histories.
c8
...
c8s-private-than
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1 +1,2 @@
|
|||||||
SOURCES/skiboot-6.7.1.tar.gz
|
SOURCES/skiboot-6.7.1.tar.gz
|
||||||
|
/skiboot-6.7.1.tar.gz
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
1fddbe662d1e08e35640203876328fad6f70f392 SOURCES/skiboot-6.7.1.tar.gz
|
|
||||||
6
gating.yaml
Normal file
6
gating.yaml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
--- !Policy
|
||||||
|
product_versions:
|
||||||
|
- rhel-8
|
||||||
|
decision_context: osci_compose_gate
|
||||||
|
rules:
|
||||||
|
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier1.functional}
|
||||||
39
opal-prd-remove-misleading-errno-error-message.patch
Normal file
39
opal-prd-remove-misleading-errno-error-message.patch
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
commit 5240ab5f83f16cee96ae92cd5f701dd65e6c003c
|
||||||
|
Author: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||||
|
Date: Fri Sep 19 22:49:45 2025 +0530
|
||||||
|
|
||||||
|
external/opal-prd: remove misleading errno error message
|
||||||
|
|
||||||
|
While handling a memory error, opal-prd displays misleading errno error
|
||||||
|
messages even when the handler was successfully able to soft/hard offline the
|
||||||
|
requested memory page.
|
||||||
|
|
||||||
|
opal-prd[49096]: MEM: Memory error: range 0000000eeb445700-0000000eeb445700, type: correctable
|
||||||
|
opal-prd[49096]: MEM: Offlined 0000000eeb445700,0000000eeb455700, type correctable: No such file or directory
|
||||||
|
|
||||||
|
In the above example, an error message 'No such file or directory' was
|
||||||
|
displayed even after successfully offlining memory. This is because
|
||||||
|
printf in success case was using '%m' which prints errno based error
|
||||||
|
message. The value in errno is significant only when the return value of
|
||||||
|
the call indicated an error. The value of errno is never set to zero by
|
||||||
|
any system call or library function.
|
||||||
|
|
||||||
|
Hence, in the success case, do not use '%m' in printf to avoid a misleading
|
||||||
|
error message.
|
||||||
|
|
||||||
|
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||||
|
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
||||||
|
index da947c827..d85e3e9bd 100644
|
||||||
|
--- a/external/opal-prd/opal-prd.c
|
||||||
|
+++ b/external/opal-prd/opal-prd.c
|
||||||
|
@@ -721,7 +721,7 @@ static int memory_error_worker(const char *sysfsfile, const char *type,
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s: %m\n",
|
||||||
|
+ pr_log(LOG_CRIT, "MEM: Offlined %016lx,%016lx, type %s\n",
|
||||||
|
i_start_addr, addr, type);
|
||||||
|
|
||||||
|
close(memfd);
|
||||||
58
opal-prd-service-shutdown-on-memory-errors.patch
Normal file
58
opal-prd-service-shutdown-on-memory-errors.patch
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
commit 00416008b8ce018dd149182bf54a650eb95f9309
|
||||||
|
Author: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||||
|
Date: Fri Sep 19 22:49:44 2025 +0530
|
||||||
|
|
||||||
|
external/opal-prd: Fix opal-prd service shutdown on memory errors
|
||||||
|
|
||||||
|
Whenever there is a memory error reported, opal-prd tries to spawn a
|
||||||
|
child process using fork to delegate the memory offline work to child
|
||||||
|
process. After handling the memory error, the child process is supposed to exit.
|
||||||
|
However, instead of delegating the task to the child process, the main thread
|
||||||
|
itself handles the memory error and exits, thus causing the opal-prd service
|
||||||
|
to go into a stop/restart loop and eventually hit the systemd restart
|
||||||
|
limit, leaving the opal-prd service unavailable.
|
||||||
|
|
||||||
|
opal-prd[49096]: MEM: Memory error: range 0000000eeb445700-0000000eeb445700, type: correctable
|
||||||
|
opal-prd[49096]: MEM: Offlined 0000000eeb445700,0000000eeb455700, type correctable: No such file or directory
|
||||||
|
systemd[1]: opal-prd.service: Service RestartSec=100ms expired, scheduling restart.
|
||||||
|
systemd[1]: opal-prd.service: Scheduled restart job, restart counter is at 7.
|
||||||
|
systemd[1]: opal-prd.service: Start request repeated too quickly.
|
||||||
|
systemd[1]: opal-prd.service: Failed with result 'start-limit-hit'.
|
||||||
|
systemd[1]: Failed to start OPAL PRD daemon
|
||||||
|
|
||||||
|
The fork() function, on success, returns pid of child process (pid > 0)
|
||||||
|
in the parent and 0 in the child. Instead of invoking memory worker
|
||||||
|
when return value pid == 0, it invokes worker when pid > 0 which is
|
||||||
|
parent process itself.
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
if (pid > 0)
|
||||||
|
exit(memory_error_worker(sysfsfile, typestr, i_start_addr,
|
||||||
|
i_endAddr));
|
||||||
|
|
||||||
|
The above logic causes the parent thread to exit after handling memory
|
||||||
|
error. Fix this by changing the if condition to (pid == 0).
|
||||||
|
|
||||||
|
Fixes: 8cbd0de88d16 ("opal-prd: Have a worker process handle page offlining")
|
||||||
|
Signed-off-by: Mahesh Salgaonkar <mahesh@linux.ibm.com>
|
||||||
|
Signed-off-by: Reza Arbab <arbab@linux.ibm.com>
|
||||||
|
|
||||||
|
diff --git a/external/opal-prd/opal-prd.c b/external/opal-prd/opal-prd.c
|
||||||
|
index 1c610da4c..da947c827 100644
|
||||||
|
--- a/external/opal-prd/opal-prd.c
|
||||||
|
+++ b/external/opal-prd/opal-prd.c
|
||||||
|
@@ -755,9 +755,13 @@ int hservice_memory_error(uint64_t i_start_addr, uint64_t i_endAddr,
|
||||||
|
/*
|
||||||
|
* HBRT expects the memory offlining process to happen in the background
|
||||||
|
* after the notification is delivered.
|
||||||
|
+ *
|
||||||
|
+ * fork() return value:
|
||||||
|
+ * On success, the PID of the child process is returned in the parent,
|
||||||
|
+ * and 0 is returned in the child.
|
||||||
|
*/
|
||||||
|
pid = fork();
|
||||||
|
- if (pid > 0)
|
||||||
|
+ if (pid == 0)
|
||||||
|
exit(memory_error_worker(sysfsfile, typestr, i_start_addr, i_endAddr));
|
||||||
|
|
||||||
|
if (pid < 0) {
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
Name: opal-prd
|
Name: opal-prd
|
||||||
Version: 6.7.1
|
Version: 6.7.1
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}.1
|
||||||
Summary: OPAL Processor Recovery Diagnostics Daemon
|
Summary: OPAL Processor Recovery Diagnostics Daemon
|
||||||
|
|
||||||
Group: System Environment/Daemons
|
Group: System Environment/Daemons
|
||||||
@@ -30,6 +30,8 @@ Source1: opal-prd-rsyslog
|
|||||||
Source2: opal-prd-logrotate
|
Source2: opal-prd-logrotate
|
||||||
|
|
||||||
# upstream fix
|
# upstream fix
|
||||||
|
Patch1: opal-prd-remove-misleading-errno-error-message.patch
|
||||||
|
Patch2: opal-prd-service-shutdown-on-memory-errors.patch
|
||||||
|
|
||||||
%description
|
%description
|
||||||
This package provides a daemon to load and run the OpenPower firmware's
|
This package provides a daemon to load and run the OpenPower firmware's
|
||||||
@@ -60,6 +62,8 @@ services to the OS (Linux) on IBM Power and OpenPower systems.
|
|||||||
|
|
||||||
%prep
|
%prep
|
||||||
%setup -q -n %{project}-%{version}
|
%setup -q -n %{project}-%{version}
|
||||||
|
%patch -P1 -p1 -b .remove-misleading-errno-error-message
|
||||||
|
%patch -P2 -p1 -b .service-shutdown-on-memory-errors
|
||||||
|
|
||||||
%build
|
%build
|
||||||
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
|
OPAL_PRD_VERSION=%{version} make V=1 CC="gcc" CFLAGS="%{build_cflags}" LDFLAGS="%{build_ldflags}" ASFLAGS="-m64 -Wa,--generate-missing-build-notes=yes" -C external/opal-prd
|
||||||
@@ -136,6 +140,9 @@ install -D -p -m 755 external/pci-scripts/phberr.py %{buildroot}%{_bindir}/phber
|
|||||||
%{_datadir}/qemu/
|
%{_datadir}/qemu/
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Nov 18 2025 Than Ngo <than@redhat.com> - 6.7.1-1.1
|
||||||
|
- Resolves: RHEL-125568, Fix opal-prd service shutdown on memory errors
|
||||||
|
|
||||||
* Mon May 10 2021 Than Ngo <than@redhat.com> - 6.7.1-1
|
* Mon May 10 2021 Than Ngo <than@redhat.com> - 6.7.1-1
|
||||||
- Resolves: #1921665, rebase to 6.7.1
|
- Resolves: #1921665, rebase to 6.7.1
|
||||||
|
|
||||||
Loading…
Reference in New Issue
Block a user