mcelog: enable mcelog daemon mode, triggers, and use systemd

Update mcelog to use daemon mode by default, fix the upstream
triggers to actually work, enable systemd, and temporarily add
a hack that causes mcelog to run once knowing it will fail due
to an upstream kernel bug (first read of /dev/mcelog will fail).

NOTE: as soon as upstream fixes are taken this will be cleaned up.

Signed-off-by: Jon Masters <jcm@jonmasters.org>
This commit is contained in:
Jon Masters 2010-11-10 05:23:28 -05:00
parent 982761ae29
commit 09802bf945
5 changed files with 176 additions and 17 deletions

View File

@ -0,0 +1,34 @@
diff -urNp mcelog-1.0pre3_orig/Makefile mcelog-1.0pre3/Makefile
--- mcelog-1.0pre3_orig/Makefile 2010-01-20 21:36:52.000000000 -0500
+++ mcelog-1.0pre3/Makefile 2010-11-10 04:51:05.512725239 -0500
@@ -57,7 +57,7 @@ install: mcelog
install -m 644 -p mcelog.8 ${prefix}/share/man/man8
install -m 644 -p -b mcelog.conf ${etcprefix}/etc/mcelog/mcelog.conf
for i in ${TRIGGERS} ; do \
- install -m 755 -p -b triggers/$$i ${etcprefix}/etc/mcelog ; \
+ install -m 755 -p -b triggers/$$i ${etcprefix}/etc/mcelog/triggers ; \
done
ifdef DOCDIR
install -m 644 -p ${DOC} ${DOCDIR}
diff -urNp mcelog-1.0pre3_orig/mcelog.cron mcelog-1.0pre3/mcelog.cron
--- mcelog-1.0pre3_orig/mcelog.cron 2010-01-20 21:36:52.000000000 -0500
+++ mcelog-1.0pre3/mcelog.cron 2010-11-10 04:51:05.593724528 -0500
@@ -1,2 +1,5 @@
#!/bin/bash
-/usr/sbin/mcelog --ignorenodev --filter >> /var/log/mcelog
+
+# Disabled by default on Fedora since this is run as daemon
+# using the mcelog.service systemd configuration entries.
+#/usr/sbin/mcelog --ignorenodev --filter >> /var/log/mcelog
diff -urNp mcelog-1.0pre3_orig/triggers/cache-error-trigger mcelog-1.0pre3/triggers/cache-error-trigger
--- mcelog-1.0pre3_orig/triggers/cache-error-trigger 2010-01-20 21:36:52.000000000 -0500
+++ mcelog-1.0pre3/triggers/cache-error-trigger 2010-11-10 04:51:05.594724981 -0500
@@ -17,7 +17,7 @@
#
# offline the CPUs (except CPU #0) sharing the affected cache
#
-for i in $CPUS_AFFECTED ; do
+for i in $AFFECTED_CPUS ; do
if [ $i = 0 ] ; then
logger -s -p daemon.warn -t mcelog "Not offlining CPU 0"
continue

57
mcelog.conf Normal file
View File

@ -0,0 +1,57 @@
#
# config file for mcelog
# For further options, see the mcelog manpage and documentation
#
# Filter out known broken events by default
filter = yes
# don't log memory errors individually
#filter-memory-errors = yes
# output in undecoded raw format to be easier machine readable
#raw = yes
[server]
# An upstream bug prevents this from being disabled
# Only allow root to connect by default
client-user = root
# Path to socket client uses to connect
socket-path = /var/run/mcelog-client
[dimm]
# Enable DIMM-tracking
dimm-tracking-enabled = yes
# Disable DIMM DMI pre-population unless supported on your system
dmi-prepopulate = no
# execute these triggers when the rate of corrected or uncorrected
# errors per DIMM exceeds the threshold
uc-error-trigger = dimm-error-trigger
uc-error-threshold = 1 / 24h
ce-error-trigger = dimm-error-trigger
ce-error-threshold = 10 / 24h
[socket]
# Memory error accounting per socket
# The key must be spelled "socket-tracking-enabled"; the previous
# "socket-tracing-enabled" spelling does not match the documented option.
socket-tracking-enabled = yes
# Thresholds are written as <count> / <period>
mem-uc-error-threshold = 100 / 24h
mem-ce-error-trigger = socket-memory-error-trigger
mem-ce-error-threshold = 100 / 24h
mem-ce-error-log = yes
[cache]
# Attempt to off-line CPUs causing cache errors
cache-threshold-trigger = cache-error-trigger
cache-threshold-log = yes
[page]
# Try to soft-offline a 4K page if it exceeds the threshold
memory-ce-threshold = 10 / 24h
memory-ce-trigger = page-error-trigger
memory-ce-log = yes
memory-ce-action = soft
[trigger]
# Maximum number of running triggers
children-max = 2
directory = /etc/mcelog/triggers

17
mcelog.service Normal file
View File

@ -0,0 +1,17 @@
[Unit]
Description=Machine Check Exception Logging Daemon
After=syslog.target
# FIXME - due to upstream kernel bug always start the mcelog process
# twice using the following ExecStartPre hack. This needs fixing.
# There is a bug filed against systemd for the ExecStartPre bit
# since it is not possible to specify that the ExecStartPre bit
# is allowed and expected to fail without aborting the daemon.
[Service]
ExecStartPre=/etc/mcelog/mcelog.setup
ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground
StandardOutput=syslog
[Install]
WantedBy=multi-user.target

12
mcelog.setup Normal file
View File

@ -0,0 +1,12 @@
#!/bin/sh
#
# mcelog.setup - throwaway first run of mcelog for the systemd service.
#
# Because of an upstream kernel bug, mcelog does not start normally in
# daemon mode the first time it is run, so the systemd service starts it
# twice: once as an ExecStartPre step that is expected to fail, and once
# for real. systemd aborts the service when the "pre" step fails, so this
# wrapper - temporarily - performs the first run and always reports
# success.
#
# Waiting on Andi Kleen to fix upstream.
#
# The "|| :" discards the expected failure of this first invocation.
/usr/sbin/mcelog --ignorenodev --syslog --foreground || :
exit 0

View File

@ -3,51 +3,90 @@
Summary: Tool to translate x86-64 CPU Machine Check Exception data.
Name: mcelog
Version: 1.0
Release: 0.1.%{pre_release}%{?dist}
Release: 0.2.%{pre_release}%{?dist}
Epoch: 2
Group: System Environment/Base
License: GPLv2
Source0: http://www.kernel.org/pub/linux/utils/cpu/mce/mcelog-%{version}%{pre_release}.tar.bz2
Source1: mcelog.conf
Source2: mcelog.service
Source10: mcelog.setup
Patch0: mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch
URL: http://www.kernel.org/pub/linux/utils/cpu/mce/
Buildroot: %{_tmppath}/%{name}-%{version}-root
ExclusiveArch: x86_64
ExclusiveArch: i686 x86_64
%description
mcelog is a daemon that collects and decodes Machine Check Exception data
on x86-64 machines.
mcelog is a utility that collects and decodes Machine Check Exception data
on x86-32 and x86-64 systems. It can be run either as a daemon, or by cron.
%prep
%setup -q -n %{name}-%{version}%{pre_release}
%patch0 -p1 -b .fix-triggers-and-cacheing
%build
rm -rf %{buildroot}
mkdir -p %{buildroot}%{_sbindir}
mkdir -p %{buildroot}%{_mandir}
rm -rf $RPM_BUILD_ROOT
mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}
mkdir -p $RPM_BUILD_ROOT/%{_sbindir}
mkdir -p $RPM_BUILD_ROOT/%{_mandir}
make CFLAGS="$RPM_OPT_FLAGS -fpie -pie"
%install
mkdir -p %{buildroot}%{_mandir}/man{1,8}
mkdir -p %{buildroot}%{_sysconfdir}/cron.hourly
mkdir -p %{buildroot}%{_sbindir}
install mcelog %{buildroot}%{_sbindir}/mcelog
install mcelog.cron %{buildroot}%{_sysconfdir}/cron.hourly/mcelog.cron
cp mcelog.8 %{buildroot}%{_mandir}/man8
cd ..
chmod -R a-s %{buildroot}
mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man{1,8}
mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog
mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers
mkdir -p $RPM_BUILD_ROOT/lib/systemd/system
mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/cron.hourly
mkdir -p $RPM_BUILD_ROOT/%{_sbindir}
install -p -m755 mcelog $RPM_BUILD_ROOT/%{_sbindir}/mcelog
install -p -m644 %{SOURCE1} $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/mcelog.conf
install -p -m755 %{SOURCE10} $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/mcelog.setup
install -p -m755 triggers/cache-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/cache-error-trigger
install -p -m755 triggers/dimm-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/dimm-error-trigger
install -p -m755 triggers/page-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/page-error-trigger
install -p -m755 triggers/socket-memory-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/socket-memory-error-trigger
install -p -m755 mcelog.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.hourly/mcelog.cron
install -p -m644 %{SOURCE2} $RPM_BUILD_ROOT/lib/systemd/system/mcelog.service
install -p -m644 mcelog.8 $RPM_BUILD_ROOT/%{_mandir}/man8
%clean
rm -rf %{buildroot}
rm -rf $RPM_BUILD_ROOT
%post
# Reload systemd so it picks up the installed unit file, then enable the
# service. The two commands are independent: previously the reload was
# chained behind "||" and therefore only ran when the enable failed.
# Each command ends in "|| :" so a systemctl failure cannot make the
# scriptlet itself exit non-zero.
systemctl daemon-reload &> /dev/null || :
systemctl enable mcelog.service &> /dev/null || :
%preun
# Handle removing mcelog
# The first scriptlet argument is 0 on final removal (as opposed to an
# upgrade); only then is the service disabled and stopped.
# "|| :" keeps a failing systemctl from failing the scriptlet, which
# could otherwise block removal of the package.
if [ "$1" -eq 0 ]; then
    systemctl disable mcelog.service &> /dev/null || :
    systemctl stop mcelog.service &> /dev/null || :
fi
%postun
# Make systemd re-read its unit files after the package contents changed,
# so that a restart below uses the current unit definition.
systemctl daemon-reload &> /dev/null || :
# Handle upgrading mcelog
# The first scriptlet argument is >= 1 when another version of the
# package remains installed; restart the service only if it is running.
# "|| :" keeps a failing systemctl from failing the scriptlet.
if [ "$1" -ge 1 ]; then
    systemctl try-restart mcelog.service &> /dev/null || :
fi
%files
%defattr(-,root,root,-)
%doc README CHANGES
%{_sbindir}/mcelog
%dir %{_sysconfdir}/mcelog
%{_sysconfdir}/mcelog/triggers
%config(noreplace) %{_sysconfdir}/mcelog/mcelog.conf
%{_sysconfdir}/mcelog/mcelog.setup
%{_sysconfdir}/cron.hourly/mcelog.cron
/lib/systemd/system/mcelog.service
%attr(0644,root,root) %{_mandir}/*/*
%changelog
* Tue Nov 09 2010 Jon Masters <jcm@rehat.com> 2:1.0-0.1.pre3
* Wed Nov 10 2010 Jon Masters <jcm@redhat.com> 2:1.0-0.2.pre3
- Rework mcelog to use daemon mode and systemd.
* Tue Nov 09 2010 Jon Masters <jcm@redhat.com> 2:1.0-0.1.pre3
- Bump epoch and use standard Fedora Packaging Guidelines for NVR.
- Switch to using signed bz2 source and remove dead patch.