From 09802bf945c53d5d4d47fdadef50fc50230d172d Mon Sep 17 00:00:00 2001 From: Jon Masters Date: Wed, 10 Nov 2010 05:23:28 -0500 Subject: [PATCH] mcelog: enable mcelog daemon mode, triggers, and use systemd Update mcelog to use daemon mode by default, fix the upstream triggers to actually work, enable systemd, and temporarily add a hack that causes mcelog to run once knowing it will fail due to an upstream kernel bug (first read of /dev/mcelog will fail). NOTE: as soon as upstream fixes are taken this will be cleaned up. Signed-off-by: Jon Masters --- ....0pre3-fix-trigger-path-and-cacheing.patch | 34 +++++++++ mcelog.conf | 57 +++++++++++++++ mcelog.service | 17 +++++ mcelog.setup | 12 +++ mcelog.spec | 73 ++++++++++++++----- 5 files changed, 176 insertions(+), 17 deletions(-) create mode 100644 mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch create mode 100644 mcelog.conf create mode 100644 mcelog.service create mode 100644 mcelog.setup diff --git a/mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch b/mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch new file mode 100644 index 0000000..5206b67 --- /dev/null +++ b/mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch @@ -0,0 +1,34 @@ +diff -urNp mcelog-1.0pre3_orig/Makefile mcelog-1.0pre3/Makefile +--- mcelog-1.0pre3_orig/Makefile 2010-01-20 21:36:52.000000000 -0500 ++++ mcelog-1.0pre3/Makefile 2010-11-10 04:51:05.512725239 -0500 +@@ -57,7 +57,7 @@ install: mcelog + install -m 644 -p mcelog.8 ${prefix}/share/man/man8 + install -m 644 -p -b mcelog.conf ${etcprefix}/etc/mcelog/mcelog.conf + for i in ${TRIGGERS} ; do \ +- install -m 755 -p -b triggers/$$i ${etcprefix}/etc/mcelog ; \ ++ install -m 755 -p -b triggers/$$i ${etcprefix}/etc/mcelog/triggers ; \ + done + ifdef DOCDIR + install -m 644 -p ${DOC} ${DOCDIR} +diff -urNp mcelog-1.0pre3_orig/mcelog.cron mcelog-1.0pre3/mcelog.cron +--- mcelog-1.0pre3_orig/mcelog.cron 2010-01-20 21:36:52.000000000 -0500 ++++ mcelog-1.0pre3/mcelog.cron 2010-11-10 04:51:05.593724528 
-0500 +@@ -1,2 +1,5 @@ + #!/bin/bash +-/usr/sbin/mcelog --ignorenodev --filter >> /var/log/mcelog ++ ++# Disabled by default on Fedora since this is run as daemon ++# using the mcelog.service systemd configuration entries. ++#/usr/sbin/mcelog --ignorenodev --filter >> /var/log/mcelog +diff -urNp mcelog-1.0pre3_orig/triggers/cache-error-trigger mcelog-1.0pre3/triggers/cache-error-trigger +--- mcelog-1.0pre3_orig/triggers/cache-error-trigger 2010-01-20 21:36:52.000000000 -0500 ++++ mcelog-1.0pre3/triggers/cache-error-trigger 2010-11-10 04:51:05.594724981 -0500 +@@ -17,7 +17,7 @@ + # + # offline the CPUs (except CPU #0) sharing the affected cache + # +-for i in $CPUS_AFFECTED ; do ++for i in $AFFECTED_CPUS ; do + if [ $i = 0 ] ; then + logger -s -p daemon.warn -t mcelog "Not offlining CPU 0" + continue diff --git a/mcelog.conf b/mcelog.conf new file mode 100644 index 0000000..e4acbef --- /dev/null +++ b/mcelog.conf @@ -0,0 +1,57 @@ +# +# config file for mcelog +# For further options, see the mcelog manpage and documentation +# + +# Filter out known broken events by default +filter = yes +# don't log memory errors individually +#filter-memory-errors = yes + +# output in undecoded raw format to be easier machine readable +#raw = yes + +[server] +# An upstream bug prevents this from being disabled +# Only allow root to connect by default +client-user = root +# Path to socket client uses to connect +socket-path = /var/run/mcelog-client + +[dimm] +# Enable DIMM-tracking +dimm-tracking-enabled = yes +# Disable DIMM DMI pre-population unless supported on your system +dmi-prepopulate = no + +# execute these triggers when the rate of corrected or uncorrected +# errors per DIMM exceeds the threshold +uc-error-trigger = dimm-error-trigger +uc-error-threshold = 1 / 24h +ce-error-trigger = dimm-error-trigger +ce-error-threshold = 10 / 24h + +[socket] +# Memory error accounting per socket +socket-tracking-enabled = yes +mem-uc-error-threshold = 100 / 24h +mem-ce-error-trigger = 
socket-memory-error-trigger +mem-ce-error-threshold = 100 / 24h +mem-ce-error-log = yes + +[cache] +# Attempt to off-line CPUs causing cache errors +cache-threshold-trigger = cache-error-trigger +cache-threshold-log = yes + +[page] +# Try to soft-offline a 4K page if it exceeds the threshold +memory-ce-threshold = 10 / 24h +memory-ce-trigger = page-error-trigger +memory-ce-log = yes +memory-ce-action = soft + +[trigger] +# Maximum number of running triggers +children-max = 2 +directory = /etc/mcelog/triggers diff --git a/mcelog.service b/mcelog.service new file mode 100644 index 0000000..62d8cbe --- /dev/null +++ b/mcelog.service @@ -0,0 +1,17 @@ +[Unit] +Description=Machine Check Exception Logging Daemon +After=syslog.target + +# FIXME - due to upstream kernel bug always start the mcelog process +# twice using the following ExecStartPre hack. This needs fixing. +# There is a bug filed against systemd for the ExecStartPre bit +# since it is not possible to specify that the ExecStartPre bit +# is allowed and expected to fail without aborting the daemon. + +[Service] +ExecStartPre=/etc/mcelog/mcelog.setup +ExecStart=/usr/sbin/mcelog --ignorenodev --daemon --foreground +StandardOutput=syslog + +[Install] +WantedBy=multi-user.target diff --git a/mcelog.setup b/mcelog.setup new file mode 100644 index 0000000..c1966b8 --- /dev/null +++ b/mcelog.setup @@ -0,0 +1,12 @@ +#!/bin/sh +# +# An upstream kernel bug prevents mcelog from starting normally in +# daemon mode the first time it is run. So, in the systemd service, +# we want to start it twice - once as an ExecStartPre that will fail. +# But systemd will abort the process if the "pre" fails, so we use +# this script - temporarily - to start the first process. +# +# Waiting on Andi Kleen to fix upstream. 
+# +/usr/sbin/mcelog --ignorenodev --syslog --foreground +exit 0 diff --git a/mcelog.spec b/mcelog.spec index c793efe..98efcf8 100644 --- a/mcelog.spec +++ b/mcelog.spec @@ -3,51 +3,90 @@ Summary: Tool to translate x86-64 CPU Machine Check Exception data. Name: mcelog Version: 1.0 -Release: 0.1.%{pre_release}%{?dist} +Release: 0.2.%{pre_release}%{?dist} Epoch: 2 Group: System Environment/Base License: GPLv2 Source0: http://www.kernel.org/pub/linux/utils/cpu/mce/mcelog-%{version}%{pre_release}.tar.bz2 +Source1: mcelog.conf +Source2: mcelog.service +Source10: mcelog.setup +Patch0: mcelog-1.0pre3-fix-trigger-path-and-cacheing.patch URL: http://www.kernel.org/pub/linux/utils/cpu/mce/ Buildroot: %{_tmppath}/%{name}-%{version}-root -ExclusiveArch: x86_64 +ExclusiveArch: i686 x86_64 %description -mcelog is a daemon that collects and decodes Machine Check Exception data -on x86-64 machines. +mcelog is a utility that collects and decodes Machine Check Exception data +on x86-32 and x86-64 systems. It can be run either as a daemon, or by cron. %prep %setup -q -n %{name}-%{version}%{pre_release} +%patch0 -p1 -b .fix-triggers-and-cacheing %build -rm -rf %{buildroot} -mkdir -p %{buildroot}%{_sbindir} -mkdir -p %{buildroot}%{_mandir} +rm -rf $RPM_BUILD_ROOT +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir} +mkdir -p $RPM_BUILD_ROOT/%{_sbindir} +mkdir -p $RPM_BUILD_ROOT/%{_mandir} make CFLAGS="$RPM_OPT_FLAGS -fpie -pie" %install -mkdir -p %{buildroot}%{_mandir}/man{1,8} -mkdir -p %{buildroot}%{_sysconfdir}/cron.hourly -mkdir -p %{buildroot}%{_sbindir} -install mcelog %{buildroot}%{_sbindir}/mcelog -install mcelog.cron %{buildroot}%{_sysconfdir}/cron.hourly/mcelog.cron -cp mcelog.8 %{buildroot}%{_mandir}/man8 -cd .. 
-chmod -R a-s %{buildroot} +mkdir -p $RPM_BUILD_ROOT/%{_mandir}/man{1,8} +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers +mkdir -p $RPM_BUILD_ROOT/lib/systemd/system +mkdir -p $RPM_BUILD_ROOT/%{_sysconfdir}/cron.hourly +mkdir -p $RPM_BUILD_ROOT/%{_sbindir} +install -p -m755 mcelog $RPM_BUILD_ROOT/%{_sbindir}/mcelog +install -p -m644 %{SOURCE1} $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/mcelog.conf +install -p -m755 %{SOURCE10} $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/mcelog.setup +install -p -m755 triggers/cache-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/cache-error-trigger +install -p -m755 triggers/dimm-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/dimm-error-trigger +install -p -m755 triggers/page-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/page-error-trigger +install -p -m755 triggers/socket-memory-error-trigger $RPM_BUILD_ROOT/%{_sysconfdir}/mcelog/triggers/socket-memory-error-trigger +install -p -m755 mcelog.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.hourly/mcelog.cron +install -p -m644 %{SOURCE2} $RPM_BUILD_ROOT/lib/systemd/system/mcelog.service +install -p -m644 mcelog.8 $RPM_BUILD_ROOT/%{_mandir}/man8 %clean -rm -rf %{buildroot} +rm -rf $RPM_BUILD_ROOT + +%post +systemctl enable mcelog.service &> /dev/null || : +systemctl daemon-reload &> /dev/null || : + +%preun +# Handle removing mcelog +if [ "$1" -eq 0 ]; then + systemctl disable mcelog.service &> /dev/null + systemctl stop mcelog.service &> /dev/null +fi + +%postun +# Handle upgrading mcelog +if [ "$1" -ge 1 ]; then + systemctl try-restart mcelog.service &> /dev/null +fi %files %defattr(-,root,root,-) %doc README CHANGES %{_sbindir}/mcelog +%dir %{_sysconfdir}/mcelog +%{_sysconfdir}/mcelog/triggers +%config(noreplace) %{_sysconfdir}/mcelog/mcelog.conf +%{_sysconfdir}/mcelog/mcelog.setup %{_sysconfdir}/cron.hourly/mcelog.cron +/lib/systemd/system/mcelog.service %attr(0644,root,root) %{_mandir}/*/* %changelog -* Tue 
Nov 09 2010 Jon Masters 2:1.0-0.1.pre3 +* Wed Nov 10 2010 Jon Masters 2:1.0-0.2.pre3 +- Rework mcelog to use daemon mode and systemd. + +* Tue Nov 09 2010 Jon Masters 2:1.0-0.1.pre3 - Bump epoch and use standard Fedora Packaging Guidelines for NVR. - Switch to using signed bz2 source and remove dead patch.