watchdog/0002-Generalize-and-make-wa...

154 lines
5.8 KiB
Diff

From 00cf0b0afc7cac797713276435322aeacd6020cb Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
Date: Fri, 16 Dec 2016 23:18:00 +0100
Subject: [PATCH 02/10] Generalize and make watchdog refresh "settimeout"
workaround configurable
Commit 0d156df287656 introduced a workaround for a bug in the kernel
it87_wdt driver, where on some boards the watchdog timer wasn't refreshed
correctly. The workaround was to set its timeout again on every refresh.
However, this workaround was introduced unconditionally for every user of
the it87_wdt driver.
Currently, this kernel bug is supposed to be fixed (by kernel commit
0bcd0b6a47431), so let's revert to the old behavior by default so that we
don't accidentally mask future bugs and also comply with the kernel watchdog
API.
Let's also print an informational message telling the user how to re-enable
the workaround in case this driver bug wasn't fixed completely or the user
has an older kernel version without the fix.
This change also makes this workaround available generically in case
a similar problem occurs in the future in some other driver.
---
include/extern.h | 1 +
src/configfile.c | 3 +++
src/keep_alive.c | 20 +++++++++++---------
watchdog.conf | 3 +++
watchdog.conf.5 | 6 ++++++
5 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/include/extern.h b/include/extern.h
index f00e4cf..cbf97fd 100644
--- a/include/extern.h
+++ b/include/extern.h
@@ -91,6 +91,7 @@ extern char *logdir;
extern char *heartbeat;
extern int hbstamps;
+extern int refresh_use_settimeout;
extern int realtime;
extern struct list *tr_bin_list;
diff --git a/src/configfile.c b/src/configfile.c
index a0996e2..10bbc69 100644
--- a/src/configfile.c
+++ b/src/configfile.c
@@ -28,6 +28,7 @@ static void add_test_binaries(const char *path);
#define ADMIN "admin"
#define CHANGE "change"
#define DEVICE "watchdog-device"
+#define DEVICE_USE_SETTIMEOUT "watchdog-refresh-use-settimeout"
#define DEVICE_TIMEOUT "watchdog-timeout"
#define FILENAME "file"
#define INTERFACE "interface"
@@ -95,6 +96,7 @@ char *logdir = "/var/log/watchdog";
char *heartbeat = NULL;
int hbstamps = 300;
+int refresh_use_settimeout = FALSE;
int realtime = FALSE;
/* Self-repairing binaries list */
@@ -206,6 +208,7 @@ void read_config(char *configfile)
} else if (READ_INT(LOGTICK, &logtick) == 0) {
ticker = logtick;
} else if (READ_STRING(DEVICE, &devname) == 0) {
+ } else if (READ_ENUM(DEVICE_USE_SETTIMEOUT, &refresh_use_settimeout) == 0) {
} else if (READ_INT(DEVICE_TIMEOUT, &dev_timeout) == 0) {
} else if (READ_LIST(TEMP, &temp_list) == 0) {
} else if (READ_INT(MAXTEMP, &maxtemp) == 0) {
diff --git a/src/keep_alive.c b/src/keep_alive.c
index 2f77665..a57b0b5 100644
--- a/src/keep_alive.c
+++ b/src/keep_alive.c
@@ -29,7 +29,6 @@
static int watchdog_fd = -1;
static int timeout_used = TIMER_MARGIN;
-static int Refresh_using_ioctl = FALSE;
/*
* Open the watchdog timer (if name non-NULL) and set the time-out value (if non-zero).
@@ -68,16 +67,19 @@ int open_watchdog(char *name, int timeout)
/* The IT8728 on Gigabyte motherboard (and similar) would trip due to the normal
* refresh in the device driver failing to reset the timer for no obvious reason
* (though the normal operation used the Consumer IR sender to refresh via an
- * interrupt - also a non-obvious method!) so this work-around simply sets the
- * time-out every refresh operation.
+ * interrupt - also a non-obvious method!) so let's warn users of these
+ * watchdogs and direct them to a workaround option.
*
- * See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/932381
+ * See https://bugs.launchpad.net/ubuntu/+source/linux/+bug/932381 and
+ * https://bugzilla.kernel.org/show_bug.cgi?id=42801
*
*/
- Refresh_using_ioctl = FALSE;
- if (strcmp("IT87 WDT", (char *)ident.identity) == 0) {
- Refresh_using_ioctl = TRUE;
- log_message(LOG_INFO, "Running IT87 module fix-up");
+ if (!refresh_use_settimeout && strcmp("IT87 WDT", (char *)ident.identity) == 0) {
+ log_message(LOG_INFO,
+ "IT87 watchdog detected, if watchdog trips by itself when the first timeout interval elapses "
+ "try updating to the latest kernel");
+ log_message(LOG_INFO, "if this does not help please report a kernel bug (not this package bug!) "
+ "and try using watchdog-refresh-use-settimeout=yes config option as a workaround");
}
return rv;
@@ -163,7 +165,7 @@ int keep_alive(void)
if (watchdog_fd == -1)
return (ENOERR);
- if (Refresh_using_ioctl) {
+ if (refresh_use_settimeout) {
int timeout = timeout_used;
if (ioctl(watchdog_fd, WDIOC_SETTIMEOUT, &timeout) < 0) {
err = errno;
diff --git a/watchdog.conf b/watchdog.conf
index 3ccdb97..dcd59ce 100644
--- a/watchdog.conf
+++ b/watchdog.conf
@@ -29,6 +29,9 @@
#repair-maximum = 1
#watchdog-device = /dev/watchdog
+# If your watchdog trips by itself when the first timeout interval elapses try
+# uncommenting the line below
+#watchdog-refresh-use-settimeout = yes
# Defaults compiled into the binary
#temperature-sensor =
diff --git a/watchdog.conf.5 b/watchdog.conf.5
index 5af519d..4e5155a 100644
--- a/watchdog.conf.5
+++ b/watchdog.conf.5
@@ -64,6 +64,12 @@ Set the watchdog device name, typically /dev/watchdog. Default is to disable
keep alive support. This should be tested by running the daemon from the
command line before configuring it to start automatically on booting.
.TP
+watchdog-refresh-use-settimeout = <yes|no>
+Refresh watchdog timer by setting its timeout instead of using a normal watchdog
+refresh operation.
+Might help if your watchdog trips by itself when the first timeout interval
+elapses.
+.TP
watchdog-timeout = <timeout>
Set the watchdog device timeout during startup. If not set, a default is used
that should be set to the kernel timer margin at compile time.
--
2.20.1