309 lines
8.0 KiB
Diff
309 lines
8.0 KiB
Diff
From acc0021a9490ae4964c08a6839308efa2709f926 Mon Sep 17 00:00:00 2001
|
|
From: David Teigland <teigland@redhat.com>
|
|
Date: Tue, 16 Jan 2024 11:01:01 -0600
|
|
Subject: [PATCH] wdmd: adjust values for iTCO_wdt
|
|
|
|
iTCO_wdt does not fire until two successive timeouts, so the
|
|
values for set/get need to be adjusted by a factor of 2 to
|
|
make the watchdog fire at the correct time.
|
|
---
|
|
wdmd/main.c | 162 ++++++++++++++++++++++++++++++++++++++++++++--------
|
|
1 file changed, 139 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/wdmd/main.c b/wdmd/main.c
|
|
index aebacbea1ead..de54e6ddcdf1 100644
|
|
--- a/wdmd/main.c
|
|
+++ b/wdmd/main.c
|
|
@@ -31,6 +31,7 @@
|
|
#include <sys/un.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/mman.h>
|
|
+#include <sys/time.h>
|
|
#include <sys/signalfd.h>
|
|
#include <linux/watchdog.h>
|
|
|
|
@@ -55,6 +56,7 @@
|
|
#define DEFAULT_SOCKET_MODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
|
|
|
|
#define WDPATH_SIZE 64
|
|
+#define WD_ID_SIZE 64
|
|
|
|
static int test_interval = DEFAULT_TEST_INTERVAL;
|
|
static int fire_timeout = DEFAULT_FIRE_TIMEOUT;
|
|
@@ -68,6 +70,8 @@ static time_t last_closeunclean;
|
|
static char lockfile_path[PATH_MAX];
|
|
static int dev_fd = -1;
|
|
static int shm_fd;
|
|
+static int forcefire;
|
|
+static int itco; /* watchdog_identity is "iTCO_wdt" */
|
|
|
|
static int allow_scripts;
|
|
static int kill_script_sec;
|
|
@@ -75,6 +79,7 @@ static const char *scripts_dir = "/etc/wdmd.d";
|
|
static char watchdog_path[WDPATH_SIZE];
|
|
static char option_path[WDPATH_SIZE];
|
|
static char saved_path[WDPATH_SIZE];
|
|
+static char watchdog_identity[WD_ID_SIZE];
|
|
|
|
struct script_status {
|
|
uint64_t start;
|
|
@@ -141,6 +146,19 @@ static uint64_t monotime(void)
|
|
return ts.tv_sec;
|
|
}
|
|
|
|
+char time_str_buf[128];
|
|
+
|
|
+static char *time_str(void)
|
|
+{
|
|
+ struct timeval cur_time;
|
|
+ struct tm time_info;
|
|
+
|
|
+ gettimeofday(&cur_time, NULL);
|
|
+ localtime_r(&cur_time.tv_sec, &time_info);
|
|
+ strftime(time_str_buf, sizeof(time_str_buf), "%Y-%m-%d %H:%M:%S ", &time_info);
|
|
+ return time_str_buf;
|
|
+}
|
|
+
|
|
/*
|
|
* test clients
|
|
*/
|
|
@@ -979,6 +997,49 @@ static void close_watchdog(void)
|
|
dev_fd = -1;
|
|
}
|
|
|
|
+static int setup_identity(char *wdpath)
|
|
+{
|
|
+ char sysfs_path[PATH_MAX] = { 0 };
|
|
+ char *base, *p;
|
|
+ int fd, rv;
|
|
+
|
|
+ /*
|
|
+ * This function will be called multiple times when probing
|
|
+ * different watchdog paths for one that works.
|
|
+ */
|
|
+ itco = 0;
|
|
+ memset(watchdog_identity, 0, sizeof(watchdog_identity));
|
|
+
|
|
+ /*
|
|
+ * $ cat /sys/class/watchdog/watchdog0/identity
|
|
+ * iTCO_wdt
|
|
+ */
|
|
+ if (!(base = basename(wdpath)))
|
|
+ return -1;
|
|
+
|
|
+ snprintf(sysfs_path, PATH_MAX-1, "/sys/class/watchdog/%s/identity", base);
|
|
+
|
|
+ if ((fd = open(sysfs_path, O_RDONLY)) < 0)
|
|
+ return -1;
|
|
+
|
|
+ rv = read(fd, watchdog_identity, WD_ID_SIZE-1);
|
|
+
|
|
+ close(fd);
|
|
+
|
|
+ if (rv <= 0)
|
|
+ return -1;
|
|
+
|
|
+ if ((p = strchr(watchdog_identity, '\n')))
|
|
+ *p = '\0';
|
|
+
|
|
+ log_debug("%s %s %s", wdpath, sysfs_path, watchdog_identity);
|
|
+
|
|
+ if (!strcmp(watchdog_identity, "iTCO_wdt"))
|
|
+ itco = 1;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static int _setup_watchdog(char *path)
|
|
{
|
|
struct stat buf;
|
|
@@ -991,6 +1052,8 @@ static int _setup_watchdog(char *path)
|
|
if (rv < 0)
|
|
return -1;
|
|
|
|
+ setup_identity(path); /* Sets watchdog_identity and itco */
|
|
+
|
|
rv = open_dev();
|
|
if (rv < 0)
|
|
return -1;
|
|
@@ -1004,10 +1067,15 @@ static int _setup_watchdog(char *path)
|
|
return -1;
|
|
}
|
|
|
|
- if (timeout == fire_timeout)
|
|
- goto out;
|
|
-
|
|
- timeout = fire_timeout;
|
|
+ if (itco) {
|
|
+ if ((2 * timeout) == fire_timeout)
|
|
+ goto out;
|
|
+ timeout = fire_timeout / 2;
|
|
+ } else {
|
|
+ if (timeout == fire_timeout)
|
|
+ goto out;
|
|
+ timeout = fire_timeout;
|
|
+ }
|
|
|
|
rv = ioctl(dev_fd, WDIOC_SETTIMEOUT, &timeout);
|
|
if (rv < 0) {
|
|
@@ -1016,13 +1084,20 @@ static int _setup_watchdog(char *path)
|
|
return -1;
|
|
}
|
|
|
|
- if (timeout != fire_timeout) {
|
|
- log_error("%s failed to set new timeout", watchdog_path);
|
|
- close_watchdog();
|
|
- return -1;
|
|
+ if (itco) {
|
|
+ if ((2 * timeout) != fire_timeout) {
|
|
+ log_error("%s failed to set new timeout", watchdog_path);
|
|
+ close_watchdog();
|
|
+ return -1;
|
|
+ }
|
|
+ } else {
|
|
+ if (timeout != fire_timeout) {
|
|
+ log_error("%s failed to set new timeout", watchdog_path);
|
|
+ close_watchdog();
|
|
+ return -1;
|
|
+ }
|
|
}
|
|
out:
|
|
- log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout);
|
|
|
|
/* TODO: save watchdog_path in /run/wdmd/saved_path,
|
|
* and in startup read that file, copying it to saved_path */
|
|
@@ -1102,6 +1177,8 @@ static int probe_dev(const char *path)
|
|
return -1;
|
|
}
|
|
|
|
+ setup_identity((char *)path); /* Sets watchdog_identity and itco */
|
|
+
|
|
fd = open(path, O_WRONLY | O_CLOEXEC);
|
|
if (fd < 0) {
|
|
fprintf(stderr, "error %d open %s\n", errno, path);
|
|
@@ -1117,14 +1194,22 @@ static int probe_dev(const char *path)
|
|
goto out;
|
|
}
|
|
|
|
- if (timeout == fire_timeout) {
|
|
- printf("%s\n", path);
|
|
- rv = 0;
|
|
- goto out;
|
|
+ if (itco) {
|
|
+ if ((2 * timeout) == fire_timeout) {
|
|
+ printf("%s\n", path);
|
|
+ rv = 0;
|
|
+ goto out;
|
|
+ }
|
|
+ timeout = fire_timeout / 2;
|
|
+ } else {
|
|
+ if (timeout == fire_timeout) {
|
|
+ printf("%s\n", path);
|
|
+ rv = 0;
|
|
+ goto out;
|
|
+ }
|
|
+ timeout = fire_timeout;
|
|
}
|
|
|
|
- timeout = fire_timeout;
|
|
-
|
|
rv = ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
|
|
if (rv < 0) {
|
|
fprintf(stderr, "error %d ioctl settimeout %s\n", errno, path);
|
|
@@ -1132,10 +1217,18 @@ static int probe_dev(const char *path)
|
|
goto out;
|
|
}
|
|
|
|
- if (timeout != fire_timeout) {
|
|
- fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
|
|
- rv = -1;
|
|
- goto out;
|
|
+ if (itco) {
|
|
+ if ((2 * timeout) != fire_timeout) {
|
|
+ fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
|
|
+ rv = -1;
|
|
+ goto out;
|
|
+ }
|
|
+ } else {
|
|
+ if (timeout != fire_timeout) {
|
|
+ fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
|
|
+ rv = -1;
|
|
+ goto out;
|
|
+ }
|
|
}
|
|
|
|
printf("%s\n", path);
|
|
@@ -1518,6 +1611,7 @@ static void print_usage_and_exit(int status)
|
|
printf("--dump, -d print debug from daemon\n");
|
|
printf("--probe, -p print path of functional watchdog device\n");
|
|
printf("-D debug: no fork and print all logging to stderr\n");
|
|
+ printf("-F force watchdog reset for testing (use with -D)\n");
|
|
printf("-H 0|1 use high priority features (1 yes, 0 no, default %d)\n",
|
|
DEFAULT_HIGH_PRIORITY);
|
|
printf("-G <name> group ownership for the socket\n");
|
|
@@ -1556,11 +1650,12 @@ int main(int argc, char *argv[])
|
|
{"help", no_argument, 0, 'h' },
|
|
{"probe", no_argument, 0, 'p' },
|
|
{"dump", no_argument, 0, 'd' },
|
|
+ {"forcefire", no_argument, 0, 'F' },
|
|
{"version", no_argument, 0, 'V' },
|
|
{0, 0, 0, 0 }
|
|
};
|
|
|
|
- c = getopt_long(argc, argv, "hpdVDH:G:S:s:k:w:",
|
|
+ c = getopt_long(argc, argv, "hpdVDFH:G:S:s:k:w:",
|
|
long_options, &option_index);
|
|
if (c == -1)
|
|
break;
|
|
@@ -1581,6 +1676,9 @@ int main(int argc, char *argv[])
|
|
case 'D':
|
|
daemon_debug = 1;
|
|
break;
|
|
+ case 'F':
|
|
+ forcefire = 1;
|
|
+ break;
|
|
case 'G':
|
|
socket_gname = strdup(optarg);
|
|
break;
|
|
@@ -1633,9 +1731,6 @@ int main(int argc, char *argv[])
|
|
|
|
openlog("wdmd", LOG_CONS | LOG_PID, LOG_DAEMON);
|
|
|
|
- log_error("wdmd started S%d H%d G%d", allow_scripts, high_priority,
|
|
- socket_gid);
|
|
-
|
|
setup_priority();
|
|
|
|
rv = lockfile();
|
|
@@ -1666,6 +1761,27 @@ int main(int argc, char *argv[])
|
|
if (rv < 0)
|
|
goto out_clients;
|
|
|
|
+ log_error("wdmd started S%d H%d G%d using %s \"%s\"", allow_scripts, high_priority,
|
|
+ socket_gid, watchdog_path, watchdog_identity[0] ? watchdog_identity : "unknown");
|
|
+ log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout);
|
|
+
|
|
+ if (daemon_debug && forcefire) {
|
|
+ int sleep_sec = 0;
|
|
+ int i;
|
|
+ setbuf(stdout, NULL);
|
|
+ printf("%s waiting for watchdog to reset machine:\n", time_str());
|
|
+ for (i = 1; i < fire_timeout + 5; i++) {
|
|
+ sleep(1);
|
|
+ sleep_sec++;
|
|
+ if (sleep_sec >= fire_timeout+1)
|
|
+ printf("%s %d %s failed to fire after timeout %d seconds\n", time_str(), i, watchdog_path, fire_timeout);
|
|
+ else
|
|
+ printf("%s %d\n", time_str(), i);
|
|
+ }
|
|
+ close_watchdog();
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
rv = test_loop();
|
|
|
|
close_watchdog();
|
|
--
|
|
2.43.0
|
|
|