From acc0021a9490ae4964c08a6839308efa2709f926 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 16 Jan 2024 11:01:01 -0600 Subject: [PATCH] wdmd: adjust values for iTCO_wdt iTCO_wdt does not fire until two successive timeouts, so the values for set/get need to be adjusted by a factor of 2 to make the watchdog fire at the correct time. --- wdmd/main.c | 162 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 139 insertions(+), 23 deletions(-) diff --git a/wdmd/main.c b/wdmd/main.c index aebacbea1ead..de54e6ddcdf1 100644 --- a/wdmd/main.c +++ b/wdmd/main.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,7 @@ #define DEFAULT_SOCKET_MODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP) #define WDPATH_SIZE 64 +#define WD_ID_SIZE 64 static int test_interval = DEFAULT_TEST_INTERVAL; static int fire_timeout = DEFAULT_FIRE_TIMEOUT; @@ -68,6 +70,8 @@ static time_t last_closeunclean; static char lockfile_path[PATH_MAX]; static int dev_fd = -1; static int shm_fd; +static int forcefire; +static int itco; /* watchdog_identity is "iTCO_wdt" */ static int allow_scripts; static int kill_script_sec; @@ -75,6 +79,7 @@ static const char *scripts_dir = "/etc/wdmd.d"; static char watchdog_path[WDPATH_SIZE]; static char option_path[WDPATH_SIZE]; static char saved_path[WDPATH_SIZE]; +static char watchdog_identity[WD_ID_SIZE]; struct script_status { uint64_t start; @@ -141,6 +146,19 @@ static uint64_t monotime(void) return ts.tv_sec; } +char time_str_buf[128]; + +static char *time_str(void) +{ + struct timeval cur_time; + struct tm time_info; + + gettimeofday(&cur_time, NULL); + localtime_r(&cur_time.tv_sec, &time_info); + strftime(time_str_buf, sizeof(time_str_buf), "%Y-%m-%d %H:%M:%S ", &time_info); + return time_str_buf; +} + /* * test clients */ @@ -979,6 +997,49 @@ static void close_watchdog(void) dev_fd = -1; } +static int setup_identity(char *wdpath) +{ + char sysfs_path[PATH_MAX] = { 0 }; + char *base, *p; + int fd, rv; + + /* + * This 
function will be called multiple times when probing + * different watchdog paths for one that works. + */ + itco = 0; + memset(watchdog_identity, 0, sizeof(watchdog_identity)); + + /* + * $ cat /sys/class/watchdog/watchdog0/identity + * iTCO_wdt + */ + if (!(base = basename(wdpath))) + return -1; + + snprintf(sysfs_path, PATH_MAX-1, "/sys/class/watchdog/%s/identity", base); + + if ((fd = open(sysfs_path, O_RDONLY)) < 0) + return -1; + + rv = read(fd, watchdog_identity, WD_ID_SIZE-1); + + close(fd); + + if (rv <= 0) + return -1; + + if ((p = strchr(watchdog_identity, '\n'))) + *p = '\0'; + + log_debug("%s %s %s", wdpath, sysfs_path, watchdog_identity); + + if (!strcmp(watchdog_identity, "iTCO_wdt")) + itco = 1; + + return 0; +} + static int _setup_watchdog(char *path) { struct stat buf; @@ -991,6 +1052,8 @@ static int _setup_watchdog(char *path) if (rv < 0) return -1; + setup_identity(path); /* Sets watchdog_identity and itco */ + rv = open_dev(); if (rv < 0) return -1; @@ -1004,10 +1067,15 @@ static int _setup_watchdog(char *path) return -1; } - if (timeout == fire_timeout) - goto out; - - timeout = fire_timeout; + if (itco) { + if ((2 * timeout) == fire_timeout) + goto out; + timeout = fire_timeout / 2; + } else { + if (timeout == fire_timeout) + goto out; + timeout = fire_timeout; + } rv = ioctl(dev_fd, WDIOC_SETTIMEOUT, &timeout); if (rv < 0) { @@ -1016,13 +1084,20 @@ static int _setup_watchdog(char *path) return -1; } - if (timeout != fire_timeout) { - log_error("%s failed to set new timeout", watchdog_path); - close_watchdog(); - return -1; + if (itco) { + if ((2 * timeout) != fire_timeout) { + log_error("%s failed to set new timeout", watchdog_path); + close_watchdog(); + return -1; + } + } else { + if (timeout != fire_timeout) { + log_error("%s failed to set new timeout", watchdog_path); + close_watchdog(); + return -1; + } } out: - log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout); /* TODO: save watchdog_path in /run/wdmd/saved_path, * 
and in startup read that file, copying it to saved_path */ @@ -1102,6 +1177,8 @@ static int probe_dev(const char *path) return -1; } + setup_identity((char *)path); /* Sets watchdog_identity and itco */ + fd = open(path, O_WRONLY | O_CLOEXEC); if (fd < 0) { fprintf(stderr, "error %d open %s\n", errno, path); @@ -1117,14 +1194,22 @@ static int probe_dev(const char *path) goto out; } - if (timeout == fire_timeout) { - printf("%s\n", path); - rv = 0; - goto out; + if (itco) { + if ((2 * timeout) == fire_timeout) { + printf("%s\n", path); + rv = 0; + goto out; + } + timeout = fire_timeout / 2; + } else { + if (timeout == fire_timeout) { + printf("%s\n", path); + rv = 0; + goto out; + } + timeout = fire_timeout; } - timeout = fire_timeout; - rv = ioctl(fd, WDIOC_SETTIMEOUT, &timeout); if (rv < 0) { fprintf(stderr, "error %d ioctl settimeout %s\n", errno, path); @@ -1132,10 +1217,18 @@ static int probe_dev(const char *path) goto out; } - if (timeout != fire_timeout) { - fprintf(stderr, "error %d invalid timeout %s\n", errno, path); - rv = -1; - goto out; + if (itco) { + if ((2 * timeout) != fire_timeout) { + fprintf(stderr, "error %d invalid timeout %s\n", errno, path); + rv = -1; + goto out; + } + } else { + if (timeout != fire_timeout) { + fprintf(stderr, "error %d invalid timeout %s\n", errno, path); + rv = -1; + goto out; + } } printf("%s\n", path); @@ -1518,6 +1611,7 @@ static void print_usage_and_exit(int status) printf("--dump, -d print debug from daemon\n"); printf("--probe, -p print path of functional watchdog device\n"); printf("-D debug: no fork and print all logging to stderr\n"); + printf("-F force watchdog reset for testing (use with -D)\n"); printf("-H 0|1 use high priority features (1 yes, 0 no, default %d)\n", DEFAULT_HIGH_PRIORITY); printf("-G group ownership for the socket\n"); @@ -1556,11 +1650,12 @@ int main(int argc, char *argv[]) {"help", no_argument, 0, 'h' }, {"probe", no_argument, 0, 'p' }, {"dump", no_argument, 0, 'd' }, + {"forcefire", 
no_argument, 0, 'F' }, {"version", no_argument, 0, 'V' }, {0, 0, 0, 0 } }; - c = getopt_long(argc, argv, "hpdVDH:G:S:s:k:w:", + c = getopt_long(argc, argv, "hpdVDFH:G:S:s:k:w:", long_options, &option_index); if (c == -1) break; @@ -1581,6 +1676,9 @@ int main(int argc, char *argv[]) case 'D': daemon_debug = 1; break; + case 'F': + forcefire = 1; + break; case 'G': socket_gname = strdup(optarg); break; @@ -1633,9 +1731,6 @@ int main(int argc, char *argv[]) openlog("wdmd", LOG_CONS | LOG_PID, LOG_DAEMON); - log_error("wdmd started S%d H%d G%d", allow_scripts, high_priority, - socket_gid); - setup_priority(); rv = lockfile(); @@ -1666,6 +1761,27 @@ int main(int argc, char *argv[]) if (rv < 0) goto out_clients; + log_error("wdmd started S%d H%d G%d using %s \"%s\"", allow_scripts, high_priority, + socket_gid, watchdog_path, watchdog_identity[0] ? watchdog_identity : "unknown"); + log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout); + + if (daemon_debug && forcefire) { + int sleep_sec = 0; + int i; + setbuf(stdout, NULL); + printf("%s waiting for watchdog to reset machine:\n", time_str()); + for (i = 1; i < fire_timeout + 5; i++) { + sleep(1); + sleep_sec++; + if (sleep_sec >= fire_timeout+1) + printf("%s %d %s failed to fire after timeout %d seconds\n", time_str(), i, watchdog_path, fire_timeout); + else + printf("%s %d\n", time_str(), i); + } + close_watchdog(); + return -1; + } + rv = test_loop(); close_watchdog(); -- 2.43.0