Compare commits

...

No commits in common. "c8" and "c8-beta" have entirely different histories.
c8 ... c8-beta

2 changed files with 320 additions and 7 deletions

View File

@ -0,0 +1,308 @@
From acc0021a9490ae4964c08a6839308efa2709f926 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Tue, 16 Jan 2024 11:01:01 -0600
Subject: [PATCH] wdmd: adjust values for iTCO_wdt
iTCO_wdt does not fire until two successive timeouts, so the
values for set/get need to be adjusted by a factor of 2 to
make the watchdog fire at the correct time.
---
wdmd/main.c | 162 ++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 139 insertions(+), 23 deletions(-)
diff --git a/wdmd/main.c b/wdmd/main.c
index aebacbea1ead..de54e6ddcdf1 100644
--- a/wdmd/main.c
+++ b/wdmd/main.c
@@ -31,6 +31,7 @@
#include <sys/un.h>
#include <sys/stat.h>
#include <sys/mman.h>
+#include <sys/time.h>
#include <sys/signalfd.h>
#include <linux/watchdog.h>
@@ -55,6 +56,7 @@
#define DEFAULT_SOCKET_MODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)
#define WDPATH_SIZE 64
+#define WD_ID_SIZE 64
static int test_interval = DEFAULT_TEST_INTERVAL;
static int fire_timeout = DEFAULT_FIRE_TIMEOUT;
@@ -68,6 +70,8 @@ static time_t last_closeunclean;
static char lockfile_path[PATH_MAX];
static int dev_fd = -1;
static int shm_fd;
+static int forcefire;
+static int itco; /* watchdog_identity is "iTCO_wdt" */
static int allow_scripts;
static int kill_script_sec;
@@ -75,6 +79,7 @@ static const char *scripts_dir = "/etc/wdmd.d";
static char watchdog_path[WDPATH_SIZE];
static char option_path[WDPATH_SIZE];
static char saved_path[WDPATH_SIZE];
+static char watchdog_identity[WD_ID_SIZE];
struct script_status {
uint64_t start;
@@ -141,6 +146,19 @@ static uint64_t monotime(void)
return ts.tv_sec;
}
+char time_str_buf[128]; /* output buffer for time_str(); overwritten by every call */
+/* Format the current local time as "YYYY-MM-DD HH:MM:SS " (note trailing space) into time_str_buf. */
+static char *time_str(void)
+{
+ struct timeval cur_time;
+ struct tm time_info;
+
+ gettimeofday(&cur_time, NULL); /* return value is not checked */
+ localtime_r(&cur_time.tv_sec, &time_info); /* convert seconds to broken-down local time */
+ strftime(time_str_buf, sizeof(time_str_buf), "%Y-%m-%d %H:%M:%S ", &time_info);
+ return time_str_buf; /* points at the global buffer: use the result before calling again */
+}
+
/*
* test clients
*/
@@ -979,6 +997,49 @@ static void close_watchdog(void)
dev_fd = -1;
}
+static int setup_identity(char *wdpath)
+{
+ char sysfs_path[PATH_MAX] = { 0 };
+ char *base, *p;
+ int fd, rv;
+
+ /*
+ * Called once per candidate path while probing for a watchdog that works, so
+ * clear results of any earlier call. Returns 0 if identity was read, else -1.
+ */
+ itco = 0;
+ memset(watchdog_identity, 0, sizeof(watchdog_identity));
+
+ /*
+ * $ cat /sys/class/watchdog/watchdog0/identity
+ * iTCO_wdt
+ */
+ if (!(base = basename(wdpath)))
+ return -1;
+ /* Build the sysfs identity path from the last component of the device path. */
+ snprintf(sysfs_path, PATH_MAX-1, "/sys/class/watchdog/%s/identity", base);
+
+ if ((fd = open(sysfs_path, O_RDONLY)) < 0)
+ return -1;
+ /* Read at most WD_ID_SIZE-1 bytes so the zeroed buffer stays NUL-terminated. */
+ rv = read(fd, watchdog_identity, WD_ID_SIZE-1);
+
+ close(fd);
+ /* Read error or empty file: report failure; itco remains 0. */
+ if (rv <= 0)
+ return -1;
+ /* Cut the string at the first newline, if any, so strcmp() below can match exactly. */
+ if ((p = strchr(watchdog_identity, '\n')))
+ *p = '\0';
+
+ log_debug("%s %s %s", wdpath, sysfs_path, watchdog_identity);
+ /* itco tells the set/get timeout code to use fire_timeout / 2 for this driver. */
+ if (!strcmp(watchdog_identity, "iTCO_wdt"))
+ itco = 1;
+
+ return 0;
+}
+
static int _setup_watchdog(char *path)
{
struct stat buf;
@@ -991,6 +1052,8 @@ static int _setup_watchdog(char *path)
if (rv < 0)
return -1;
+ setup_identity(path); /* Sets watchdog_identity and itco */
+
rv = open_dev();
if (rv < 0)
return -1;
@@ -1004,10 +1067,15 @@ static int _setup_watchdog(char *path)
return -1;
}
- if (timeout == fire_timeout)
- goto out;
-
- timeout = fire_timeout;
+ if (itco) {
+ if ((2 * timeout) == fire_timeout)
+ goto out;
+ timeout = fire_timeout / 2;
+ } else {
+ if (timeout == fire_timeout)
+ goto out;
+ timeout = fire_timeout;
+ }
rv = ioctl(dev_fd, WDIOC_SETTIMEOUT, &timeout);
if (rv < 0) {
@@ -1016,13 +1084,20 @@ static int _setup_watchdog(char *path)
return -1;
}
- if (timeout != fire_timeout) {
- log_error("%s failed to set new timeout", watchdog_path);
- close_watchdog();
- return -1;
+ if (itco) {
+ if ((2 * timeout) != fire_timeout) {
+ log_error("%s failed to set new timeout", watchdog_path);
+ close_watchdog();
+ return -1;
+ }
+ } else {
+ if (timeout != fire_timeout) {
+ log_error("%s failed to set new timeout", watchdog_path);
+ close_watchdog();
+ return -1;
+ }
}
out:
- log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout);
/* TODO: save watchdog_path in /run/wdmd/saved_path,
* and in startup read that file, copying it to saved_path */
@@ -1102,6 +1177,8 @@ static int probe_dev(const char *path)
return -1;
}
+ setup_identity((char *)path); /* Sets watchdog_identity and itco */
+
fd = open(path, O_WRONLY | O_CLOEXEC);
if (fd < 0) {
fprintf(stderr, "error %d open %s\n", errno, path);
@@ -1117,14 +1194,22 @@ static int probe_dev(const char *path)
goto out;
}
- if (timeout == fire_timeout) {
- printf("%s\n", path);
- rv = 0;
- goto out;
+ if (itco) {
+ if ((2 * timeout) == fire_timeout) {
+ printf("%s\n", path);
+ rv = 0;
+ goto out;
+ }
+ timeout = fire_timeout / 2;
+ } else {
+ if (timeout == fire_timeout) {
+ printf("%s\n", path);
+ rv = 0;
+ goto out;
+ }
+ timeout = fire_timeout;
}
- timeout = fire_timeout;
-
rv = ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
if (rv < 0) {
fprintf(stderr, "error %d ioctl settimeout %s\n", errno, path);
@@ -1132,10 +1217,18 @@ static int probe_dev(const char *path)
goto out;
}
- if (timeout != fire_timeout) {
- fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
- rv = -1;
- goto out;
+ if (itco) {
+ if ((2 * timeout) != fire_timeout) {
+ fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
+ rv = -1;
+ goto out;
+ }
+ } else {
+ if (timeout != fire_timeout) {
+ fprintf(stderr, "error %d invalid timeout %s\n", errno, path);
+ rv = -1;
+ goto out;
+ }
}
printf("%s\n", path);
@@ -1518,6 +1611,7 @@ static void print_usage_and_exit(int status)
printf("--dump, -d print debug from daemon\n");
printf("--probe, -p print path of functional watchdog device\n");
printf("-D debug: no fork and print all logging to stderr\n");
+ printf("-F force watchdog reset for testing (use with -D)\n");
printf("-H 0|1 use high priority features (1 yes, 0 no, default %d)\n",
DEFAULT_HIGH_PRIORITY);
printf("-G <name> group ownership for the socket\n");
@@ -1556,11 +1650,12 @@ int main(int argc, char *argv[])
{"help", no_argument, 0, 'h' },
{"probe", no_argument, 0, 'p' },
{"dump", no_argument, 0, 'd' },
+ {"forcefire", no_argument, 0, 'F' },
{"version", no_argument, 0, 'V' },
{0, 0, 0, 0 }
};
- c = getopt_long(argc, argv, "hpdVDH:G:S:s:k:w:",
+ c = getopt_long(argc, argv, "hpdVDFH:G:S:s:k:w:",
long_options, &option_index);
if (c == -1)
break;
@@ -1581,6 +1676,9 @@ int main(int argc, char *argv[])
case 'D':
daemon_debug = 1;
break;
+ case 'F':
+ forcefire = 1;
+ break;
case 'G':
socket_gname = strdup(optarg);
break;
@@ -1633,9 +1731,6 @@ int main(int argc, char *argv[])
openlog("wdmd", LOG_CONS | LOG_PID, LOG_DAEMON);
- log_error("wdmd started S%d H%d G%d", allow_scripts, high_priority,
- socket_gid);
-
setup_priority();
rv = lockfile();
@@ -1666,6 +1761,27 @@ int main(int argc, char *argv[])
if (rv < 0)
goto out_clients;
+ log_error("wdmd started S%d H%d G%d using %s \"%s\"", allow_scripts, high_priority,
+ socket_gid, watchdog_path, watchdog_identity[0] ? watchdog_identity : "unknown");
+ log_error("%s armed with fire_timeout %d", watchdog_path, fire_timeout);
+
+ if (daemon_debug && forcefire) {
+ int sleep_sec = 0;
+ int i;
+ setbuf(stdout, NULL);
+ printf("%s waiting for watchdog to reset machine:\n", time_str());
+ for (i = 1; i < fire_timeout + 5; i++) {
+ sleep(1);
+ sleep_sec++;
+ if (sleep_sec >= fire_timeout+1)
+ printf("%s %d %s failed to fire after timeout %d seconds\n", time_str(), i, watchdog_path, fire_timeout);
+ else
+ printf("%s %d\n", time_str(), i);
+ }
+ close_watchdog();
+ return -1;
+ }
+
rv = test_loop();
close_watchdog();
--
2.43.0

View File

@ -1,6 +1,6 @@
Name: sanlock
Version: 3.8.4
Release: 4%{?dist}
Release: 5%{?dist}
Summary: A shared storage lock manager
Group: System Environment/Base
@ -23,10 +23,11 @@ Requires(preun): systemd-units
Requires(postun): systemd-units
Source0: https://releases.pagure.org/sanlock/%{name}-%{version}.tar.gz
Patch0: 0001-sanlock-fix-memory-leak-of-lockspace-renewal_history.patch
Patch1: 0002-sanlock-fix-pthread_create-error-check.patch
Patch2: 0003-Revert-sanlock-Shrink-thread-pool-when-there-is-no-w.patch
Patch3: 0004-sanlock-fix-pthread_create-error-paths.patch
Patch1: 0001-sanlock-fix-memory-leak-of-lockspace-renewal_history.patch
Patch2: 0002-sanlock-fix-pthread_create-error-check.patch
Patch3: 0003-Revert-sanlock-Shrink-thread-pool-when-there-is-no-w.patch
Patch4: 0004-sanlock-fix-pthread_create-error-paths.patch
Patch5: 0005-wdmd-adjust-values-for-iTCO_wdt.patch
%global python_package python3-%{name}
@ -35,10 +36,11 @@ The sanlock daemon manages leases for applications on hosts using shared storage
%prep
%setup -q
%patch0 -p1 -b .backup0
%patch1 -p1 -b .backup1
%patch2 -p1 -b .backup2
%patch3 -p1 -b .backup3
%patch4 -p1 -b .backup4
%patch5 -p1 -b .backup5
%build
# upstream does not require configure
@ -98,7 +100,7 @@ getent passwd sanlock > /dev/null || /usr/sbin/useradd \
%systemd_preun wdmd.service sanlock.service
%postun
%systemd_postun
%systemd_postun wdmd.service sanlock.service
%files
/usr/lib/systemd/systemd-wdmd
@ -192,6 +194,9 @@ common sanlock lockspace.
%changelog
* Tue Jan 30 2024 David Teigland <teigland@redhat.com> - 3.8.4-5
- adjust wdmd values for iTCO_wdt watchdog driver
* Wed Jul 06 2022 David Teigland <teigland@redhat.com> - 3.8.4-4
- rebuild with larger release number