107 lines
3.8 KiB
Diff
107 lines
3.8 KiB
Diff
From f16e570838d1c6cd30b5883f364b0f437c314b1f Mon Sep 17 00:00:00 2001
|
|
From: Sumit Bose <sbose@redhat.com>
|
|
Date: Fri, 9 Jun 2023 12:31:39 +0200
|
|
Subject: [PATCH 1/2] watchdog: add arm_watchdog() and disarm_watchdog() calls
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
Those two new calls can be used if there are requests stuck by e.g.
|
|
waiting on replies where there is no other way to handle the timeout and
|
|
get the system back into a stable state. They should be only used as a
|
|
last resort.
|
|
|
|
Resolves: https://github.com/SSSD/sssd/issues/6803
|
|
|
|
Reviewed-by: Alexey Tikhonov <atikhono@redhat.com>
|
|
Reviewed-by: Pavel Březina <pbrezina@redhat.com>
|
|
(cherry picked from commit 75f2b35ad3b9256de905d05c5108400d35688554)
|
|
---
|
|
src/util/util.h | 12 ++++++++++++
|
|
src/util/util_watchdog.c | 28 ++++++++++++++++++++++++++--
|
|
2 files changed, 38 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/src/util/util.h b/src/util/util.h
|
|
index 11dc40d57..02fd53237 100644
|
|
--- a/src/util/util.h
|
|
+++ b/src/util/util.h
|
|
@@ -791,6 +791,18 @@ int setup_watchdog(struct tevent_context *ev, int interval);
|
|
void teardown_watchdog(void);
|
|
int get_watchdog_ticks(void);
|
|
|
|
+/* The arm_watchdog() and disarm_watchdog() calls will disable and re-enable
|
|
+ * the watchdog reset, respectively. This means that after arm_watchdog() is
|
|
+ * called the watchdog will not be reset anymore and it will kill the
|
|
+ * process if disarm_watchdog() wasn't called before.
|
|
+ * Those calls should only be used when there is no other way to handle
|
|
+ * a waiting request and recover into a stable state.
|
|
+ * Those calls cannot be nested, i.e. after calling arm_watchdog() it should
|
|
+ * not be called a second time in a different request because then
|
|
+ * disarm_watchdog() will disable the watchdog coverage for both. */
|
|
+void arm_watchdog(void);
|
|
+void disarm_watchdog(void);
|
|
+
|
|
/* from files.c */
|
|
int sss_remove_tree(const char *root);
|
|
int sss_remove_subtree(const char *root);
|
|
diff --git a/src/util/util_watchdog.c b/src/util/util_watchdog.c
|
|
index b1534e499..abafd94b9 100644
|
|
--- a/src/util/util_watchdog.c
|
|
+++ b/src/util/util_watchdog.c
|
|
@@ -40,6 +40,7 @@ struct watchdog_ctx {
|
|
time_t timestamp;
|
|
struct tevent_fd *tfd;
|
|
int pipefd[2];
|
|
+ bool armed; /* if 'true' ticks counter will not be reset */
|
|
} watchdog_ctx;
|
|
|
|
static void watchdog_detect_timeshift(void)
|
|
@@ -89,8 +90,13 @@ static void watchdog_event_handler(struct tevent_context *ev,
|
|
struct timeval current_time,
|
|
void *private_data)
|
|
{
|
|
- /* first thing reset the watchdog ticks */
|
|
- watchdog_reset();
|
|
+ if (!watchdog_ctx.armed) {
|
|
+ /* first thing reset the watchdog ticks */
|
|
+ watchdog_reset();
|
|
+ } else {
|
|
+ DEBUG(SSSDBG_IMPORTANT_INFO,
|
|
+ "Watchdog armed, process might be terminated soon.\n");
|
|
+ }
|
|
|
|
/* then set a new watchodg event */
|
|
watchdog_ctx.te = tevent_add_timer(ev, ev,
|
|
@@ -197,6 +203,7 @@ int setup_watchdog(struct tevent_context *ev, int interval)
|
|
watchdog_ctx.ev = ev;
|
|
watchdog_ctx.input_interval = interval;
|
|
watchdog_ctx.timestamp = time(NULL);
|
|
+ watchdog_ctx.armed = false;
|
|
|
|
ret = pipe(watchdog_ctx.pipefd);
|
|
if (ret == -1) {
|
|
@@ -264,3 +271,20 @@ int get_watchdog_ticks(void)
|
|
{
|
|
return __sync_add_and_fetch(&watchdog_ctx.ticks, 0);
|
|
}
|
|
+
|
|
+void arm_watchdog(void)
|
|
+{
|
|
+ if (watchdog_ctx.armed) {
|
|
+ DEBUG(SSSDBG_CRIT_FAILURE,
|
|
+ "arm_watchdog() is called although the watchdog is already armed. "
|
|
+ "This indicates a programming error and should be avoided because "
|
|
+ "it will most probably not work as expected.\n");
|
|
+ }
|
|
+
|
|
+ watchdog_ctx.armed = true;
|
|
+}
|
|
+
|
|
+void disarm_watchdog(void)
|
|
+{
|
|
+ watchdog_ctx.armed = false;
|
|
+}
|
|
--
|
|
2.38.1
|
|
|