sssd/SOURCES/0001-watchdog-add-arm_watch...

107 lines
3.8 KiB
Diff

From f16e570838d1c6cd30b5883f364b0f437c314b1f Mon Sep 17 00:00:00 2001
From: Sumit Bose <sbose@redhat.com>
Date: Fri, 9 Jun 2023 12:31:39 +0200
Subject: [PATCH 1/2] watchdog: add arm_watchdog() and disarm_watchdog() calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Those two new calls can be used if there are requests stuck by e.g.
waiting on replies where there is no other way to handle the timeout and
get the system back into a stable state. They should be only used as a
last resort.
Resolves: https://github.com/SSSD/sssd/issues/6803
Reviewed-by: Alexey Tikhonov <atikhono@redhat.com>
Reviewed-by: Pavel Březina <pbrezina@redhat.com>
(cherry picked from commit 75f2b35ad3b9256de905d05c5108400d35688554)
---
src/util/util.h | 12 ++++++++++++
src/util/util_watchdog.c | 28 ++++++++++++++++++++++++++--
2 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/src/util/util.h b/src/util/util.h
index 11dc40d57..02fd53237 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -791,6 +791,18 @@ int setup_watchdog(struct tevent_context *ev, int interval);
void teardown_watchdog(void);
int get_watchdog_ticks(void);
+/* The arm_watchdog() and disarm_watchdog() calls will disable and re-enable
+ * the watchdog reset, respectively. This means that after arm_watchdog() is
+ * called the watchdog will not be resetted anymore and it will kill the
+ * process if disarm_watchdog() wasn't called before.
+ * Those calls should only be used when there is no other way to handle
+ * waiting request and recover into a stable state.
+ * Those calls cannot be nested, i.e. after calling arm_watchdog() it should
+ * not be called a second time in a different request because then
+ * disarm_watchdog() will disable the watchdog coverage for both. */
+void arm_watchdog(void);
+void disarm_watchdog(void);
+
/* from files.c */
int sss_remove_tree(const char *root);
int sss_remove_subtree(const char *root);
diff --git a/src/util/util_watchdog.c b/src/util/util_watchdog.c
index b1534e499..abafd94b9 100644
--- a/src/util/util_watchdog.c
+++ b/src/util/util_watchdog.c
@@ -40,6 +40,7 @@ struct watchdog_ctx {
time_t timestamp;
struct tevent_fd *tfd;
int pipefd[2];
+ bool armed; /* if 'true' ticks counter will not be reset */
} watchdog_ctx;
static void watchdog_detect_timeshift(void)
@@ -89,8 +90,13 @@ static void watchdog_event_handler(struct tevent_context *ev,
struct timeval current_time,
void *private_data)
{
- /* first thing reset the watchdog ticks */
- watchdog_reset();
+ if (!watchdog_ctx.armed) {
+ /* first thing reset the watchdog ticks */
+ watchdog_reset();
+ } else {
+ DEBUG(SSSDBG_IMPORTANT_INFO,
+ "Watchdog armed, process might be terminated soon.\n");
+ }
/* then set a new watchodg event */
watchdog_ctx.te = tevent_add_timer(ev, ev,
@@ -197,6 +203,7 @@ int setup_watchdog(struct tevent_context *ev, int interval)
watchdog_ctx.ev = ev;
watchdog_ctx.input_interval = interval;
watchdog_ctx.timestamp = time(NULL);
+ watchdog_ctx.armed = false;
ret = pipe(watchdog_ctx.pipefd);
if (ret == -1) {
@@ -264,3 +271,20 @@ int get_watchdog_ticks(void)
{
return __sync_add_and_fetch(&watchdog_ctx.ticks, 0);
}
+
+void arm_watchdog(void)
+{
+ if (watchdog_ctx.armed) {
+ DEBUG(SSSDBG_CRIT_FAILURE,
+ "arm_watchdog() is called although the watchdog is already armed. "
+ "This indicates a programming error and should be avoided because "
+ "it will most probably not work as expected.\n");
+ }
+
+ watchdog_ctx.armed = true;
+}
+
+void disarm_watchdog(void)
+{
+ watchdog_ctx.armed = false;
+}
--
2.38.1