* Fri May 5 2023 Klaus Wenninger <kwenning@redhat.com> - 2.1.5-8
- Fix overall timeout calculation if watchdog and another fencing device share a topology level - Resolves: rhbz2187424
This commit is contained in:
parent
ac8081c5a4
commit
854ff6077b
142
006-watchdog-fencing-topology.patch
Normal file
142
006-watchdog-fencing-topology.patch
Normal file
@ -0,0 +1,142 @@
|
||||
From 17cc49e1564b0ae55cc8212d14c5c055f88040da Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Tue, 14 Feb 2023 15:35:37 +0100
|
||||
Subject: [PATCH] Fix: watchdog-fencing: terminate dangling timer before
|
||||
watchdog-waiting
|
||||
|
||||
---
|
||||
daemons/fenced/fenced_remote.c | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
||||
index 5c3fe25e3..aab185adb 100644
|
||||
--- a/daemons/fenced/fenced_remote.c
|
||||
+++ b/daemons/fenced/fenced_remote.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright 2009-2022 the Pacemaker project contributors
|
||||
+ * Copyright 2009-2023 the Pacemaker project contributors
|
||||
*
|
||||
* The version control history for this file may have further details.
|
||||
*
|
||||
@@ -1702,6 +1702,10 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
|
||||
"client %s " CRM_XS " id=%.8s",
|
||||
(stonith_watchdog_timeout_ms / 1000),
|
||||
op->target, op->action, op->client_name, op->id);
|
||||
+
|
||||
+ if (op->op_timer_one) {
|
||||
+ g_source_remove(op->op_timer_one);
|
||||
+ }
|
||||
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
|
||||
remote_op_watchdog_done, op);
|
||||
return TRUE;
|
||||
--
|
||||
2.39.0
|
||||
|
||||
From f2cc2a4277124230903a18713e50604a8f1842cd Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Wed, 1 Mar 2023 15:00:15 +0100
|
||||
Subject: [PATCH] Refactor: watchdog-fencing: convenience function
|
||||
pcmk__is_fencing_action
|
||||
|
||||
for consistency and add comment making clear why this block exits
|
||||
with new timer set in any case
|
||||
---
|
||||
daemons/fenced/fenced_remote.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
||||
index aab185adb..e0f8de057 100644
|
||||
--- a/daemons/fenced/fenced_remote.c
|
||||
+++ b/daemons/fenced/fenced_remote.c
|
||||
@@ -1834,7 +1834,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
|
||||
if (!((stonith_watchdog_timeout_ms > 0)
|
||||
&& (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
|
||||
|| (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
|
||||
- && !pcmk__str_eq(op->action, "on", pcmk__str_none)))
|
||||
+ && pcmk__is_fencing_action(op->action)))
|
||||
&& check_watchdog_fencing_and_wait(op))) {
|
||||
|
||||
/* Some thoughts about self-fencing cases reaching this point:
|
||||
@@ -1854,6 +1854,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
|
||||
Otherwise the selection of stonith-watchdog-timeout at
|
||||
least is questionable.
|
||||
*/
|
||||
+
|
||||
+ /* coming here we're not waiting for watchdog timeout -
|
||||
+ thus engage timer with timout evaluated before */
|
||||
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
|
||||
}
|
||||
|
||||
--
|
||||
2.39.0
|
||||
|
||||
From c4eb45a986f8865fc5e69350fd5b9f4b056d9d69 Mon Sep 17 00:00:00 2001
|
||||
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||
Date: Tue, 14 Feb 2023 11:57:17 +0100
|
||||
Subject: [PATCH] Fix: watchdog-fencing: correctly derive timeout with topology
|
||||
|
||||
up to now the timeout for watchdog-fencing was just added to
|
||||
the overall timeout if the node to be fenced was visible and
|
||||
reported back to the query.
|
||||
---
|
||||
daemons/fenced/fenced_remote.c | 28 +++++++++++++++++++++++++---
|
||||
1 file changed, 25 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
||||
index e0f8de057..3b7ab05e9 100644
|
||||
--- a/daemons/fenced/fenced_remote.c
|
||||
+++ b/daemons/fenced/fenced_remote.c
|
||||
@@ -969,8 +969,9 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
|
||||
return pcmk_rc_ok;
|
||||
}
|
||||
|
||||
- crm_info("All fencing options targeting %s for client %s@%s failed "
|
||||
+ crm_info("All %sfencing options targeting %s for client %s@%s failed "
|
||||
CRM_XS " id=%.8s",
|
||||
+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
|
||||
op->target, op->client_name, op->originator, op->id);
|
||||
return ENODEV;
|
||||
}
|
||||
@@ -1434,8 +1435,17 @@ stonith_choose_peer(remote_fencing_op_t * op)
|
||||
&& pcmk_is_set(op->call_options, st_opt_topology)
|
||||
&& (advance_topology_level(op, false) == pcmk_rc_ok));
|
||||
|
||||
- crm_notice("Couldn't find anyone to fence (%s) %s using %s",
|
||||
- op->action, op->target, (device? device : "any device"));
|
||||
+ if ((stonith_watchdog_timeout_ms > 0)
|
||||
+ && pcmk__is_fencing_action(op->action)
|
||||
+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
|
||||
+ && node_does_watchdog_fencing(op->target)) {
|
||||
+ crm_info("Couldn't contact watchdog-fencing target-node (%s)",
|
||||
+ op->target);
|
||||
+ /* check_watchdog_fencing_and_wait will log additional info */
|
||||
+ } else {
|
||||
+ crm_notice("Couldn't find anyone to fence (%s) %s using %s",
|
||||
+ op->action, op->target, (device? device : "any device"));
|
||||
+ }
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -1531,6 +1541,18 @@ get_op_total_timeout(const remote_fencing_op_t *op,
|
||||
continue;
|
||||
}
|
||||
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
|
||||
+ /* in case of watchdog-device we add the timeout to the budget
|
||||
+ regardless of if we got a reply or not
|
||||
+ */
|
||||
+ if ((stonith_watchdog_timeout_ms > 0)
|
||||
+ && pcmk__is_fencing_action(op->action)
|
||||
+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
|
||||
+ pcmk__str_none)
|
||||
+ && node_does_watchdog_fencing(op->target)) {
|
||||
+ total_timeout += stonith_watchdog_timeout_ms / 1000;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
for (iter = op->query_results; iter != NULL; iter = iter->next) {
|
||||
const peer_device_info_t *peer = iter->data;
|
||||
|
||||
--
|
||||
2.39.0
|
||||
|
@ -36,7 +36,7 @@
|
||||
## can be incremented to build packages reliably considered "newer"
|
||||
## than previously built packages with the same pcmkversion)
|
||||
%global pcmkversion 2.1.5
|
||||
%global specversion 7
|
||||
%global specversion 8
|
||||
|
||||
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
|
||||
%global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1
|
||||
@ -253,6 +253,7 @@ Patch002: 002-remote-regression.patch
|
||||
Patch003: 003-history-cleanup.patch
|
||||
Patch004: 004-g_source_remove.patch
|
||||
Patch005: 005-query-null.patch
|
||||
Patch006: 006-watchdog-fencing-topology.patch
|
||||
|
||||
Requires: resource-agents
|
||||
Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
|
||||
@ -855,6 +856,11 @@ exit 0
|
||||
%license %{nagios_name}-%{nagios_hash}/COPYING
|
||||
|
||||
%changelog
|
||||
* Fri May 5 2023 Klaus Wenninger <kwenning@redhat.com> - 2.1.5-8
|
||||
- Fix overall timeout calculation if watchdog and another fencing
|
||||
device share a topology level
|
||||
- Resolves: rhbz2187424
|
||||
|
||||
* Wed Feb 22 2023 Chris Lumens <clumens@redhat.com> - 2.1.5-7
|
||||
- Additional fixes for SIGABRT during pacemaker-fenced shutdown
|
||||
- Backport fix for attrd_updater -QA not displaying all nodes
|
||||
|
Loading…
Reference in New Issue
Block a user