dc8cd7d750
- Resolves: rhbz1988568
1313 lines
47 KiB
Diff
1313 lines
47 KiB
Diff
From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001
|
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
Date: Fri, 30 Jul 2021 18:07:25 +0200
|
|
Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to
|
|
get_agent_metadata
|
|
|
|
Used to distinguish between metadata that is empty by design,
|
|
a failure to get metadata that might succeed on a
|
|
retry, and a fatal failure.
|
|
Also fixes a regression that leads to endless retries getting
|
|
metadata for #watchdog - not very serious, as the retries happen with
|
|
delays in between, but still undesirable.
|
|
---
|
|
daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++-------------
|
|
1 file changed, 55 insertions(+), 37 deletions(-)
|
|
|
|
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
|
|
index a778801b1..cd9968f1a 100644
|
|
--- a/daemons/fenced/fenced_commands.c
|
|
+++ b/daemons/fenced/fenced_commands.c
|
|
@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re
|
|
static void search_devices_record_result(struct device_search_s *search, const char *device,
|
|
gboolean can_fence);
|
|
|
|
-static xmlNode * get_agent_metadata(const char *agent);
|
|
+static int get_agent_metadata(const char *agent, xmlNode **metadata);
|
|
static void read_action_metadata(stonith_device_t *device);
|
|
|
|
typedef struct async_command_s {
|
|
@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data)
|
|
static int
|
|
get_agent_metadata_cb(gpointer data) {
|
|
stonith_device_t *device = data;
|
|
+ guint period_ms;
|
|
|
|
- device->agent_metadata = get_agent_metadata(device->agent);
|
|
- if (device->agent_metadata) {
|
|
- read_action_metadata(device);
|
|
- stonith__device_parameter_flags(&(device->flags), device->id,
|
|
+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
|
|
+ case pcmk_rc_ok:
|
|
+ if (device->agent_metadata) {
|
|
+ read_action_metadata(device);
|
|
+ stonith__device_parameter_flags(&(device->flags), device->id,
|
|
device->agent_metadata);
|
|
- return G_SOURCE_REMOVE;
|
|
- } else {
|
|
- guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
|
|
- if (period_ms < 160 * 1000) {
|
|
- mainloop_timer_set_period(device->timer, 2 * period_ms);
|
|
- }
|
|
- return G_SOURCE_CONTINUE;
|
|
+ }
|
|
+ return G_SOURCE_REMOVE;
|
|
+
|
|
+ case EAGAIN:
|
|
+ period_ms = pcmk__mainloop_timer_get_period(device->timer);
|
|
+ if (period_ms < 160 * 1000) {
|
|
+ mainloop_timer_set_period(device->timer, 2 * period_ms);
|
|
+ }
|
|
+ return G_SOURCE_CONTINUE;
|
|
+
|
|
+ default:
|
|
+ return G_SOURCE_REMOVE;
|
|
}
|
|
}
|
|
|
|
@@ -700,38 +707,41 @@ init_metadata_cache(void) {
|
|
}
|
|
}
|
|
|
|
-static xmlNode *
|
|
-get_agent_metadata(const char *agent)
|
|
+int
|
|
+get_agent_metadata(const char *agent, xmlNode ** metadata)
|
|
{
|
|
- xmlNode *xml = NULL;
|
|
char *buffer = NULL;
|
|
|
|
+ if (metadata == NULL) {
|
|
+ return EINVAL;
|
|
+ }
|
|
+ *metadata = NULL;
|
|
+ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
|
|
+ return pcmk_rc_ok;
|
|
+ }
|
|
init_metadata_cache();
|
|
buffer = g_hash_table_lookup(metadata_cache, agent);
|
|
- if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
|
|
- return NULL;
|
|
-
|
|
- } else if(buffer == NULL) {
|
|
+ if (buffer == NULL) {
|
|
stonith_t *st = stonith_api_new();
|
|
int rc;
|
|
|
|
if (st == NULL) {
|
|
crm_warn("Could not get agent meta-data: "
|
|
"API memory allocation failed");
|
|
- return NULL;
|
|
+ return EAGAIN;
|
|
}
|
|
- rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
|
|
+ rc = st->cmds->metadata(st, st_opt_sync_call, agent,
|
|
+ NULL, &buffer, 10);
|
|
stonith_api_delete(st);
|
|
if (rc || !buffer) {
|
|
crm_err("Could not retrieve metadata for fencing agent %s", agent);
|
|
- return NULL;
|
|
+ return EAGAIN;
|
|
}
|
|
g_hash_table_replace(metadata_cache, strdup(agent), buffer);
|
|
}
|
|
|
|
- xml = string2xml(buffer);
|
|
-
|
|
- return xml;
|
|
+ *metadata = string2xml(buffer);
|
|
+ return pcmk_rc_ok;
|
|
}
|
|
|
|
static gboolean
|
|
@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg)
|
|
g_list_free_full(device->targets, free);
|
|
device->targets = NULL;
|
|
}
|
|
- device->agent_metadata = get_agent_metadata(device->agent);
|
|
- if (device->agent_metadata) {
|
|
- read_action_metadata(device);
|
|
- stonith__device_parameter_flags(&(device->flags), device->id,
|
|
- device->agent_metadata);
|
|
- } else {
|
|
- if (device->timer == NULL) {
|
|
- device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
|
|
+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
|
|
+ case pcmk_rc_ok:
|
|
+ if (device->agent_metadata) {
|
|
+ read_action_metadata(device);
|
|
+ stonith__device_parameter_flags(&(device->flags), device->id,
|
|
+ device->agent_metadata);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case EAGAIN:
|
|
+ if (device->timer == NULL) {
|
|
+ device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
|
|
TRUE, get_agent_metadata_cb, device);
|
|
- }
|
|
- if (!mainloop_timer_running(device->timer)) {
|
|
- mainloop_timer_start(device->timer);
|
|
- }
|
|
+ }
|
|
+ if (!mainloop_timer_running(device->timer)) {
|
|
+ mainloop_timer_start(device->timer);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
}
|
|
|
|
value = g_hash_table_lookup(device->params, "nodeid");
|
|
--
|
|
2.27.0
|
|
|
|
|
|
From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001
|
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
Date: Fri, 30 Jul 2021 18:15:10 +0200
|
|
Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain
|
|
nodes
|
|
|
|
Bump CRM_FEATURE_SET to 3.11.0 to encourage cluster being
|
|
fully upgraded to a version that supports the feature
|
|
before explicitly adding a watchdog-fence-device.
|
|
---
|
|
configure.ac | 1 +
|
|
daemons/controld/controld_control.c | 2 +-
|
|
daemons/controld/controld_fencing.c | 14 ++
|
|
daemons/controld/controld_fencing.h | 1 +
|
|
daemons/fenced/Makefile.am | 2 +-
|
|
daemons/fenced/fence_watchdog.in | 283 ++++++++++++++++++++++++++++
|
|
daemons/fenced/fenced_commands.c | 141 +++++++++++---
|
|
daemons/fenced/fenced_remote.c | 71 ++++---
|
|
daemons/fenced/pacemaker-fenced.c | 131 +++++++++----
|
|
daemons/fenced/pacemaker-fenced.h | 5 +-
|
|
include/crm/crm.h | 2 +-
|
|
include/crm/fencing/internal.h | 8 +-
|
|
lib/fencing/st_client.c | 61 ++++++
|
|
lib/lrmd/lrmd_client.c | 6 +-
|
|
rpm/pacemaker.spec.in | 3 +
|
|
16 files changed, 635 insertions(+), 97 deletions(-)
|
|
create mode 100755 daemons/fenced/fence_watchdog.in
|
|
|
|
diff --git a/configure.ac b/configure.ac
|
|
index 436100c81..013562e46 100644
|
|
--- a/configure.ac
|
|
+++ b/configure.ac
|
|
@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli],
|
|
[cts/support/fence_dummy],
|
|
[cts/support/pacemaker-cts-dummyd],
|
|
[daemons/fenced/fence_legacy],
|
|
+ [daemons/fenced/fence_watchdog],
|
|
[doc/abi-check],
|
|
[extra/resources/ClusterMon],
|
|
[extra/resources/HealthSMART],
|
|
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
|
|
index 45a70bb92..b5da6a46c 100644
|
|
--- a/daemons/controld/controld_control.c
|
|
+++ b/daemons/controld/controld_control.c
|
|
@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = {
|
|
},
|
|
{
|
|
"stonith-watchdog-timeout", NULL, "time", NULL,
|
|
- "0", pcmk__valid_sbd_timeout,
|
|
+ "0", controld_verify_stonith_watchdog_timeout,
|
|
"How long to wait before we can assume nodes are safely down "
|
|
"when watchdog-based self-fencing via SBD is in use",
|
|
"If nonzero, along with `have-watchdog=true` automatically set by the "
|
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
|
index 0fba6613b..6c2a6c550 100644
|
|
--- a/daemons/controld/controld_fencing.c
|
|
+++ b/daemons/controld/controld_fencing.c
|
|
@@ -11,6 +11,7 @@
|
|
#include <crm/crm.h>
|
|
#include <crm/msg_xml.h>
|
|
#include <crm/common/xml.h>
|
|
+#include <crm/stonith-ng.h>
|
|
#include <crm/fencing/internal.h>
|
|
|
|
#include <pacemaker-controld.h>
|
|
@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
|
|
return TRUE;
|
|
}
|
|
|
|
+bool
|
|
+controld_verify_stonith_watchdog_timeout(const char *value)
|
|
+{
|
|
+ gboolean rv = TRUE;
|
|
+
|
|
+ if (stonith_api && (stonith_api->state != stonith_disconnected) &&
|
|
+ stonith__watchdog_fencing_enabled_for_node_api(stonith_api,
|
|
+ fsa_our_uname)) {
|
|
+ rv = pcmk__valid_sbd_timeout(value);
|
|
+ }
|
|
+ return rv;
|
|
+}
|
|
+
|
|
/* end stonith API client functions */
|
|
|
|
|
|
diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h
|
|
index d0ecc8234..ef68a0c83 100644
|
|
--- a/daemons/controld/controld_fencing.h
|
|
+++ b/daemons/controld/controld_fencing.h
|
|
@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value);
|
|
void controld_trigger_fencer_connect(void);
|
|
void controld_disconnect_fencer(bool destroy);
|
|
gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action);
|
|
+bool controld_verify_stonith_watchdog_timeout(const char *value);
|
|
|
|
// stonith cleanup list
|
|
void add_stonith_cleanup(const char *target);
|
|
diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am
|
|
index 43413e11d..2923d7c9b 100644
|
|
--- a/daemons/fenced/Makefile.am
|
|
+++ b/daemons/fenced/Makefile.am
|
|
@@ -15,7 +15,7 @@ halibdir = $(CRM_DAEMON_DIR)
|
|
|
|
halib_PROGRAMS = pacemaker-fenced cts-fence-helper
|
|
|
|
-sbin_SCRIPTS = fence_legacy
|
|
+sbin_SCRIPTS = fence_legacy fence_watchdog
|
|
|
|
noinst_HEADERS = pacemaker-fenced.h
|
|
|
|
diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in
|
|
new file mode 100755
|
|
index 000000000..c83304f1d
|
|
--- /dev/null
|
|
+++ b/daemons/fenced/fence_watchdog.in
|
|
@@ -0,0 +1,283 @@
|
|
+#!@PYTHON@
|
|
+"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent
|
|
+"""
|
|
+
|
|
+__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors"
|
|
+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY"
|
|
+
|
|
+import io
|
|
+import os
|
|
+import re
|
|
+import sys
|
|
+import atexit
|
|
+import getopt
|
|
+
|
|
+SHORT_DESC = "Dummy watchdog fence agent"
|
|
+LONG_DESC = """fence_watchdog just provides
|
|
+meta-data - actual fencing is done by the pacemaker internal watchdog agent."""
|
|
+
|
|
+ALL_OPT = {
|
|
+ "version" : {
|
|
+ "getopt" : "V",
|
|
+ "longopt" : "version",
|
|
+ "help" : "-V, --version Display version information and exit",
|
|
+ "required" : "0",
|
|
+ "shortdesc" : "Display version information and exit",
|
|
+ "order" : 53
|
|
+ },
|
|
+ "help" : {
|
|
+ "getopt" : "h",
|
|
+ "longopt" : "help",
|
|
+ "help" : "-h, --help Display this help and exit",
|
|
+ "required" : "0",
|
|
+ "shortdesc" : "Display help and exit",
|
|
+ "order" : 54
|
|
+ },
|
|
+ "action" : {
|
|
+ "getopt" : "o:",
|
|
+ "longopt" : "action",
|
|
+ "help" : "-o, --action=[action] Action: metadata",
|
|
+ "required" : "1",
|
|
+ "shortdesc" : "Fencing Action",
|
|
+ "default" : "metadata",
|
|
+ "order" : 1
|
|
+ },
|
|
+ "nodename" : {
|
|
+ "getopt" : "N:",
|
|
+ "longopt" : "nodename",
|
|
+ "help" : "-N, --nodename Node name of fence victim (ignored)",
|
|
+ "required" : "0",
|
|
+ "shortdesc" : "Ignored",
|
|
+ "order" : 2
|
|
+ },
|
|
+ "plug" : {
|
|
+ "getopt" : "n:",
|
|
+ "longopt" : "plug",
|
|
+ "help" : "-n, --plug=[id] Physical plug number on device (ignored)",
|
|
+ "required" : "1",
|
|
+ "shortdesc" : "Ignored",
|
|
+ "order" : 4
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+def agent():
|
|
+ """ Return name this file was run as. """
|
|
+
|
|
+ return os.path.basename(sys.argv[0])
|
|
+
|
|
+
|
|
+def fail_usage(message):
|
|
+ """ Print a usage message and exit. """
|
|
+
|
|
+ sys.exit("%s\nPlease use '-h' for usage" % message)
|
|
+
|
|
+
|
|
+def show_docs(options):
|
|
+ """ Handle informational options (display info and exit). """
|
|
+
|
|
+ device_opt = options["device_opt"]
|
|
+
|
|
+ if "-h" in options:
|
|
+ usage(device_opt)
|
|
+ sys.exit(0)
|
|
+
|
|
+ if "-o" in options and options["-o"].lower() == "metadata":
|
|
+ metadata(device_opt, options)
|
|
+ sys.exit(0)
|
|
+
|
|
+ if "-V" in options:
|
|
+ print(AGENT_VERSION)
|
|
+ sys.exit(0)
|
|
+
|
|
+
|
|
+def sorted_options(avail_opt):
|
|
+ """ Return a list of all options, in their internally specified order. """
|
|
+
|
|
+ sorted_list = [(key, ALL_OPT[key]) for key in avail_opt]
|
|
+ sorted_list.sort(key=lambda x: x[1]["order"])
|
|
+ return sorted_list
|
|
+
|
|
+
|
|
+def usage(avail_opt):
|
|
+ """ Print a usage message. """
|
|
+ print(LONG_DESC)
|
|
+ print()
|
|
+ print("Usage:")
|
|
+ print("\t" + agent() + " [options]")
|
|
+ print("Options:")
|
|
+
|
|
+ for dummy, value in sorted_options(avail_opt):
|
|
+ if len(value["help"]) != 0:
|
|
+ print(" " + value["help"])
|
|
+
|
|
+
|
|
+def metadata(avail_opt, options):
|
|
+ """ Print agent metadata. """
|
|
+
|
|
+ print("""<?xml version="1.0" ?>
|
|
+<resource-agent name="%s" shortdesc="%s">
|
|
+<longdesc>%s</longdesc>
|
|
+<parameters>""" % (agent(), SHORT_DESC, LONG_DESC))
|
|
+
|
|
+ for option, dummy in sorted_options(avail_opt):
|
|
+ if "shortdesc" in ALL_OPT[option]:
|
|
+ print(' <parameter name="' + option +
|
|
+ 'required="' + ALL_OPT[option]["required"] + '">')
|
|
+
|
|
+ default = ""
|
|
+ default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1]
|
|
+ default_name_no_arg = "-" + ALL_OPT[option]["getopt"]
|
|
+
|
|
+ if "default" in ALL_OPT[option]:
|
|
+ default = 'default="%s"' % str(ALL_OPT[option]["default"])
|
|
+ elif default_name_arg in options:
|
|
+ if options[default_name_arg]:
|
|
+ try:
|
|
+ default = 'default="%s"' % options[default_name_arg]
|
|
+ except TypeError:
|
|
+ ## @todo/@note: Currently there is no clean way how to handle lists
|
|
+ ## we can create a string from it but we can't set it on command line
|
|
+ default = 'default="%s"' % str(options[default_name_arg])
|
|
+ elif default_name_no_arg in options:
|
|
+ default = 'default="true"'
|
|
+
|
|
+ mixed = ALL_OPT[option]["help"]
|
|
+ ## split it between option and help text
|
|
+ res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed)
|
|
+ if None != res:
|
|
+ mixed = res.group(1)
|
|
+ mixed = mixed.replace("<", "&lt;").replace(">", "&gt;")
|
|
+ print(' <getopt mixed="' + mixed + '" />')
|
|
+
|
|
+ if ALL_OPT[option]["getopt"].count(":") > 0:
|
|
+ print(' <content type="string" ' + default + ' />')
|
|
+ else:
|
|
+ print(' <content type="boolean" ' + default + ' />')
|
|
+
|
|
+ print(' <shortdesc lang="en">' + ALL_OPT[option]["shortdesc"] + '</shortdesc>')
|
|
+ print(' </parameter>')
|
|
+
|
|
+ print(' </parameters>\n <actions>')
|
|
+ print(' <action name="on" />')
|
|
+ print(' <action name="off" />')
|
|
+ print(' <action name="reboot" />')
|
|
+ print(' <action name="monitor" />')
|
|
+ print(' <action name="list" />')
|
|
+ print(' <action name="metadata" />')
|
|
+ print(' </actions>')
|
|
+ print('</resource-agent>')
|
|
+
|
|
+
|
|
+def option_longopt(option):
|
|
+ """ Return the getopt-compatible long-option name of the given option. """
|
|
+
|
|
+ if ALL_OPT[option]["getopt"].endswith(":"):
|
|
+ return ALL_OPT[option]["longopt"] + "="
|
|
+ else:
|
|
+ return ALL_OPT[option]["longopt"]
|
|
+
|
|
+
|
|
+def opts_from_command_line(argv, avail_opt):
|
|
+ """ Read options from command-line arguments. """
|
|
+
|
|
+ # Prepare list of options for getopt
|
|
+ getopt_string = ""
|
|
+ longopt_list = []
|
|
+ for k in avail_opt:
|
|
+ if k in ALL_OPT:
|
|
+ getopt_string += ALL_OPT[k]["getopt"]
|
|
+ else:
|
|
+ fail_usage("Parse error: unknown option '" + k + "'")
|
|
+
|
|
+ if k in ALL_OPT and "longopt" in ALL_OPT[k]:
|
|
+ longopt_list.append(option_longopt(k))
|
|
+
|
|
+ try:
|
|
+ opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list)
|
|
+ except getopt.GetoptError as error:
|
|
+ fail_usage("Parse error: " + error.msg)
|
|
+
|
|
+ # Transform longopt to short one which are used in fencing agents
|
|
+ old_opt = opt
|
|
+ opt = {}
|
|
+ for old_option in dict(old_opt).keys():
|
|
+ if old_option.startswith("--"):
|
|
+ for option in ALL_OPT.keys():
|
|
+ if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option:
|
|
+ opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option]
|
|
+ else:
|
|
+ opt[old_option] = dict(old_opt)[old_option]
|
|
+
|
|
+ return opt
|
|
+
|
|
+
|
|
+def opts_from_stdin(avail_opt):
|
|
+ """ Read options from standard input. """
|
|
+
|
|
+ opt = {}
|
|
+ name = ""
|
|
+ for line in sys.stdin.readlines():
|
|
+ line = line.strip()
|
|
+ if line.startswith("#") or (len(line) == 0):
|
|
+ continue
|
|
+
|
|
+ (name, value) = (line + "=").split("=", 1)
|
|
+ value = value[:-1]
|
|
+
|
|
+ if name not in avail_opt:
|
|
+ print("Parse error: Ignoring unknown option '%s'" % line,
|
|
+ file=sys.stderr)
|
|
+ continue
|
|
+
|
|
+ if ALL_OPT[name]["getopt"].endswith(":"):
|
|
+ opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value
|
|
+ elif value.lower() in ["1", "yes", "on", "true"]:
|
|
+ opt["-"+ALL_OPT[name]["getopt"]] = "1"
|
|
+
|
|
+ return opt
|
|
+
|
|
+
|
|
+def process_input(avail_opt):
|
|
+ """ Set standard environment variables, and parse all options. """
|
|
+
|
|
+ # Set standard environment
|
|
+ os.putenv("LANG", "C")
|
|
+ os.putenv("LC_ALL", "C")
|
|
+
|
|
+ # Read options from command line or standard input
|
|
+ if len(sys.argv) > 1:
|
|
+ return opts_from_command_line(sys.argv[1:], avail_opt)
|
|
+ else:
|
|
+ return opts_from_stdin(avail_opt)
|
|
+
|
|
+
|
|
+def atexit_handler():
|
|
+ """ Close stdout on exit. """
|
|
+
|
|
+ try:
|
|
+ sys.stdout.close()
|
|
+ os.close(1)
|
|
+ except IOError:
|
|
+ sys.exit("%s failed to close standard output" % agent())
|
|
+
|
|
+
|
|
+def main():
|
|
+ """ Make it so! """
|
|
+
|
|
+ device_opt = ALL_OPT.keys()
|
|
+
|
|
+ ## Defaults for fence agent
|
|
+ atexit.register(atexit_handler)
|
|
+ options = process_input(device_opt)
|
|
+ options["device_opt"] = device_opt
|
|
+ show_docs(options)
|
|
+
|
|
+ print("Watchdog fencing may be initiated only by the cluster, not this agent.",
|
|
+ file=sys.stderr)
|
|
+
|
|
+ sys.exit(1)
|
|
+
|
|
+
|
|
+if __name__ == "__main__":
|
|
+ main()
|
|
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
|
|
index cd9968f1a..9470ea2c1 100644
|
|
--- a/daemons/fenced/fenced_commands.c
|
|
+++ b/daemons/fenced/fenced_commands.c
|
|
@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device)
|
|
return TRUE;
|
|
}
|
|
|
|
- if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
|
|
- if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
|
|
- pcmk__panic(__func__);
|
|
- goto done;
|
|
-
|
|
- } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
|
|
- pcmk__panic(__func__);
|
|
- goto done;
|
|
-
|
|
+ if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
|
|
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
|
|
+ if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) {
|
|
+ if (node_does_watchdog_fencing(stonith_our_uname)) {
|
|
+ pcmk__panic(__func__);
|
|
+ goto done;
|
|
+ }
|
|
} else {
|
|
crm_info("Faking success for %s watchdog operation", cmd->action);
|
|
cmd->done_cb(0, 0, NULL, cmd);
|
|
@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata)
|
|
return EINVAL;
|
|
}
|
|
*metadata = NULL;
|
|
- if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
|
|
+ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) {
|
|
return pcmk_rc_ok;
|
|
}
|
|
init_metadata_cache();
|
|
@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin,
|
|
schedule_stonith_command(cmd, device);
|
|
}
|
|
|
|
-gboolean
|
|
-string_in_list(GList *list, const char *item)
|
|
-{
|
|
- int lpc = 0;
|
|
- int max = g_list_length(list);
|
|
-
|
|
- for (lpc = 0; lpc < max; lpc++) {
|
|
- const char *value = g_list_nth_data(list, lpc);
|
|
-
|
|
- if (pcmk__str_eq(item, value, pcmk__str_casei)) {
|
|
- return TRUE;
|
|
- } else {
|
|
- crm_trace("%d: '%s' != '%s'", lpc, item, value);
|
|
- }
|
|
- }
|
|
- return FALSE;
|
|
-}
|
|
-
|
|
static void
|
|
status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
|
|
{
|
|
@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
|
|
if (!alias) {
|
|
alias = search->host;
|
|
}
|
|
- if (string_in_list(dev->targets, alias)) {
|
|
+ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
|
|
can_fence = TRUE;
|
|
}
|
|
}
|
|
@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
|
|
stonith_device_t *dup = NULL;
|
|
stonith_device_t *device = build_device_from_xml(msg);
|
|
guint ndevices = 0;
|
|
+ int rv = pcmk_ok;
|
|
|
|
CRM_CHECK(device != NULL, return -ENOMEM);
|
|
|
|
+ /* do we have a watchdog-device? */
|
|
+ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) ||
|
|
+ pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
|
|
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do {
|
|
+ if (stonith_watchdog_timeout_ms <= 0) {
|
|
+ crm_err("Ignoring watchdog fence device without "
|
|
+ "stonith-watchdog-timeout set.");
|
|
+ rv = -ENODEV;
|
|
+ /* fall through to cleanup & return */
|
|
+ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT,
|
|
+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) {
|
|
+ crm_err("Ignoring watchdog fence device with unknown "
|
|
+ "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.",
|
|
+ device->agent?device->agent:"");
|
|
+ rv = -ENODEV;
|
|
+ /* fall through to cleanup & return */
|
|
+ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID,
|
|
+ pcmk__str_none)) {
|
|
+ crm_err("Ignoring watchdog fence device "
|
|
+ "named %s !='"STONITH_WATCHDOG_ID"'.",
|
|
+ device->id?device->id:"");
|
|
+ rv = -ENODEV;
|
|
+ /* fall through to cleanup & return */
|
|
+ } else {
|
|
+ if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT,
|
|
+ pcmk__str_none)) {
|
|
+ /* this either has an empty list or the targets
|
|
+ configured for watchdog-fencing
|
|
+ */
|
|
+ g_list_free_full(stonith_watchdog_targets, free);
|
|
+ stonith_watchdog_targets = device->targets;
|
|
+ device->targets = NULL;
|
|
+ }
|
|
+ if (node_does_watchdog_fencing(stonith_our_uname)) {
|
|
+ g_list_free_full(device->targets, free);
|
|
+ device->targets = stonith__parse_targets(stonith_our_uname);
|
|
+ g_hash_table_replace(device->params,
|
|
+ strdup(PCMK_STONITH_HOST_LIST),
|
|
+ strdup(stonith_our_uname));
|
|
+ /* proceed as with any other stonith-device */
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ crm_debug("Skip registration of watchdog fence device on node not in host-list.");
|
|
+ /* cleanup and fall through to more cleanup and return */
|
|
+ device->targets = NULL;
|
|
+ stonith_device_remove(device->id, from_cib);
|
|
+ }
|
|
+ free_device(device);
|
|
+ return rv;
|
|
+ } while (0);
|
|
+
|
|
dup = device_has_duplicate(device);
|
|
if (dup) {
|
|
ndevices = g_hash_table_size(device_list);
|
|
@@ -1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc)
|
|
* (CIB registration is not sufficient), because monitor should not be
|
|
* possible unless the device is "started" (API registered).
|
|
*/
|
|
+
|
|
+static char *
|
|
+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim)
|
|
+{
|
|
+ int max = g_list_length(list);
|
|
+ size_t delim_len = delim?strlen(delim):0;
|
|
+ size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0);
|
|
+ char *rv;
|
|
+ GList *gIter;
|
|
+
|
|
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
|
|
+ const char *value = (const char *) gIter->data;
|
|
+
|
|
+ alloc_size += strlen(value);
|
|
+ }
|
|
+ rv = calloc(alloc_size, sizeof(char));
|
|
+ if (rv) {
|
|
+ char *pos = rv;
|
|
+ const char *lead_delim = "";
|
|
+
|
|
+ for (gIter = list; gIter != NULL; gIter = gIter->next) {
|
|
+ const char *value = (const char *) gIter->data;
|
|
+
|
|
+ pos = &pos[sprintf(pos, "%s%s", lead_delim, value)];
|
|
+ lead_delim = delim;
|
|
+ }
|
|
+ if (max && terminate_with_delim) {
|
|
+ sprintf(pos, "%s", delim);
|
|
+ }
|
|
+ }
|
|
+ return rv;
|
|
+}
|
|
+
|
|
static int
|
|
stonith_device_action(xmlNode * msg, char **output)
|
|
{
|
|
@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output)
|
|
return -EPROTO;
|
|
}
|
|
|
|
+ if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
|
|
+ if (stonith_watchdog_timeout_ms <= 0) {
|
|
+ return -ENODEV;
|
|
+ } else {
|
|
+ if (pcmk__str_eq(action, "list", pcmk__str_casei)) {
|
|
+ *output = list_to_string(stonith_watchdog_targets, "\n", TRUE);
|
|
+ return pcmk_ok;
|
|
+ } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
|
|
+ return pcmk_ok;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
device = g_hash_table_lookup(device_list, id);
|
|
if ((device == NULL)
|
|
|| (!device->api_registered && !strcmp(action, "monitor"))) {
|
|
@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
|
|
* Only use if all hosts on which the device can be active can always fence all listed hosts
|
|
*/
|
|
|
|
- if (string_in_list(dev->targets, host)) {
|
|
+ if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) {
|
|
can = TRUE;
|
|
} else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
|
|
&& g_hash_table_lookup(dev->aliases, host)) {
|
|
@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc
|
|
return;
|
|
}
|
|
|
|
- if (string_in_list(dev->targets, alias)) {
|
|
+ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) {
|
|
can = TRUE;
|
|
}
|
|
|
|
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
|
index cf91acaed..224f2baba 100644
|
|
--- a/daemons/fenced/fenced_remote.c
|
|
+++ b/daemons/fenced/fenced_remote.c
|
|
@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
|
|
}
|
|
}
|
|
|
|
+static gboolean
|
|
+check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
|
|
+{
|
|
+ if (node_does_watchdog_fencing(op->target)) {
|
|
+
|
|
+ crm_notice("Waiting %lds for %s to self-fence (%s) for "
|
|
+ "client %s " CRM_XS " id=%.8s",
|
|
+ (stonith_watchdog_timeout_ms / 1000),
|
|
+ op->target, op->action, op->client_name, op->id);
|
|
+ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
|
|
+ remote_op_watchdog_done, op);
|
|
+ return TRUE;
|
|
+ } else {
|
|
+ crm_debug("Skipping fallback to watchdog-fencing as %s is "
|
|
+ "not in host-list", op->target);
|
|
+ }
|
|
+ return FALSE;
|
|
+}
|
|
+
|
|
void
|
|
call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
|
|
{
|
|
@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
|
|
g_source_remove(op->op_timer_one);
|
|
}
|
|
|
|
- if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) {
|
|
- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
|
|
- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
|
|
- op->target, op->action, op->client_name, op->id);
|
|
- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
|
|
-
|
|
- /* TODO check devices to verify watchdog will be in use */
|
|
- } else if(stonith_watchdog_timeout_ms > 0
|
|
- && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
|
|
- && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) {
|
|
- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
|
|
- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
|
|
- op->target, op->action, op->client_name, op->id);
|
|
- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
|
|
-
|
|
- } else {
|
|
+ if (!(stonith_watchdog_timeout_ms > 0 && (
|
|
+ (pcmk__str_eq(device, STONITH_WATCHDOG_ID,
|
|
+ pcmk__str_none)) ||
|
|
+ (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
|
|
+ && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) &&
|
|
+ check_watchdog_fencing_and_wait(op))) {
|
|
+
|
|
+ /* Some thoughts about self-fencing cases reaching this point:
|
|
+ - Actually check in check_watchdog_fencing_and_wait
|
|
+ shouldn't fail if STONITH_WATCHDOG_ID is
|
|
+ chosen as fencing-device and it being present implies
|
|
+ watchdog-fencing is enabled anyway
|
|
+ - If watchdog-fencing is disabled either in general or for
|
|
+ a specific target - detected in check_watchdog_fencing_and_wait -
|
|
+ for some other kind of self-fencing we can't expect
|
|
+ a success answer but timeout is fine if the node doesn't
|
|
+ come back in between
|
|
+ - Delicate might be the case where we have watchdog-fencing
|
|
+ enabled for a node but the watchdog-fencing-device isn't
|
|
+ explicitly chosen for suicide. Local pe-execution in sbd
|
|
+ may detect the node as unclean and lead to timely suicide.
|
|
+ Otherwise the selection of stonith-watchdog-timeout at
|
|
+ least is questionable.
|
|
+ */
|
|
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
|
|
}
|
|
|
|
-
|
|
send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
|
|
peer->tried = TRUE;
|
|
free_xml(remote_op);
|
|
@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc)
|
|
* but we have all the expected replies, then no devices
|
|
* are available to execute the fencing operation. */
|
|
|
|
- if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) {
|
|
- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s "
|
|
- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000),
|
|
- op->target, op->action, op->client_name, op->id);
|
|
- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op);
|
|
- return;
|
|
+ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
|
|
+ STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
|
|
+ if (check_watchdog_fencing_and_wait(op)) {
|
|
+ return;
|
|
+ }
|
|
}
|
|
|
|
if (op->state == st_query) {
|
|
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
|
|
index 39738d8be..7f8b427d9 100644
|
|
--- a/daemons/fenced/pacemaker-fenced.c
|
|
+++ b/daemons/fenced/pacemaker-fenced.c
|
|
@@ -42,6 +42,7 @@
|
|
|
|
char *stonith_our_uname = NULL;
|
|
long stonith_watchdog_timeout_ms = 0;
|
|
+GList *stonith_watchdog_targets = NULL;
|
|
|
|
static GMainLoop *mainloop = NULL;
|
|
|
|
@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc)
|
|
}
|
|
|
|
static void
|
|
-watchdog_device_update(xmlNode *cib)
|
|
+watchdog_device_update(void)
|
|
+{
|
|
+ if (stonith_watchdog_timeout_ms > 0) {
|
|
+ if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) &&
|
|
+ !stonith_watchdog_targets) {
|
|
+ /* getting here watchdog-fencing enabled, no device there yet
|
|
+ and reason isn't stonith_watchdog_targets preventing that
|
|
+ */
|
|
+ int rc;
|
|
+ xmlNode *xml;
|
|
+
|
|
+ xml = create_device_registration_xml(
|
|
+ STONITH_WATCHDOG_ID,
|
|
+ st_namespace_internal,
|
|
+ STONITH_WATCHDOG_AGENT,
|
|
+ NULL, /* stonith_device_register will add our
|
|
+ own name as PCMK_STONITH_HOST_LIST param
|
|
+ so we can skip that here
|
|
+ */
|
|
+ NULL);
|
|
+ rc = stonith_device_register(xml, NULL, TRUE);
|
|
+ free_xml(xml);
|
|
+ if (rc != pcmk_ok) {
|
|
+ crm_crit("Cannot register watchdog pseudo fence agent");
|
|
+ crm_exit(CRM_EX_FATAL);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ } else {
|
|
+ /* be silent if no device - todo parameter to stonith_device_remove */
|
|
+ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) {
|
|
+ stonith_device_remove(STONITH_WATCHDOG_ID, TRUE);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+update_stonith_watchdog_timeout_ms(xmlNode *cib)
|
|
{
|
|
xmlNode *stonith_enabled_xml = NULL;
|
|
const char *stonith_enabled_s = NULL;
|
|
@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib)
|
|
}
|
|
}
|
|
|
|
- if (timeout_ms != stonith_watchdog_timeout_ms) {
|
|
- crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
|
|
- stonith_watchdog_timeout_ms = timeout_ms;
|
|
-
|
|
- if (stonith_watchdog_timeout_ms > 0) {
|
|
- int rc;
|
|
- xmlNode *xml;
|
|
- stonith_key_value_t *params = NULL;
|
|
-
|
|
- params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST,
|
|
- stonith_our_uname);
|
|
-
|
|
- xml = create_device_registration_xml("watchdog", st_namespace_internal,
|
|
- STONITH_WATCHDOG_AGENT, params,
|
|
- NULL);
|
|
- stonith_key_value_freeall(params, 1, 1);
|
|
- rc = stonith_device_register(xml, NULL, FALSE);
|
|
- free_xml(xml);
|
|
- if (rc != pcmk_ok) {
|
|
- crm_crit("Cannot register watchdog pseudo fence agent");
|
|
- crm_exit(CRM_EX_FATAL);
|
|
- }
|
|
-
|
|
- } else {
|
|
- stonith_device_remove("watchdog", FALSE);
|
|
- }
|
|
- }
|
|
+ stonith_watchdog_timeout_ms = timeout_ms;
|
|
}
|
|
|
|
/*!
|
|
@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set)
|
|
return;
|
|
}
|
|
|
|
+ /* if watchdog-fencing is disabled handle any watchdog-fence
|
|
+ resource as if it was disabled
|
|
+ */
|
|
+ if ((stonith_watchdog_timeout_ms <= 0) &&
|
|
+ pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) {
|
|
+ crm_info("Watchdog-fencing disabled thus handling "
|
|
+ "device %s as disabled", rsc->id);
|
|
+ return;
|
|
+ }
|
|
+
|
|
/* Check whether our node is allowed for this resource (and its parent if in a group) */
|
|
node = our_node_allowed_for(rsc);
|
|
if (rsc->parent && (rsc->parent->variant == pe_group)) {
|
|
@@ -772,6 +794,12 @@ cib_devices_update(void)
|
|
}
|
|
}
|
|
|
|
+ /* have list repopulated if cib has a watchdog-fencing-resource
|
|
+ TODO: keep a cached list for queries happening while we are refreshing
|
|
+ */
|
|
+ g_list_free_full(stonith_watchdog_targets, free);
|
|
+ stonith_watchdog_targets = NULL;
|
|
+
|
|
for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) {
|
|
cib_device_update(gIter->data, fenced_data_set);
|
|
}
|
|
@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
|
|
if (search != NULL) {
|
|
*search = 0;
|
|
stonith_device_remove(rsc_id, TRUE);
|
|
+ /* watchdog_device_update called afterwards
|
|
+ to fall back to implicit definition if needed */
|
|
} else {
|
|
crm_warn("Ignoring malformed CIB update (resource deletion)");
|
|
}
|
|
@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value)
|
|
return (match != NULL);
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Check whether a node does watchdog-fencing
|
|
+ *
|
|
+ * \param[in] node Name of node to check
|
|
+ *
|
|
+ * \return TRUE if node found in stonith_watchdog_targets
|
|
+ * or stonith_watchdog_targets is empty indicating
|
|
+ * all nodes are doing watchdog-fencing
|
|
+ */
|
|
+gboolean
|
|
+node_does_watchdog_fencing(const char *node)
|
|
+{
|
|
+ return ((stonith_watchdog_targets == NULL) ||
|
|
+ pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei));
|
|
+}
|
|
+
|
|
+
|
|
static void
|
|
update_fencing_topology(const char *event, xmlNode * msg)
|
|
{
|
|
@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
xmlNode *stonith_enabled_xml = NULL;
|
|
const char *stonith_enabled_s = NULL;
|
|
static gboolean stonith_enabled_saved = TRUE;
|
|
+ long timeout_ms_saved = stonith_watchdog_timeout_ms;
|
|
+ gboolean need_full_refresh = FALSE;
|
|
|
|
if(!have_cib_devices) {
|
|
crm_trace("Skipping updates until we get a full dump");
|
|
@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
}
|
|
|
|
pcmk__refresh_node_caches_from_cib(local_cib);
|
|
+ update_stonith_watchdog_timeout_ms(local_cib);
|
|
|
|
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']",
|
|
local_cib, LOG_NEVER);
|
|
@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
|
|
}
|
|
|
|
- watchdog_device_update(local_cib);
|
|
-
|
|
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
|
|
crm_trace("Ignoring CIB updates while fencing is disabled");
|
|
stonith_enabled_saved = FALSE;
|
|
- return;
|
|
|
|
} else if (stonith_enabled_saved == FALSE) {
|
|
crm_info("Updating fencing device and topology lists "
|
|
"now that fencing is enabled");
|
|
stonith_enabled_saved = TRUE;
|
|
- fencing_topology_init();
|
|
- cib_devices_update();
|
|
+ need_full_refresh = TRUE;
|
|
|
|
} else {
|
|
- update_fencing_topology(event, msg);
|
|
- update_cib_stonith_devices(event, msg);
|
|
+ if (timeout_ms_saved != stonith_watchdog_timeout_ms) {
|
|
+ need_full_refresh = TRUE;
|
|
+ } else {
|
|
+ update_fencing_topology(event, msg);
|
|
+ update_cib_stonith_devices(event, msg);
|
|
+ watchdog_device_update();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (need_full_refresh) {
|
|
+ fencing_topology_init();
|
|
+ cib_devices_update();
|
|
+ watchdog_device_update();
|
|
}
|
|
}
|
|
|
|
@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us
|
|
local_cib = copy_xml(output);
|
|
|
|
pcmk__refresh_node_caches_from_cib(local_cib);
|
|
+ update_stonith_watchdog_timeout_ms(local_cib);
|
|
|
|
fencing_topology_init();
|
|
- watchdog_device_update(local_cib);
|
|
cib_devices_update();
|
|
+ watchdog_device_update();
|
|
}
|
|
|
|
static void
|
|
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
|
|
index d330fda4d..14e085e98 100644
|
|
--- a/daemons/fenced/pacemaker-fenced.h
|
|
+++ b/daemons/fenced/pacemaker-fenced.h
|
|
@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer);
|
|
|
|
int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
|
|
|
|
-gboolean string_in_list(GList *list, const char *item);
|
|
-
|
|
gboolean node_has_attr(const char *node, const char *name, const char *value);
|
|
|
|
+gboolean node_does_watchdog_fencing(const char *node);
|
|
+
|
|
extern char *stonith_our_uname;
|
|
extern gboolean stand_alone;
|
|
extern GHashTable *device_list;
|
|
extern GHashTable *topology;
|
|
extern long stonith_watchdog_timeout_ms;
|
|
+extern GList *stonith_watchdog_targets;
|
|
|
|
extern GHashTable *stonith_remote_op_list;
|
|
diff --git a/include/crm/crm.h b/include/crm/crm.h
|
|
index ee52c3630..7861c160e 100644
|
|
--- a/include/crm/crm.h
|
|
+++ b/include/crm/crm.h
|
|
@@ -66,7 +66,7 @@ extern "C" {
|
|
* >=3.0.13: Fail counts include operation name and interval
|
|
* >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
|
|
*/
|
|
-# define CRM_FEATURE_SET "3.10.2"
|
|
+# define CRM_FEATURE_SET "3.11.0"
|
|
|
|
/* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
|
|
* recipient of a CPG message. This imposes an arbitrary limit on cluster node
|
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
|
index 8bcb544d8..f222edba3 100644
|
|
--- a/include/crm/fencing/internal.h
|
|
+++ b/include/crm/fencing/internal.h
|
|
@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags,
|
|
# define STONITH_OP_LEVEL_ADD "st_level_add"
|
|
# define STONITH_OP_LEVEL_DEL "st_level_remove"
|
|
|
|
-# define STONITH_WATCHDOG_AGENT "#watchdog"
|
|
+# define STONITH_WATCHDOG_AGENT "fence_watchdog"
|
|
+/* Don't change the two definitions below, as that would break rolling upgrades */
|
|
+# define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog"
|
|
+# define STONITH_WATCHDOG_ID "watchdog"
|
|
|
|
# ifdef HAVE_STONITH_STONITH_H
|
|
// utilities from st_lha.c
|
|
@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state)
|
|
return state != st_failed && state != st_done;
|
|
}
|
|
|
|
+gboolean stonith__watchdog_fencing_enabled_for_node(const char *node);
|
|
+gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node);
|
|
+
|
|
#endif
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index e285f51e2..0ff98157b 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
|
|
return st_namespace_invalid;
|
|
}
|
|
|
|
+gboolean
|
|
+stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
|
|
+{
|
|
+ gboolean rv = FALSE;
|
|
+ stonith_t *stonith_api = st?st:stonith_api_new();
|
|
+ char *list = NULL;
|
|
+
|
|
+ if(stonith_api) {
|
|
+ if (stonith_api->state == stonith_disconnected) {
|
|
+ int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL);
|
|
+
|
|
+ if (rc != pcmk_ok) {
|
|
+ crm_err("Failed connecting to Stonith-API for watchdog-fencing-query.");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (stonith_api->state != stonith_disconnected) {
|
|
+ /* caveat!!!
|
|
+ * this might fail when stonithd is just updating the device-list
|
|
+ * probably something we should fix as well for other api-calls */
|
|
+ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0);
|
|
+ if ((rc != pcmk_ok) || (list == NULL)) {
|
|
+ /* due to the race described above it can happen that
|
|
+ * we drop in here - so as not to make remote nodes
|
|
+ * panic on that answer
|
|
+ */
|
|
+ crm_warn("watchdog-fencing-query failed");
|
|
+ } else if (list[0] == '\0') {
|
|
+ crm_warn("watchdog-fencing-query returned an empty list - any node");
|
|
+ rv = TRUE;
|
|
+ } else {
|
|
+ GList *targets = stonith__parse_targets(list);
|
|
+ rv = pcmk__str_in_list(targets, node, pcmk__str_casei);
|
|
+ g_list_free_full(targets, free);
|
|
+ }
|
|
+ free(list);
|
|
+ if (!st) {
|
|
+ /* if we're provided the api we still might have done the
|
|
+ * connection - but let's assume the caller won't bother
|
|
+ */
|
|
+ stonith_api->cmds->disconnect(stonith_api);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (!st) {
|
|
+ stonith_api_delete(stonith_api);
|
|
+ }
|
|
+ } else {
|
|
+ crm_err("Stonith-API for watchdog-fencing-query couldn't be created.");
|
|
+ }
|
|
+ crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.",
|
|
+ node, rv?"":"not ");
|
|
+ return rv;
|
|
+}
|
|
+
|
|
+gboolean
|
|
+stonith__watchdog_fencing_enabled_for_node(const char *node)
|
|
+{
|
|
+ return stonith__watchdog_fencing_enabled_for_node_api(NULL, node);
|
|
+}
|
|
+
|
|
static void
|
|
log_action(stonith_action_t *action, pid_t pid)
|
|
{
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index 87d050ed1..bf4bceb42 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -34,6 +34,7 @@
|
|
#include <crm/msg_xml.h>
|
|
|
|
#include <crm/stonith-ng.h>
|
|
+#include <crm/fencing/internal.h>
|
|
|
|
#ifdef HAVE_GNUTLS_GNUTLS_H
|
|
# undef KEYFILE
|
|
@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash)
|
|
crm_xml_add(data, F_LRMD_ORIGIN, __func__);
|
|
|
|
value = g_hash_table_lookup(hash, "stonith-watchdog-timeout");
|
|
- crm_xml_add(data, F_LRMD_WATCHDOG, value);
|
|
+ if ((value) &&
|
|
+ (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) {
|
|
+ crm_xml_add(data, F_LRMD_WATCHDOG, value);
|
|
+ }
|
|
|
|
rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0,
|
|
(native->type == pcmk__client_ipc));
|
|
diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in
|
|
index 79e78ede9..f58357a77 100644
|
|
--- a/rpm/pacemaker.spec.in
|
|
+++ b/rpm/pacemaker.spec.in
|
|
@@ -744,6 +744,7 @@ exit 0
|
|
%doc %{_mandir}/man8/crm_attribute.*
|
|
%doc %{_mandir}/man8/crm_master.*
|
|
%doc %{_mandir}/man8/fence_legacy.*
|
|
+%doc %{_mandir}/man8/fence_watchdog.*
|
|
%doc %{_mandir}/man8/pacemakerd.*
|
|
|
|
%doc %{_datadir}/pacemaker/alerts
|
|
@@ -796,6 +797,7 @@ exit 0
|
|
%{_sbindir}/crm_simulate
|
|
%{_sbindir}/crm_report
|
|
%{_sbindir}/crm_ticket
|
|
+%{_sbindir}/fence_watchdog
|
|
%{_sbindir}/stonith_admin
|
|
# "dirname" is owned by -schemas, which is a prerequisite
|
|
%{_datadir}/pacemaker/report.collector
|
|
@@ -822,6 +824,7 @@ exit 0
|
|
%exclude %{_mandir}/man8/crm_attribute.*
|
|
%exclude %{_mandir}/man8/crm_master.*
|
|
%exclude %{_mandir}/man8/fence_legacy.*
|
|
+%exclude %{_mandir}/man8/fence_watchdog.*
|
|
%exclude %{_mandir}/man8/pacemakerd.*
|
|
%exclude %{_mandir}/man8/pacemaker-remoted.*
|
|
|
|
--
|
|
2.27.0
|
|
|
|
|
|
From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001
|
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
Date: Wed, 4 Aug 2021 15:57:23 +0200
|
|
Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node
|
|
restriction
|
|
|
|
---
|
|
lib/fencing/st_client.c | 1 -
|
|
1 file changed, 1 deletion(-)
|
|
|
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
|
index 0ff98157b..14fa7b2a6 100644
|
|
--- a/lib/fencing/st_client.c
|
|
+++ b/lib/fencing/st_client.c
|
|
@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
|
|
*/
|
|
crm_warn("watchdog-fencing-query failed");
|
|
} else if (list[0] == '\0') {
|
|
- crm_warn("watchdog-fencing-query returned an empty list - any node");
|
|
rv = TRUE;
|
|
} else {
|
|
GList *targets = stonith__parse_targets(list);
|
|
--
|
|
2.27.0
|
|
|