274 lines
10 KiB
Diff
274 lines
10 KiB
Diff
From 5470f1d9c776dbf753e015fa96153b6a63c17b83 Mon Sep 17 00:00:00 2001
|
|
From: "Gao,Yan" <ygao@suse.com>
|
|
Date: Thu, 9 May 2019 13:24:35 +0200
|
|
Subject: [PATCH] Fix: controller: confirm cancel of failed monitors
|
|
|
|
Usually after a monitor has been cancelled from executor, contoller
|
|
erases the corresponding lrm_rsc_op from the cib, and DC will confirm
|
|
the cancel action by process_op_deletion() according to the cib diff.
|
|
|
|
But if a monitor has failed, the lrm_rsc_op will be recorded as
|
|
"last_failure". When cancelling it, the lrm_rsc_op won't get erased from
|
|
the cib given the logic on purpose in erase_lrm_history_by_op(). So that
|
|
the cancel action won't have a chance to get confirmed by DC with
|
|
process_op_deletion().
|
|
|
|
Previously cluster transition would get stuck waiting for the remaining
|
|
action timer to time out.
|
|
|
|
This commit fixes the issue by directly acknowledging the cancel action
|
|
in this case and enabling DC to be able to confirm it.
|
|
|
|
This also moves get_node_id() function into controld_utils.c for common
|
|
use.
|
|
|
|
Producer:
|
|
```
|
|
# Insert a 10s sleep in the monitor action of RA
|
|
# /usr/lib/ocf/resource.d/pacemaker/Stateful:
|
|
|
|
stateful_monitor() {
|
|
+ sleep 10
|
|
stateful_check_state "master"
|
|
|
|
# Add a promotable clone resource:
|
|
|
|
crm configure primitive stateful ocf:pacemaker:Stateful \
|
|
op monitor interval=5 role=Master \
|
|
op monitor interval=10 role=Slave
|
|
crm configure clone p-clone stateful \
|
|
meta promotable=true
|
|
|
|
# Wait for the resource instance to be started, promoted to be master,
|
|
# and monitor for master role to complete.
|
|
|
|
# Set is-managed=false for the promotable clone:
|
|
crm_resource --meta -p is-managed -v false -r p-clone
|
|
|
|
# Change the status of the master instance to be slave and immediately
|
|
# enforce refresh of it:
|
|
echo slave > /var/run/Stateful-stateful.state; crm_resource --refresh -r stateful --force
|
|
|
|
# Wait for probe to complete, and then monitor for slave role to be
|
|
# issued:
|
|
sleep 15
|
|
|
|
# While the monitor for slave role is still in progress, change the
|
|
# status to be master again:
|
|
echo master > /var/run/Stateful-stateful.state
|
|
|
|
# The monitor for slave role returns error. Cluster issues monitor for
|
|
# master role instead and tries to cancel the failed one for slave role.
|
|
# But cluster transition gets stuck. Depending on the monitor timeout
|
|
# configured for the slave role plus cluster-delay, only after that
|
|
# controller eventually says:
|
|
|
|
pacemaker-controld[21205] error: Node opensuse150 did not send cancel result (via controller) within 20000ms (action timeout plus cluster-delay)
|
|
pacemaker-controld[21205] error: [Action 1]: In-flight rsc op stateful_monitor_10000 on opensuse150 (priority: 0, waiting: none)
|
|
pacemaker-controld[21205] notice: Transition 6 aborted: Action lost
|
|
|
|
```
|
|
---
|
|
daemons/controld/controld_execd.c | 38 ++++++++++++++++++++++++++++++++
|
|
daemons/controld/controld_te_callbacks.c | 21 ++----------------
|
|
daemons/controld/controld_te_events.c | 32 +++++++++++++++++++++++++++
|
|
daemons/controld/controld_transition.h | 1 +
|
|
daemons/controld/controld_utils.c | 13 +++++++++++
|
|
daemons/controld/controld_utils.h | 2 ++
|
|
6 files changed, 88 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
|
index 976fed1..8282fed 100644
|
|
--- a/daemons/controld/controld_execd.c
|
|
+++ b/daemons/controld/controld_execd.c
|
|
@@ -2476,6 +2476,30 @@ unescape_newlines(const char *string)
|
|
return ret;
|
|
}
|
|
|
|
+static bool
|
|
+did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
|
|
+ const char * op_type, guint interval_ms)
|
|
+{
|
|
+ rsc_history_t *entry = NULL;
|
|
+
|
|
+ CRM_CHECK(lrm_state != NULL, return FALSE);
|
|
+ CRM_CHECK(rsc_id != NULL, return FALSE);
|
|
+ CRM_CHECK(op_type != NULL, return FALSE);
|
|
+
|
|
+ entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
|
|
+ if (entry == NULL || entry->failed == NULL) {
|
|
+ return FALSE;
|
|
+ }
|
|
+
|
|
+ if (crm_str_eq(entry->failed->rsc_id, rsc_id, TRUE)
|
|
+ && safe_str_eq(entry->failed->op_type, op_type)
|
|
+ && entry->failed->interval_ms == interval_ms) {
|
|
+ return TRUE;
|
|
+ }
|
|
+
|
|
+ return FALSE;
|
|
+}
|
|
+
|
|
void
|
|
process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
struct recurring_op_s *pending, xmlNode *action_xml)
|
|
@@ -2605,6 +2629,20 @@ process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
|
|
erase_lrm_history_by_op(lrm_state, op);
|
|
}
|
|
|
|
+ /* If the recurring operation had failed, the lrm_rsc_op is recorded as
|
|
+ * "last_failure" which won't get erased from the cib given the logic on
|
|
+ * purpose in erase_lrm_history_by_op(). So that the cancel action won't
|
|
+ * have a chance to get confirmed by DC with process_op_deletion().
|
|
+ * Cluster transition would get stuck waiting for the remaining action
|
|
+ * timer to time out.
|
|
+ *
|
|
+ * Directly acknowledge the cancel operation in this case.
|
|
+ */
|
|
+ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
|
|
+ pending->op_type, pending->interval_ms)) {
|
|
+ need_direct_ack = TRUE;
|
|
+ }
|
|
+
|
|
} else if (op->rsc_deleted) {
|
|
/* This recurring operation was cancelled (but not by us, and the
|
|
* executor does not have resource information, likely due to resource
|
|
diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c
|
|
index 51d908e..22b5f4b 100644
|
|
--- a/daemons/controld/controld_te_callbacks.c
|
|
+++ b/daemons/controld/controld_te_callbacks.c
|
|
@@ -32,19 +32,6 @@ static unsigned long int stonith_max_attempts = 10;
|
|
/* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
|
|
#define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
|
|
|
|
-static const char *
|
|
-get_node_id(xmlNode * rsc_op)
|
|
-{
|
|
- xmlNode *node = rsc_op;
|
|
-
|
|
- while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
|
|
- node = node->parent;
|
|
- }
|
|
-
|
|
- CRM_CHECK(node != NULL, return NULL);
|
|
- return ID(node);
|
|
-}
|
|
-
|
|
void
|
|
update_stonith_max_attempts(const char* value)
|
|
{
|
|
@@ -374,12 +361,8 @@ process_op_deletion(const char *xpath, xmlNode *change)
|
|
node_uuid = extract_node_uuid(xpath);
|
|
cancel = get_cancel_action(key, node_uuid);
|
|
if (cancel) {
|
|
- crm_info("Cancellation of %s on %s confirmed (%d)",
|
|
- key, node_uuid, cancel->id);
|
|
- stop_te_timer(cancel->timer);
|
|
- te_action_confirmed(cancel);
|
|
- update_graph(transition_graph, cancel);
|
|
- trigger_graph();
|
|
+ confirm_cancel_action(cancel);
|
|
+
|
|
} else {
|
|
abort_transition(INFINITY, tg_restart, "Resource operation removal",
|
|
change);
|
|
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
|
|
index c0d096f..b7b48a4 100644
|
|
--- a/daemons/controld/controld_te_events.c
|
|
+++ b/daemons/controld/controld_te_events.c
|
|
@@ -355,6 +355,27 @@ get_cancel_action(const char *id, const char *node)
|
|
return NULL;
|
|
}
|
|
|
|
+void
|
|
+confirm_cancel_action(crm_action_t *cancel)
|
|
+{
|
|
+ const char *op_key = NULL;
|
|
+ const char *node_name = NULL;
|
|
+
|
|
+ CRM_ASSERT(cancel != NULL);
|
|
+
|
|
+ op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
|
|
+ node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
|
|
+
|
|
+ stop_te_timer(cancel->timer);
|
|
+ te_action_confirmed(cancel);
|
|
+ update_graph(transition_graph, cancel);
|
|
+
|
|
+ crm_info("Cancellation of %s on %s confirmed (action %d)",
|
|
+ op_key, node_name, cancel->id);
|
|
+
|
|
+ trigger_graph();
|
|
+}
|
|
+
|
|
/* downed nodes are listed like: <downed> <node id="UUID1" /> ... </downed> */
|
|
#define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
|
|
"/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
|
|
@@ -471,6 +492,17 @@ process_graph_event(xmlNode *event, const char *event_node)
|
|
/* Recurring actions have the transition number they were first
|
|
* scheduled in.
|
|
*/
|
|
+
|
|
+ if (status == PCMK_LRM_OP_CANCELLED) {
|
|
+ const char *node_id = get_node_id(event);
|
|
+
|
|
+ action = get_cancel_action(id, node_id);
|
|
+ if (action) {
|
|
+ confirm_cancel_action(action);
|
|
+ }
|
|
+ goto bail;
|
|
+ }
|
|
+
|
|
desc = "arrived after initial scheduling";
|
|
abort_transition(INFINITY, tg_restart, "Change in recurring result",
|
|
event);
|
|
diff --git a/daemons/controld/controld_transition.h b/daemons/controld/controld_transition.h
|
|
index 0a33599..a162f99 100644
|
|
--- a/daemons/controld/controld_transition.h
|
|
+++ b/daemons/controld/controld_transition.h
|
|
@@ -25,6 +25,7 @@ void execute_stonith_cleanup(void);
|
|
/* tengine */
|
|
extern crm_action_t *match_down_event(const char *target);
|
|
extern crm_action_t *get_cancel_action(const char *id, const char *node);
|
|
+void confirm_cancel_action(crm_action_t *cancel);
|
|
|
|
void controld_record_action_timeout(crm_action_t *action);
|
|
extern gboolean fail_incompletable_actions(crm_graph_t * graph, const char *down_node);
|
|
diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c
|
|
index ca7e15d..35922f0 100644
|
|
--- a/daemons/controld/controld_utils.c
|
|
+++ b/daemons/controld/controld_utils.c
|
|
@@ -1073,3 +1073,16 @@ feature_set_compatible(const char *dc_version, const char *join_version)
|
|
// DC's minor version must be the same or older
|
|
return dc_v <= join_v;
|
|
}
|
|
+
|
|
+const char *
|
|
+get_node_id(xmlNode *lrm_rsc_op)
|
|
+{
|
|
+ xmlNode *node = lrm_rsc_op;
|
|
+
|
|
+ while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
|
|
+ node = node->parent;
|
|
+ }
|
|
+
|
|
+ CRM_CHECK(node != NULL, return NULL);
|
|
+ return ID(node);
|
|
+}
|
|
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
|
|
index 2a92db5..68992f5 100644
|
|
--- a/daemons/controld/controld_utils.h
|
|
+++ b/daemons/controld/controld_utils.h
|
|
@@ -95,6 +95,8 @@ unsigned int cib_op_timeout(void);
|
|
bool feature_set_compatible(const char *dc_version, const char *join_version);
|
|
bool controld_action_is_recordable(const char *action);
|
|
|
|
+const char *get_node_id(xmlNode *lrm_rsc_op);
|
|
+
|
|
/* Convenience macro for registering a CIB callback
|
|
* (assumes that data can be freed with free())
|
|
*/
|
|
--
|
|
1.8.3.1
|
|
|